Merge "Speed-up for ext-intra" into nextgenv2
diff --git a/test/avg_test.cc b/test/avg_test.cc
index 7d5380f..44d8dd7 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -55,19 +55,19 @@
}
// Sum Pixels
- unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch ) {
+ unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch) {
unsigned int average = 0;
for (int h = 0; h < 8; ++h)
for (int w = 0; w < 8; ++w)
- average += source[h * source_stride_ + w];
+ average += source[h * pitch + w];
return ((average + 32) >> 6);
}
- unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch ) {
+ unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch) {
unsigned int average = 0;
for (int h = 0; h < 4; ++h)
for (int w = 0; w < 4; ++w)
- average += source[h * source_stride_ + w];
+ average += source[h * pitch + w];
return ((average + 8) >> 4);
}
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 0826788..12022be 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -279,8 +279,7 @@
uint16_t *output_ptr,
unsigned int output_stride,
unsigned int output_width,
- unsigned int output_height,
- int bd) {
+ unsigned int output_height) {
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; ++j) {
@@ -306,7 +305,7 @@
highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
output_width, output_height, bd);
highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride,
- output_width, output_height, bd);
+ output_width, output_height);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index b98f8c8..9d5074e 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -850,8 +850,7 @@
const vpx_svc_extra_cfg_t *svc_params,
int spatial_layers,
int temporal_layers,
- int temporal_layering_mode,
- unsigned int total_rate) {
+ int temporal_layering_mode) {
int sl, spatial_layer_target;
float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = {0};
@@ -914,8 +913,7 @@
cfg_.rc_target_bitrate = i;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
- cfg_.ts_number_layers, cfg_.temporal_layering_mode,
- cfg_.rc_target_bitrate);
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
<< " The datarate for the file exceeds the target by too much!";
@@ -953,8 +951,7 @@
cfg_.rc_target_bitrate = 800;
ResetModel();
assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
- cfg_.ts_number_layers, cfg_.temporal_layering_mode,
- cfg_.rc_target_bitrate);
+ cfg_.ts_number_layers, cfg_.temporal_layering_mode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
<< " The datarate for the file exceeds the target by too much!";
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 0449b52..d6cc5e4 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -276,12 +276,12 @@
}
void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
idct16x16_10(in, out, stride);
}
void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
idct16x16_12(in, out, stride);
}
@@ -778,7 +778,7 @@
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
- void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {}
+ void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride);
}
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc
index 9a2ad2f..cd0dca2 100644
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -100,7 +100,7 @@
}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
- ::libvpx_test::Encoder *encoder) {
+ ::libvpx_test::Encoder * /*encoder*/) {
frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
VP8_EFLAG_NO_UPD_GF |
VP8_EFLAG_NO_UPD_ARF);
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 3f6b738..0c91aee 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -40,7 +40,7 @@
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
vpx_fdct4x4_c(in, out, stride);
}
@@ -49,7 +49,7 @@
}
void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
- int tx_type) {
+ int /*tx_type*/) {
vp9_fwht4x4_c(in, out, stride);
}
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 72d2aed..edf4682 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -47,7 +47,7 @@
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
-void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
+void reference_8x8_dct_1d(const double in[8], double out[8]) {
const double kInvSqrt2 = 0.707106781186547524400844362104;
for (int k = 0; k < 8; k++) {
out[k] = 0.0;
@@ -65,7 +65,7 @@
double temp_in[8], temp_out[8];
for (int j = 0; j < 8; ++j)
temp_in[j] = input[j*8 + i];
- reference_8x8_dct_1d(temp_in, temp_out, 1);
+ reference_8x8_dct_1d(temp_in, temp_out);
for (int j = 0; j < 8; ++j)
output[j * 8 + i] = temp_out[j];
}
@@ -74,7 +74,7 @@
double temp_in[8], temp_out[8];
for (int j = 0; j < 8; ++j)
temp_in[j] = output[j + i*8];
- reference_8x8_dct_1d(temp_in, temp_out, 1);
+ reference_8x8_dct_1d(temp_in, temp_out);
// Scale by some magic number
for (int j = 0; j < 8; ++j)
output[j + i * 8] = temp_out[j] * 2;
@@ -82,7 +82,8 @@
}
-void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
+ int /*tx_type*/) {
vpx_fdct8x8_c(in, out, stride);
}
@@ -642,7 +643,7 @@
void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride);
}
- void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {}
+ void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
IdctFunc ref_txfm_;
IdctFunc inv_txfm_;
diff --git a/test/hbd_metrics_test.cc b/test/hbd_metrics_test.cc
index bf75a29..dac001f 100644
--- a/test/hbd_metrics_test.cc
+++ b/test/hbd_metrics_test.cc
@@ -16,6 +16,7 @@
#include "test/acm_random.h"
#include "test/util.h"
#include "./vpx_config.h"
+#include "vpx_dsp/psnr.h"
#include "vpx_dsp/ssim.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h"
@@ -32,6 +33,19 @@
const YV12_BUFFER_CONFIG *dest,
uint32_t bd);
+double compute_hbd_psnr(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest, uint32_t bit_depth) {
+ PSNR_STATS psnr;
+ calc_highbd_psnr(source, dest, &psnr, bit_depth, bit_depth);
+ return psnr.psnr[0];
+}
+
+double compute_psnr(const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *dest) {
+ PSNR_STATS psnr;
+ calc_psnr(source, dest, &psnr);
+ return psnr.psnr[0];
+}
double compute_hbd_psnrhvs(const YV12_BUFFER_CONFIG *source,
const YV12_BUFFER_CONFIG *dest,
@@ -208,5 +222,13 @@
MetricTestTParam(&compute_psnrhvs, &compute_hbd_psnrhvs, 12,
kPhvs_thresh)));
+INSTANTIATE_TEST_CASE_P(
+ PSNR, HBDMetricsTest,
+ ::testing::Values(
+ MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 10,
+ kPhvs_thresh),
+ MetricTestTParam(&compute_psnr, &compute_hbd_psnr, 12,
+ kPhvs_thresh)));
+
} // namespace
diff --git a/test/resize_test.cc b/test/resize_test.cc
index c5f05f3..0177308 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -90,74 +90,178 @@
unsigned int h;
};
-unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
- if (frame < 10)
- return val;
- if (frame < 20)
- return val * 3 / 4;
- if (frame < 30)
- return val / 2;
- if (frame < 40)
- return val;
- if (frame < 50)
- return val * 3 / 4;
- if (frame < 60)
- return val / 2;
- if (frame < 70)
- return val * 3 / 4;
- if (frame < 80)
- return val;
- if (frame < 90)
- return val * 3 / 4;
- if (frame < 100)
- return val / 2;
- if (frame < 110)
- return val * 3 / 4;
- if (frame < 120)
- return val;
- if (frame < 130)
- return val * 3 / 4;
- if (frame < 140)
- return val / 2;
- if (frame < 150)
- return val * 3 / 4;
- if (frame < 160)
- return val;
- if (frame < 170)
- return val / 2;
- if (frame < 180)
- return val * 3 / 4;
- if (frame < 190)
- return val;
- if (frame < 200)
- return val * 3 / 4;
- if (frame < 210)
- return val / 2;
- if (frame < 220)
- return val * 3 / 4;
- if (frame < 230)
- return val;
- if (frame < 240)
- return val / 2;
- if (frame < 250)
- return val * 3 / 4;
- return val;
+void ScaleForFrameNumber(unsigned int frame,
+ unsigned int initial_w,
+ unsigned int initial_h,
+ unsigned int *w,
+ unsigned int *h,
+ int flag_codec) {
+ if (frame < 10) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 20) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 30) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 40) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 50) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 60) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 70) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 80) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 90) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 100) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 110) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 120) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 130) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 140) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 150) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 160) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 170) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 180) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 190) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 200) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 210) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 220) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 230) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ if (frame < 240) {
+ *w = initial_w * 3 / 4;
+ *h = initial_h * 3 / 4;
+ return;
+ }
+ if (frame < 250) {
+ *w = initial_w / 2;
+ *h = initial_h / 2;
+ return;
+ }
+ if (frame < 260) {
+ *w = initial_w;
+ *h = initial_h;
+ return;
+ }
+ // Go down very low.
+ if (frame < 270) {
+ *w = initial_w / 4;
+ *h = initial_h / 4;
+ return;
+ }
+ if (flag_codec == 1) {
+ // Cases that only works for VP9.
+ // For VP9: Swap width and height of original.
+ if (frame < 320) {
+ *w = initial_h;
+ *h = initial_w;
+ return;
+ }
+ }
+ *w = initial_w;
+ *h = initial_h;
}
class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
public:
ResizingVideoSource() {
SetSize(kInitialWidth, kInitialHeight);
- limit_ = 300;
+ limit_ = 350;
}
-
+ int flag_codec_;
virtual ~ResizingVideoSource() {}
protected:
virtual void Next() {
++frame_;
- SetSize(ScaleForFrameNumber(frame_, kInitialWidth),
- ScaleForFrameNumber(frame_, kInitialHeight));
+ unsigned int width;
+ unsigned int height;
+ ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
+ flag_codec_);
+ SetSize(width, height);
FillFrame();
}
};
@@ -184,15 +288,17 @@
TEST_P(ResizeTest, TestExternalResizeWorks) {
ResizingVideoSource video;
+ video.flag_codec_ = 0;
cfg_.g_lag_in_frames = 0;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
const unsigned int frame = static_cast<unsigned>(info->pts);
- const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
- const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
-
+ unsigned int expected_w;
+ unsigned int expected_h;
+ ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight,
+ &expected_w, &expected_h, 0);
EXPECT_EQ(expected_w, info->w)
<< "Frame " << frame << " had unexpected width";
EXPECT_EQ(expected_h, info->h)
@@ -386,6 +492,7 @@
TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
ResizingVideoSource video;
+ video.flag_codec_ = 1;
DefaultConfig();
// Disable internal resize for this test.
cfg_.rc_resize_allowed = 0;
@@ -395,9 +502,10 @@
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
info != frame_info_list_.end(); ++info) {
const unsigned int frame = static_cast<unsigned>(info->pts);
- const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
- const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
-
+ unsigned int expected_w;
+ unsigned int expected_h;
+ ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight,
+ &expected_w, &expected_h, 1);
EXPECT_EQ(expected_w, info->w)
<< "Frame " << frame << " had unexpected width";
EXPECT_EQ(expected_h, info->h)
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 6f0cbdf..f1aa4d7 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -103,7 +103,7 @@
const int mode = std::tr1::get<kDecodeMode>(input);
libvpx_test::CompressedVideoSource *video = NULL;
vpx_codec_flags_t flags = 0;
- vpx_codec_dec_cfg_t cfg = {0};
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
char str[256];
if (mode == kFrameParallelMode) {
diff --git a/test/vp9_encoder_parms_get_to_decoder.cc b/test/vp9_encoder_parms_get_to_decoder.cc
index 3ef6022..bd84098 100644
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@@ -45,9 +45,9 @@
};
const EncodeParameters kVP9EncodeParameterSet[] = {
- {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601},
- {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709},
- {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020},
+ {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601, { 0, 0 }},
+ {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709, { 0, 0 }},
+ {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020, { 0, 0 }},
{0, 2, 0, 0, 1, VPX_CR_STUDIO_RANGE, VPX_CS_UNKNOWN, { 640, 480 }},
// TODO(JBB): Test profiles (requires more work).
};
@@ -93,7 +93,7 @@
}
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
- const libvpx_test::VideoSource &video,
+ const libvpx_test::VideoSource & /*video*/,
libvpx_test::Decoder *decoder) {
vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder();
vpx_codec_alg_priv_t *const priv =
diff --git a/test/vp9_error_block_test.cc b/test/vp9_error_block_test.cc
index 77b12ea..23a249e 100644
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@@ -164,7 +164,7 @@
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bps) {
- assert(bps == 8);
+ EXPECT_EQ(8, bps);
return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz);
}
@@ -173,7 +173,7 @@
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bps) {
- assert(bps == 8);
+ EXPECT_EQ(8, bps);
return vp9_highbd_block_error_8bit_sse2(coeff, dqcoeff, block_size, ssz);
}
@@ -195,7 +195,7 @@
const tran_low_t *dqcoeff,
intptr_t block_size,
int64_t *ssz, int bps) {
- assert(bps == 8);
+ EXPECT_EQ(8, bps);
return vp9_highbd_block_error_8bit_avx(coeff, dqcoeff, block_size, ssz);
}
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc
index 8ac5c33..29a653f 100644
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -62,7 +62,7 @@
encoder_initialized_ = false;
}
- virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/,
::libvpx_test::Encoder *encoder) {
if (!encoder_initialized_) {
// Encode 4 column tiles.
diff --git a/test/vp9_intrapred_test.cc b/test/vp9_intrapred_test.cc
index e6198af..416f3c3 100644
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -34,7 +34,7 @@
virtual ~VP9IntraPredBase() { libvpx_test::ClearSystemState(); }
protected:
- virtual void Predict(PREDICTION_MODE mode) = 0;
+ virtual void Predict() = 0;
void CheckPrediction(int test_case_number, int *error_count) const {
// For each pixel ensure that the calculated value is the same as reference.
@@ -73,7 +73,7 @@
left_col_[y] = rnd.Rand16() & mask_;
}
}
- Predict(DC_PRED);
+ Predict();
CheckPrediction(i, &error_count);
}
ASSERT_EQ(0, error_count);
@@ -106,7 +106,7 @@
mask_ = (1 << bit_depth_) - 1;
}
- virtual void Predict(PREDICTION_MODE mode) {
+ virtual void Predict() {
const uint16_t *const_above_row = above_row_;
const uint16_t *const_left_col = left_col_;
ref_fn_(ref_dst_, stride_, const_above_row, const_left_col, bit_depth_);
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index f41b8d9..af6ef36 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -115,7 +115,6 @@
#define EXT_TX_SIZES 3 // number of sizes that use extended transforms
#if CONFIG_EXT_TX
-#define USE_DST2 1
#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA
#endif // CONFIG_EXT_TX
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index d42f5f5..dbb50fb 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -20,7 +20,6 @@
#if CONFIG_EXT_TX
void idst4_c(const tran_low_t *input, tran_low_t *output) {
-#if USE_DST2
tran_low_t step[4];
tran_high_t temp1, temp2;
// stage 1
@@ -38,29 +37,9 @@
output[1] = WRAPLOW(-step[1] - step[2], 8);
output[2] = WRAPLOW(step[1] - step[2], 8);
output[3] = WRAPLOW(step[3] - step[0], 8);
-#else
- // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
- static const int32_t sinvalue_lookup[] = {
- 141124871, 228344838,
- };
- int64_t sum;
- int64_t s03 = (input[0] + input[3]);
- int64_t d03 = (input[0] - input[3]);
- int64_t s12 = (input[1] + input[2]);
- int64_t d12 = (input[1] - input[2]);
- sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
- output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
- output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
- output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
- output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
-#endif // USE_DST2
}
void idst8_c(const tran_low_t *input, tran_low_t *output) {
-#if USE_DST2
// vp9_igentx8(input, output, Tx8);
tran_low_t step1[8], step2[8];
tran_high_t temp1, temp2;
@@ -113,47 +92,9 @@
output[5] = WRAPLOW(-step1[2] + step1[5], 8);
output[6] = WRAPLOW(step1[1] - step1[6], 8);
output[7] = WRAPLOW(-step1[0] + step1[7], 8);
-#else
- // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
- static const int32_t sinvalue_lookup[] = {
- 86559612, 162678858, 219176632, 249238470
- };
- int64_t sum;
- int64_t s07 = (input[0] + input[7]);
- int64_t d07 = (input[0] - input[7]);
- int64_t s16 = (input[1] + input[6]);
- int64_t d16 = (input[1] - input[6]);
- int64_t s25 = (input[2] + input[5]);
- int64_t d25 = (input[2] - input[5]);
- int64_t s34 = (input[3] + input[4]);
- int64_t d34 = (input[3] - input[4]);
- sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
- s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
- output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
- d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
- output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = (s07 + s16 - s34)* sinvalue_lookup[2];
- output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
- d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
- output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
- s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
- output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = (d07 - d16 + d34)* sinvalue_lookup[2];
- output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
- s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
- output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
- d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
- output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
-#endif // USE_DST2
}
void idst16_c(const tran_low_t *input, tran_low_t *output) {
-#if USE_DST2
tran_low_t step1[16], step2[16];
tran_high_t temp1, temp2;
@@ -316,112 +257,75 @@
output[13] = WRAPLOW(-step2[2] + step2[13], 8);
output[14] = WRAPLOW(step2[1] - step2[14], 8);
output[15] = WRAPLOW(-step2[0] + step2[15], 8);
-#else
- // {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
- static const int32_t sinvalue_lookup[] = {
- 47852167, 94074787, 137093803, 175444254,
- 207820161, 233119001, 250479254, 259309736
- };
- int64_t sum;
- int64_t s015 = (input[0] + input[15]);
- int64_t d015 = (input[0] - input[15]);
- int64_t s114 = (input[1] + input[14]);
- int64_t d114 = (input[1] - input[14]);
- int64_t s213 = (input[2] + input[13]);
- int64_t d213 = (input[2] - input[13]);
- int64_t s312 = (input[3] + input[12]);
- int64_t d312 = (input[3] - input[12]);
- int64_t s411 = (input[4] + input[11]);
- int64_t d411 = (input[4] - input[11]);
- int64_t s510 = (input[5] + input[10]);
- int64_t d510 = (input[5] - input[10]);
- int64_t s69 = (input[6] + input[9]);
- int64_t d69 = (input[6] - input[9]);
- int64_t s78 = (input[7] + input[8]);
- int64_t d78 = (input[7] - input[8]);
- sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
- s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
- s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
- s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
- output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
- d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
- d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
- d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
- output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
- s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
- s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
- s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
- output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
- d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
- d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
- d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
- output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
- s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
- s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
- s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
- output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
- d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
- d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
- d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
- output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
- s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
- s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
- s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
- output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
- d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
- d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
- d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
- output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
- s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
- s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
- s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
- output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
- d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
- d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
- d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
- output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
- s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
- s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
- s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
- output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
- d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
- d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
- d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
- output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
- s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
- s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
- s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
- output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
- d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
- d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
- d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
- output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
- s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
- s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
- s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
- output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
- sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
- d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
- d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
- d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
- output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
-#endif // USE_DST2
}
+#if CONFIG_EXT_TX
+// For use in lieu of DST
+static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 8; ++i) {
+ output[i] = input[16 + i] * 4;
+ output[24 + i] = input[24 + i] * 4;
+ }
+ // Multiply input by sqrt(2)
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+ }
+ idct16_c(inputhalf, output + 8);
+ // Note overall scaling factor is 4 times orthogonal
+}
+
+static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 16; ++i) {
+ output[i] = input[16 + i] * 4;
+ }
+ // Multiply input by sqrt(2)
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+ }
+ idct16_c(inputhalf, output + 16);
+ // Note overall scaling factor is 4 times orthogonal
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 8; ++i) {
+ output[i] = input[16 + i] * 4;
+ output[24 + i] = input[24 + i] * 4;
+ }
+ // Multiply input by sqrt(2)
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
+ input[i] * Sqrt2, bd);
+ }
+ vpx_highbd_idct16_c(inputhalf, output + 8, bd);
+ // Note overall scaling factor is 4 times orthogonal
+}
+
+static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 16; ++i) {
+ output[i] = input[16 + i] * 4;
+ }
+ // Multiply input by sqrt(2)
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
+ input[i] * Sqrt2, bd);
+ }
+ vpx_highbd_idct16_c(inputhalf, output + 16, bd);
+ // Note overall scaling factor is 4 times orthogonal
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
+
// Inverse identiy transform and add.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int bs) {
@@ -483,7 +387,6 @@
#if CONFIG_VP9_HIGHBITDEPTH
void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
-#if USE_DST2
tran_low_t step[4];
tran_high_t temp1, temp2;
(void) bd;
@@ -502,34 +405,9 @@
output[1] = WRAPLOW(-step[1] - step[2], bd);
output[2] = WRAPLOW(step[1] - step[2], bd);
output[3] = WRAPLOW(step[3] - step[0], bd);
-#else
- // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
- static const int32_t sinvalue_lookup[] = {
- 141124871, 228344838,
- };
- int64_t sum;
- int64_t s03 = (input[0] + input[3]);
- int64_t d03 = (input[0] - input[3]);
- int64_t s12 = (input[1] + input[2]);
- int64_t d12 = (input[1] - input[2]);
-
-#if !CONFIG_EMULATE_HARDWARE
- (void)bd;
-#endif
-
- sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
- output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
- output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
- output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
- output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
-#endif // USE_DST2
}
void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
-#if USE_DST2
tran_low_t step1[8], step2[8];
tran_high_t temp1, temp2;
(void) bd;
@@ -582,52 +460,9 @@
output[5] = WRAPLOW(-step1[2] + step1[5], bd);
output[6] = WRAPLOW(step1[1] - step1[6], bd);
output[7] = WRAPLOW(-step1[0] + step1[7], bd);
-#else
- // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
- static const int32_t sinvalue_lookup[] = {
- 86559612, 162678858, 219176632, 249238470
- };
- int64_t sum;
- int64_t s07 = (input[0] + input[7]);
- int64_t d07 = (input[0] - input[7]);
- int64_t s16 = (input[1] + input[6]);
- int64_t d16 = (input[1] - input[6]);
- int64_t s25 = (input[2] + input[5]);
- int64_t d25 = (input[2] - input[5]);
- int64_t s34 = (input[3] + input[4]);
- int64_t d34 = (input[3] - input[4]);
-
-#if !CONFIG_EMULATE_HARDWARE
- (void)bd;
-#endif
-
- sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
- s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
- output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
- d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
- output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = (s07 + s16 - s34)* sinvalue_lookup[2];
- output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
- d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
- output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
- s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
- output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = (d07 - d16 + d34)* sinvalue_lookup[2];
- output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
- s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
- output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
- d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
- output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
-#endif // USE_DST2
}
void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
-#if USE_DST2
// vp9_highbd_igentx16(input, output, bd, Tx16);
tran_low_t step1[16], step2[16];
tran_high_t temp1, temp2;
@@ -792,115 +627,6 @@
output[13] = WRAPLOW(-step2[2] + step2[13], bd);
output[14] = WRAPLOW(step2[1] - step2[14], bd);
output[15] = WRAPLOW(-step2[0] + step2[15], bd);
-#else
- // {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
- static const int32_t sinvalue_lookup[] = {
- 47852167, 94074787, 137093803, 175444254,
- 207820161, 233119001, 250479254, 259309736
- };
- int64_t sum;
- int64_t s015 = (input[0] + input[15]);
- int64_t d015 = (input[0] - input[15]);
- int64_t s114 = (input[1] + input[14]);
- int64_t d114 = (input[1] - input[14]);
- int64_t s213 = (input[2] + input[13]);
- int64_t d213 = (input[2] - input[13]);
- int64_t s312 = (input[3] + input[12]);
- int64_t d312 = (input[3] - input[12]);
- int64_t s411 = (input[4] + input[11]);
- int64_t d411 = (input[4] - input[11]);
- int64_t s510 = (input[5] + input[10]);
- int64_t d510 = (input[5] - input[10]);
- int64_t s69 = (input[6] + input[9]);
- int64_t d69 = (input[6] - input[9]);
- int64_t s78 = (input[7] + input[8]);
- int64_t d78 = (input[7] - input[8]);
-
-#if !CONFIG_EMULATE_HARDWARE
- (void)bd;
-#endif
-
- sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
- s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
- s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
- s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
- output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
- d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
- d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
- d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
- output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
- s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
- s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
- s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
- output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
- d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
- d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
- d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
- output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
- s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
- s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
- s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
- output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
- d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
- d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
- d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
- output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
- s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
- s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
- s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
- output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
- d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
- d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
- d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
- output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
- s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
- s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
- s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
- output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
- d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
- d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
- d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
- output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
- s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
- s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
- s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
- output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
- d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
- d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
- d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
- output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
- s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
- s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
- s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
- output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
- d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
- d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
- d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
- output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
- s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
- s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
- s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
- output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
- sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
- d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
- d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
- d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
- output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
-#endif // USE_DST2
}
static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1149,6 +875,67 @@
}
}
+#if CONFIG_EXT_TX
+void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+ int stride, int tx_type) {
+ static const transform_2d IHT_32[] = {
+ { idct32_c, idct32_c }, // DCT_DCT = 0,
+ { ihalfright32_c, idct32_c }, // ADST_DCT = 1,
+ { idct32_c, ihalfright32_c }, // DCT_ADST = 2,
+ { ihalfright32_c, ihalfright32_c }, // ADST_ADST = 3,
+ { ihalfright32_c, idct32_c }, // FLIPADST_DCT = 4,
+ { idct32_c, ihalfright32_c }, // DCT_FLIPADST = 5,
+ { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST = 6,
+ { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST = 7,
+ { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST = 8,
+ { ihalfcenter32_c, idct32_c }, // DST_DCT = 9,
+ { idct32_c, ihalfcenter32_c }, // DCT_DST = 10,
+ { ihalfcenter32_c, ihalfright32_c }, // DST_ADST = 11,
+ { ihalfright32_c, ihalfcenter32_c }, // ADST_DST = 12,
+ { ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13,
+ { ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14,
+ { ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15
+ };
+
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[32][32];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 32;
+
+ // inverse transform row vectors
+ for (i = 0; i < 32; ++i) {
+ IHT_32[tx_type].rows(input, out[i]);
+ input += 32;
+ }
+
+ // transpose
+  for (i = 1; i < 32; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 32; ++i) {
+ IHT_32[tx_type].cols(out[i], out[i]);
+ }
+
+ maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32);
+
+ // Sum with the destination
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
// idct
void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
@@ -1339,15 +1126,27 @@
vp10_idct32x32_add(input, dest, stride, eob);
break;
#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
+ break;
case IDTX:
inv_idtx_add_c(input, dest, stride, 32);
break;
#endif // CONFIG_EXT_TX
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- assert(0);
- break;
default:
assert(0);
break;
@@ -1553,6 +1352,70 @@
}
}
+#if CONFIG_EXT_TX
+void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_32[] = {
+ { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
+ { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
+ };
+
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[32][32];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 32;
+
+ // inverse transform row vectors
+ for (i = 0; i < 32; ++i) {
+ HIGH_IHT_32[tx_type].rows(input, out[i], bd);
+ input += 32;
+ }
+
+ // transpose
+  for (i = 1; i < 32; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 32; ++i) {
+ HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32);
+
+ // Sum with the destination
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j) {
+ int d = i * stride + j;
+ int s = j * outstride + i;
+ dest[d] = highbd_clip_pixel_add(dest[d],
+ ROUND_POWER_OF_TWO(outp[s], 6), bd);
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
// idct
void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd) {
@@ -1750,15 +1613,27 @@
vp10_highbd_idct32x32_add(input, dest, stride, eob, bd);
break;
#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
+ break;
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
break;
#endif // CONFIG_EXT_TX
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- assert(0);
- break;
default:
assert(0);
break;
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index efc1ee1..c1cb69d 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -654,8 +654,6 @@
uint8_t *dst = use_tmp_dst_buf ?
&final_buf[plane][(i * 8) >> pd->subsampling_x] :
&pd->dst.buf[(i * 8) >> pd->subsampling_x];
- int bmc_stride = pd->dst.stride;
- uint8_t *bmc = &pd->dst.buf[(i * 8) >> pd->subsampling_x];
int tmp_stride = tmp_stride1[plane];
uint8_t *tmp = &tmp_buf1[plane][(i * 8) >> pd->subsampling_x];
const uint8_t *mask[2];
@@ -665,27 +663,22 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (is_hbd) {
uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- uint16_t *bmc16 = CONVERT_TO_SHORTPTR(bmc);
uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
for (row = 0; row < bh; ++row) {
- for (col = 0; col < bw; ++col) {
- dst16[col] = (mask[0][row] * bmc16[col] + mask[1][row] * tmp16[col]
+ for (col = 0; col < bw; ++col)
+ dst16[col] = (mask[0][row] * dst16[col] + mask[1][row] * tmp16[col]
+ 32) >> 6;
- }
dst16 += dst_stride;
- bmc16 += bmc_stride;
tmp16 += tmp_stride;
}
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
- for (col = 0; col < bw; ++col) {
- dst[col] = (mask[0][row] * bmc[col] + mask[1][row] * tmp[col] + 32)
+ for (col = 0; col < bw; ++col)
+ dst[col] = (mask[0][row] * dst[col] + mask[1][row] * tmp[col] + 32)
>> 6;
- }
dst += dst_stride;
- bmc += bmc_stride;
tmp += tmp_stride;
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -727,8 +720,6 @@
uint8_t *dst = use_tmp_dst_buf ?
&final_buf[plane][(i * 8 * dst_stride) >> pd->subsampling_y] :
&pd->dst.buf[(i * 8 * dst_stride) >> pd->subsampling_y];
- int bmc_stride = pd->dst.stride;
- uint8_t *bmc = &pd->dst.buf[(i * 8 * bmc_stride) >> pd->subsampling_y];
int tmp_stride = tmp_stride2[plane];
uint8_t *tmp = &tmp_buf2[plane]
[(i * 8 * tmp_stride) >> pd->subsampling_y];
@@ -739,27 +730,22 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (is_hbd) {
uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
- uint16_t *bmc16 = CONVERT_TO_SHORTPTR(bmc);
uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
for (row = 0; row < bh; ++row) {
- for (col = 0; col < bw; ++col) {
- dst16[col] = (mask[0][row] * bmc16[col] + mask[1][row] * tmp16[col]
+ for (col = 0; col < bw; ++col)
+ dst16[col] = (mask[0][row] * dst16[col] + mask[1][row] * tmp16[col]
+ 32) >> 6;
- }
dst16 += dst_stride;
- bmc16 += bmc_stride;
tmp16 += tmp_stride;
}
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
- for (col = 0; col < bw; ++col) {
- dst[col] = (mask[0][col] * bmc[col] + mask[1][col] * tmp[col] + 32)
+ for (col = 0; col < bw; ++col)
+ dst[col] = (mask[0][col] * dst[col] + mask[1][col] * tmp[col] + 32)
>> 6;
- }
dst += dst_stride;
- bmc += bmc_stride;
tmp += tmp_stride;
}
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index 9860bae..c9f0295 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -404,6 +404,9 @@
add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht16x16 sse2/;
+ add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x32/;
+
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
} else {
@@ -416,6 +419,9 @@
add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht16x16 sse2 msa/;
+ add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x32/;
+
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
}
@@ -642,6 +648,9 @@
add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_highbd_fht16x16/;
+ add_proto qw/void vp10_highbd_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht32x32/;
+
add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fwht4x4/;
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 5602753..333adbb 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -14,7 +14,6 @@
#include "./vp10_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
-
#include "vp10/common/blockd.h"
#include "vp10/common/idct.h"
#include "vpx_dsp/fwd_txfm.h"
@@ -39,7 +38,6 @@
#if CONFIG_EXT_TX
void fdst4(const tran_low_t *input, tran_low_t *output) {
-#if USE_DST2
tran_high_t step[4];
tran_high_t temp1, temp2;
@@ -56,29 +54,9 @@
temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
output[2] = fdct_round_shift(temp1);
output[0] = fdct_round_shift(temp2);
-#else
- // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
- static const int32_t sinvalue_lookup[] = {
- 141124871, 228344838,
- };
- int64_t sum;
- int64_t s03 = (input[0] + input[3]);
- int64_t d03 = (input[0] - input[3]);
- int64_t s12 = (input[1] + input[2]);
- int64_t d12 = (input[1] - input[2]);
- sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
- output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
- output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
- output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
- output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
-#endif // USE_DST2
}
void fdst8(const tran_low_t *input, tran_low_t *output) {
-#if USE_DST2
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
@@ -127,47 +105,9 @@
output[4] = fdct_round_shift(t2);
output[2] = fdct_round_shift(t1);
output[0] = fdct_round_shift(t3);
-#else
- // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
- static const int sinvalue_lookup[] = {
- 86559612, 162678858, 219176632, 249238470
- };
- int64_t sum;
- int64_t s07 = (input[0] + input[7]);
- int64_t d07 = (input[0] - input[7]);
- int64_t s16 = (input[1] + input[6]);
- int64_t d16 = (input[1] - input[6]);
- int64_t s25 = (input[2] + input[5]);
- int64_t d25 = (input[2] - input[5]);
- int64_t s34 = (input[3] + input[4]);
- int64_t d34 = (input[3] - input[4]);
- sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
- s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
- output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
- d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
- output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = (s07 + s16 - s34)* sinvalue_lookup[2];
- output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
- d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
- output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
- s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
- output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = (d07 - d16 + d34)* sinvalue_lookup[2];
- output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
- s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
- output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
- d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
- output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
-#endif // USE_DST2
}
void fdst16(const tran_low_t *input, tran_low_t *output) {
-#if USE_DST2
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
@@ -306,110 +246,6 @@
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
output[8] = fdct_round_shift(temp1);
output[0] = fdct_round_shift(temp2);
-#else
- // {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
- static const int sinvalue_lookup[] = {
- 47852167, 94074787, 137093803, 175444254,
- 207820161, 233119001, 250479254, 259309736
- };
- int64_t sum;
- int64_t s015 = (input[0] + input[15]);
- int64_t d015 = (input[0] - input[15]);
- int64_t s114 = (input[1] + input[14]);
- int64_t d114 = (input[1] - input[14]);
- int64_t s213 = (input[2] + input[13]);
- int64_t d213 = (input[2] - input[13]);
- int64_t s312 = (input[3] + input[12]);
- int64_t d312 = (input[3] - input[12]);
- int64_t s411 = (input[4] + input[11]);
- int64_t d411 = (input[4] - input[11]);
- int64_t s510 = (input[5] + input[10]);
- int64_t d510 = (input[5] - input[10]);
- int64_t s69 = (input[6] + input[9]);
- int64_t d69 = (input[6] - input[9]);
- int64_t s78 = (input[7] + input[8]);
- int64_t d78 = (input[7] - input[8]);
- sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
- s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
- s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
- s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
- output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
- d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
- d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
- d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
- output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
- s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
- s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
- s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
- output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
- d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
- d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
- d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
- output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
- s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
- s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
- s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
- output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
- d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
- d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
- d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
- output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
- s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
- s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
- s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
- output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
- d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
- d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
- d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
- output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
- s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
- s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
- s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
- output[8] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
- d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
- d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
- d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
- output[9] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
- s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
- s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
- s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
- output[10] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
- d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
- d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
- d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
- output[11] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
- s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
- s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
- s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
- output[12] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
- d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
- d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
- d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
- output[13] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
- s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
- s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
- s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
- output[14] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
- d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
- d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
- d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
- output[15] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
-#endif // USE_DST2
}
#endif // CONFIG_EXT_TX
@@ -701,7 +537,7 @@
range_check(output, 16, 16);
}
-/* TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
+#if CONFIG_EXT_TX
static void fdct32(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[32];
@@ -1099,7 +935,7 @@
range_check(output, 32, 18);
}
-*/
+#endif // CONFIG_EXT_TX
static void fadst4(const tran_low_t *input, tran_low_t *output) {
tran_high_t x0, x1, x2, x3;
@@ -1376,6 +1212,37 @@
}
#if CONFIG_EXT_TX
+// For use in lieu of DST
+static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 8; ++i) {
+ output[16 + i] = input[i] * 4;
+ output[24 + i] = input[24 + i] * 4;
+ }
+ // Multiply input by sqrt(2)
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 8] * Sqrt2);
+ }
+ fdct16(inputhalf, output);
+ // Note overall scaling factor is 4 times orthogonal
+}
+
+// For use in lieu of ADST
+static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
+ int i;
+ tran_low_t inputhalf[16];
+ for (i = 0; i < 16; ++i) {
+ output[16 + i] = input[i] * 4;
+ }
+ // Multiply input by sqrt(2)
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 16] * Sqrt2);
+ }
+ fdct16(inputhalf, output);
+ // Note overall scaling factor is 4 times orthogonal
+}
+
static void copy_block(const int16_t *src, int src_stride, int l,
int16_t *dest, int dest_stride) {
int i;
@@ -1538,6 +1405,27 @@
#endif // CONFIG_EXT_TX
};
+#if CONFIG_EXT_TX
+static const transform_2d FHT_32[] = {
+ { fdct32, fdct32 }, // DCT_DCT = 0,
+ { fhalfright32, fdct32 }, // ADST_DCT = 1,
+ { fdct32, fhalfright32 }, // DCT_ADST = 2,
+ { fhalfright32, fhalfright32 }, // ADST_ADST = 3,
+ { fhalfright32, fdct32 }, // FLIPADST_DCT = 4,
+ { fdct32, fhalfright32 }, // DCT_FLIPADST = 5,
+ { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST = 6,
+ { fhalfright32, fhalfright32 }, // ADST_FLIPADST = 7,
+ { fhalfright32, fhalfright32 }, // FLIPADST_ADST = 8,
+ { fhalfcenter32, fdct32 }, // DST_DCT = 9,
+ { fdct32, fhalfcenter32 }, // DCT_DST = 10,
+ { fhalfcenter32, fhalfright32 }, // DST_ADST = 11,
+ { fhalfright32, fhalfcenter32 }, // ADST_DST = 12,
+ { fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
+ { fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
+ { fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
+};
+#endif // CONFIG_EXT_TX
+
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
if (tx_type == DCT_DCT) {
@@ -1834,3 +1722,46 @@
vp10_fht16x16_c(input, output, stride, tx_type);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_TX
+void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vpx_fdct32x32_c(input, output, stride);
+ } else {
+ tran_low_t out[1024];
+ int i, j;
+ tran_low_t temp_in[32], temp_out[32];
+ const transform_2d ht = FHT_32[tx_type];
+
+ int16_t flipped_input[32 * 32];
+ maybe_flip_input(&input, &stride, 32, flipped_input, tx_type);
+
+ // Columns
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 32; ++j)
+ out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
+ }
+
+ // Rows
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j)
+ temp_in[j] = out[j + i * 32];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 32; ++j)
+ output[j + i * 32] =
+ (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
+ }
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_fht32x32_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht32x32_c(input, output, stride, tx_type);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 755c33b..2b96a86 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2065,6 +2065,11 @@
if (!x->skip) {
// TODO(geza.lore): Investigate if this can be relaxed
x->skip_recode = 0;
+ memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
+
+ x->skip_optimize = 0;
+ x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
+
vp10_encode_sb_supertx(x, bsize);
vp10_tokenize_sb_supertx(cpi, td, tp, !output_enabled, bsize);
} else {
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 4c4261e..55ec9c1 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -50,7 +50,7 @@
#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
-#include "vpx/internal/vpx_psnr.h"
+#include "vpx_dsp/psnr.h"
#if CONFIG_INTERNAL_STATS
#include "vpx_dsp/ssim.h"
#endif
@@ -2033,261 +2033,6 @@
#endif
}
-/* TODO(yaowu): The block_variance calls the unoptimized versions of variance()
- * and highbd_8_variance(). It should not.
- */
-static void encoder_variance(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int w, int h, unsigned int *sse, int *sum) {
- int i, j;
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
-
- a += a_stride;
- b += b_stride;
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, uint64_t *sse,
- uint64_t *sum) {
- int i, j;
-
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
- a += a_stride;
- b += b_stride;
- }
-}
-
-static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h,
- unsigned int *sse, int *sum) {
- uint64_t sse_long = 0;
- uint64_t sum_long = 0;
- encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
- &sse_long, &sum_long);
- *sse = (unsigned int)sse_long;
- *sum = (int)sum_long;
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-static int64_t get_sse(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int width, int height) {
- const int dw = width % 16;
- const int dh = height % 16;
- int64_t total_sse = 0;
- unsigned int sse = 0;
- int sum = 0;
- int x, y;
-
- if (dw > 0) {
- encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
- dw, height, &sse, &sum);
- total_sse += sse;
- }
-
- if (dh > 0) {
- encoder_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
- total_sse += sse;
- }
-
- for (y = 0; y < height / 16; ++y) {
- const uint8_t *pa = a;
- const uint8_t *pb = b;
- for (x = 0; x < width / 16; ++x) {
- vpx_mse16x16(pa, a_stride, pb, b_stride, &sse);
- total_sse += sse;
-
- pa += 16;
- pb += 16;
- }
-
- a += 16 * a_stride;
- b += 16 * b_stride;
- }
-
- return total_sse;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int width, int height,
- unsigned int input_shift) {
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- int64_t total_sse = 0;
- int x, y;
- for (y = 0; y < height; ++y) {
- for (x = 0; x < width; ++x) {
- int64_t diff;
- diff = (a[x] >> input_shift) - (b[x] >> input_shift);
- total_sse += diff * diff;
- }
- a += a_stride;
- b += b_stride;
- }
- return total_sse;
-}
-
-static int64_t highbd_get_sse(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int width, int height) {
- int64_t total_sse = 0;
- int x, y;
- const int dw = width % 16;
- const int dh = height % 16;
- unsigned int sse = 0;
- int sum = 0;
- if (dw > 0) {
- encoder_highbd_8_variance(&a[width - dw], a_stride,
- &b[width - dw], b_stride,
- dw, height, &sse, &sum);
- total_sse += sse;
- }
- if (dh > 0) {
- encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
- total_sse += sse;
- }
- for (y = 0; y < height / 16; ++y) {
- const uint8_t *pa = a;
- const uint8_t *pb = b;
- for (x = 0; x < width / 16; ++x) {
- vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);
- total_sse += sse;
- pa += 16;
- pb += 16;
- }
- a += 16 * a_stride;
- b += 16 * b_stride;
- }
- return total_sse;
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-typedef struct {
- double psnr[4]; // total/y/u/v
- uint64_t sse[4]; // total/y/u/v
- uint32_t samples[4]; // total/y/u/v
-} PSNR_STATS;
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b,
- PSNR_STATS *psnr,
- unsigned int bit_depth,
- unsigned int in_bit_depth) {
- const int widths[3] =
- {a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
- const int heights[3] =
- {a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
- const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer };
- const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
- const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer };
- const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
- int i;
- uint64_t total_sse = 0;
- uint32_t total_samples = 0;
- const double peak = (double)((1 << in_bit_depth) - 1);
- const unsigned int input_shift = bit_depth - in_bit_depth;
-
- for (i = 0; i < 3; ++i) {
- const int w = widths[i];
- const int h = heights[i];
- const uint32_t samples = w * h;
- uint64_t sse;
- if (a->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (input_shift) {
- sse = highbd_get_sse_shift(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i], w, h,
- input_shift);
- } else {
- sse = highbd_get_sse(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i], w, h);
- }
- } else {
- sse = get_sse(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i],
- w, h);
- }
- psnr->sse[1 + i] = sse;
- psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
-
- total_sse += sse;
- total_samples += samples;
- }
-
- psnr->sse[0] = total_sse;
- psnr->samples[0] = total_samples;
- psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
- (double)total_sse);
-}
-
-#else // !CONFIG_VP9_HIGHBITDEPTH
-
-static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
- PSNR_STATS *psnr) {
- static const double peak = 255.0;
- const int widths[3] = {
- a->y_crop_width, a->uv_crop_width, a->uv_crop_width};
- const int heights[3] = {
- a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
- const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer};
- const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
- const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer};
- const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
- int i;
- uint64_t total_sse = 0;
- uint32_t total_samples = 0;
-
- for (i = 0; i < 3; ++i) {
- const int w = widths[i];
- const int h = heights[i];
- const uint32_t samples = w * h;
- const uint64_t sse = get_sse(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i],
- w, h);
- psnr->sse[1 + i] = sse;
- psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
-
- total_sse += sse;
- total_samples += samples;
- }
-
- psnr->sse[0] = total_sse;
- psnr->samples[0] = total_samples;
- psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
- (double)total_sse);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
static void generate_psnr_packet(VP10_COMP *cpi) {
struct vpx_codec_cx_pkt pkt;
@@ -2955,7 +2700,7 @@
vpx_clear_system_state();
- recon_err = vp10_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
@@ -3380,12 +3125,12 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- kf_err = vp10_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
} else {
- kf_err = vp10_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
}
#else
- kf_err = vp10_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif // CONFIG_VP9_HIGHBITDEPTH
// Prevent possible divide by zero error below for perfect KF
@@ -3804,13 +3549,13 @@
if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- cpi->ambient_err = vp10_highbd_get_y_sse(cpi->Source,
+ cpi->ambient_err = vpx_highbd_get_y_sse(cpi->Source,
get_frame_new_buffer(cm));
} else {
- cpi->ambient_err = vp10_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
}
#else
- cpi->ambient_err = vp10_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif // CONFIG_VP9_HIGHBITDEPTH
}
@@ -3993,13 +3738,22 @@
int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
- VP10_COMMON *cm = &cpi->common;
+ VP10_COMMON *volatile const cm = &cpi->common;
struct vpx_usec_timer timer;
- int res = 0;
+ volatile int res = 0;
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
+#endif
+
+ if (setjmp(cm->error.jmp)) {
+ cm->error.setjmp = 0;
+ return -1;
+ }
+ cm->error.setjmp = 1;
+
+#if CONFIG_VP9_HIGHBITDEPTH
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#else
check_initial_width(cpi, subsampling_x, subsampling_y);
@@ -4032,6 +3786,7 @@
res = -1;
}
+ cm->error.setjmp = 0;
return res;
}
@@ -4547,28 +4302,6 @@
return 0;
}
-int64_t vp10_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->y_crop_width == b->y_crop_width);
- assert(a->y_crop_height == b->y_crop_height);
-
- return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
- a->y_crop_width, a->y_crop_height);
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-int64_t vp10_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->y_crop_width == b->y_crop_width);
- assert(a->y_crop_height == b->y_crop_height);
- assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
- assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
-
- return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
- a->y_crop_width, a->y_crop_height);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
int vp10_get_quantizer(VP10_COMP *cpi) {
return cpi->common.base_qindex;
}
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index cc20765..59c7682 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -633,12 +633,6 @@
return get_token_alloc(tile_mb_rows, tile_mb_cols);
}
-int64_t vp10_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
-#if CONFIG_VP9_HIGHBITDEPTH
-int64_t vp10_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
void vp10_alloc_compressor_data(VP10_COMP *cpi);
void vp10_scale_references(VP10_COMP *cpi);
diff --git a/vp10/encoder/picklpf.c b/vp10/encoder/picklpf.c
index f116c00..cb2c1c7 100644
--- a/vp10/encoder/picklpf.c
+++ b/vp10/encoder/picklpf.c
@@ -13,6 +13,7 @@
#include "./vpx_scale_rtcd.h"
+#include "vpx_dsp/psnr.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -56,12 +57,12 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- filt_err = vp10_highbd_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_highbd_get_y_sse(sd, cm->frame_to_show);
} else {
- filt_err = vp10_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
}
#else
- filt_err = vp10_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Re-instate the unfiltered frame
diff --git a/vp10/encoder/pickrst.c b/vp10/encoder/pickrst.c
index 79cda43..9982836 100644
--- a/vp10/encoder/pickrst.c
+++ b/vp10/encoder/pickrst.c
@@ -14,6 +14,7 @@
#include "./vpx_scale_rtcd.h"
+#include "vpx_dsp/psnr.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -36,12 +37,12 @@
rsi, 1, partial_frame);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- filt_err = vp10_highbd_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_highbd_get_y_sse(sd, cm->frame_to_show);
} else {
- filt_err = vp10_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
}
#else
- filt_err = vp10_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Re-instate the unfiltered frame
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 30b8406..d9be29d 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -539,47 +539,70 @@
cost = token_costs[0][0][pt][EOB_TOKEN];
c = 0;
} else {
- int band_left = *band_count++;
+ if (use_fast_coef_costing) {
+ int band_left = *band_count++;
- // dc token
- int v = qcoeff[0];
- int16_t prev_t;
- EXTRABIT e;
- vp10_get_token_extra(v, &prev_t, &e);
- cost = (*token_costs)[0][pt][prev_t] +
- vp10_get_cost(prev_t, e, cat6_high_cost);
+ // dc token
+ int v = qcoeff[0];
+ int16_t prev_t;
+ cost = vp10_get_token_cost(v, &prev_t, cat6_high_cost);
+ cost += (*token_costs)[0][pt][prev_t];
- token_cache[0] = vp10_pt_energy_class[prev_t];
- ++token_costs;
+ token_cache[0] = vp10_pt_energy_class[prev_t];
+ ++token_costs;
- // ac tokens
- for (c = 1; c < eob; c++) {
- const int rc = scan[c];
- int16_t t;
+ // ac tokens
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+ int16_t t;
- v = qcoeff[rc];
- vp10_get_token_extra(v, &t, &e);
- if (use_fast_coef_costing) {
- cost += (*token_costs)[!prev_t][!prev_t][t] +
- vp10_get_cost(t, e, cat6_high_cost);
- } else {
- pt = get_coef_context(nb, token_cache, c);
- cost += (*token_costs)[!prev_t][pt][t] +
- vp10_get_cost(t, e, cat6_high_cost);
- token_cache[rc] = vp10_pt_energy_class[t];
+ v = qcoeff[rc];
+ cost += vp10_get_token_cost(v, &t, cat6_high_cost);
+ cost += (*token_costs)[!prev_t][!prev_t][t];
+ prev_t = t;
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
}
- prev_t = t;
- if (!--band_left) {
- band_left = *band_count++;
- ++token_costs;
- }
- }
- // eob token
- if (band_left) {
- if (use_fast_coef_costing) {
+ // eob token
+ if (band_left)
cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
- } else {
+
+ } else { // !use_fast_coef_costing
+ int band_left = *band_count++;
+
+ // dc token
+ int v = qcoeff[0];
+ int16_t tok;
+ unsigned int (*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
+ cost = vp10_get_token_cost(v, &tok, cat6_high_cost);
+ cost += (*token_costs)[0][pt][tok];
+
+ token_cache[0] = vp10_pt_energy_class[tok];
+ ++token_costs;
+
+ tok_cost_ptr = &((*token_costs)[!tok]);
+
+ // ac tokens
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+
+ v = qcoeff[rc];
+ cost += vp10_get_token_cost(v, &tok, cat6_high_cost);
+ pt = get_coef_context(nb, token_cache, c);
+ cost += (*tok_cost_ptr)[pt][tok];
+ token_cache[rc] = vp10_pt_energy_class[tok];
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
+ tok_cost_ptr = &((*token_costs)[!tok]);
+ }
+
+ // eob token
+ if (band_left) {
pt = get_coef_context(nb, token_cache, c);
cost += (*token_costs)[0][pt][EOB_TOKEN];
}
@@ -5089,6 +5112,8 @@
DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_OBMC
+ int allow_obmc = is_obmc_allowed(mbmi);
+ int best_obmc_flag = 0;
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * 64 * 64]);
uint8_t *tmp_buf1;
@@ -5098,13 +5123,11 @@
uint8_t *obmc_tmp_buf[3] = {tmp_buf1, tmp_buf1 + 4096, tmp_buf1 + 8192};
#endif // CONFIG_VP9_HIGHBITDEPTH
int obmc_tmp_stride[3] = {64, 64, 64};
- int best_obmc_flag = 0;
uint8_t tmp_skip_txfm[MAX_MB_PLANE << 2] = {0};
int64_t tmp_bsse[MAX_MB_PLANE << 2] = {0};
int64_t rdobmc;
int skip_txfm_sb_obmc = 0;
int64_t skip_sse_sb_obmc = INT64_MAX;
- int allow_obmc = is_obmc_allowed(mbmi);
#endif // CONFIG_OBMC
int pred_exists = 0;
int intpel_mv;
@@ -5334,8 +5357,9 @@
if (this_mode == NEARMV && is_comp_pred) {
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
- cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][1].this_mv;
- cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][1].comp_mv;
+ int ref_mv_idx = mbmi->ref_mv_idx + 1;
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
for (i = 0; i < 2; ++i) {
lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv);
@@ -5587,6 +5611,7 @@
#if CONFIG_OBMC
int tmp_rate_obmc;
int64_t tmp_dist_obmc;
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
#endif // CONFIG_OBMC
// Handles the special case when a filter that is not in the
// switchable list (ex. bilinear) is indicated at the frame level, or
@@ -5594,19 +5619,14 @@
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#if CONFIG_OBMC
if (mbmi->obmc) {
- vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
- obmc_tmp_buf, obmc_tmp_stride,
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0,
+ NULL, NULL,
dst_buf1, dst_stride1,
dst_buf2, dst_stride2);
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- xd->plane[i].dst.buf = obmc_tmp_buf[i];
- xd->plane[i].dst.stride = obmc_tmp_stride[i];
- }
model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
&skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv,
- rs + tmp_rate + cpi->obmc_cost[bsize][1],
- tmp_dist);
+ rs + tmp_rate + cpi->obmc_cost[bsize][1], tmp_dist);
} else {
#endif // CONFIG_OBMC
model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index 0aaeb2a..5cae8e3 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -50,6 +50,35 @@
const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens +
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens))
/ 2;
+// The corresponding costs of the extrabits for the tokens in the above table
+// are stored in the table below. The values are obtained from looking up the
+// entry for the specified extrabits in the table corresponding to the token
+// (as defined in cost element vp10_extra_bits)
+// e.g. {9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1]
+static const int dct_cat_lt_10_value_cost[] = {
+ 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531,
+ 3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190,
+ 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894,
+ 2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553,
+ 3197, 3116, 3058, 2977, 2881, 2800,
+ 2742, 2661, 2615, 2534, 2476, 2395,
+ 2299, 2218, 2160, 2079,
+ 2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652,
+ 1893, 1696, 1453, 1256, 1229, 864,
+ 512, 512, 512, 512, 0,
+ 512, 512, 512, 512,
+ 864, 1229, 1256, 1453, 1696, 1893,
+ 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566,
+ 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615,
+ 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197,
+ 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795,
+ 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
+ 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432,
+ 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773,
+};
+const int *vp10_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost +
+ (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost))
+ / 2;
// Array indices are identical to previously-existing CONTEXT_NODE indices
const vpx_tree_index vp10_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h
index 12f5f1f..46b7f3f 100644
--- a/vp10/encoder/tokenize.h
+++ b/vp10/encoder/tokenize.h
@@ -76,6 +76,7 @@
*/
extern const TOKENVALUE *vp10_dct_value_tokens_ptr;
extern const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens;
+extern const int *vp10_dct_cat_lt_10_value_cost;
extern const int16_t vp10_cat6_low_cost[256];
extern const int vp10_cat6_high_cost[64];
extern const int vp10_cat6_high10_high_cost[256];
@@ -119,6 +120,18 @@
return vp10_dct_cat_lt_10_value_tokens[v].token;
}
+static INLINE int vp10_get_token_cost(int v, int16_t *token,
+ const int *cat6_high_table) {
+ if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) {
+ EXTRABIT extrabits;
+ *token = CATEGORY6_TOKEN;
+ extrabits = abs(v) - CAT6_MIN_VAL;
+ return vp10_cat6_low_cost[extrabits & 0xff]
+ + cat6_high_table[extrabits >> 8];
+ }
+ *token = vp10_dct_cat_lt_10_value_tokens[v].token;
+ return vp10_dct_cat_lt_10_value_cost[v];
+}
#ifdef __cplusplus
} // extern "C"
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index b1c2e11..edfd60c 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -21,7 +21,7 @@
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
-#include "vpx/internal/vpx_psnr.h"
+#include "vpx_dsp/psnr.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 24c6c54..7dd1005 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -119,6 +119,20 @@
cm->lf.lfm = NULL;
}
+
+int vp9_alloc_loop_filter(VP9_COMMON *cm) {
+ vpx_free(cm->lf.lfm);
+ // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
+ // stride and rows are rounded up / truncated to a multiple of 8.
+ cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
+ cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
+ ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
+ sizeof(*cm->lf.lfm));
+ if (!cm->lf.lfm)
+ return 1;
+ return 0;
+}
+
int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
int new_mi_size;
@@ -151,15 +165,8 @@
cm->above_context_alloc_cols = cm->mi_cols;
}
- vpx_free(cm->lf.lfm);
-
- // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
- // stride and rows are rounded up / truncated to a multiple of 8.
- cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
- cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
- ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
- sizeof(*cm->lf.lfm));
- if (!cm->lf.lfm) goto fail;
+ if (vp9_alloc_loop_filter(cm))
+ goto fail;
return 0;
diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h
index c0e51a6..e53955b 100644
--- a/vp9/common/vp9_alloccommon.h
+++ b/vp9/common/vp9_alloccommon.h
@@ -23,6 +23,7 @@
void vp9_remove_common(struct VP9Common *cm);
+int vp9_alloc_loop_filter(struct VP9Common *cm);
int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
void vp9_init_context_buffers(struct VP9Common *cm);
void vp9_free_context_buffers(struct VP9Common *cm);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a2445b0..cf1fe81 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3031,10 +3031,24 @@
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *mi;
+ ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
+ BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
+ int plane;
+
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mi = xd->mi[0];
mi->sb_type = bsize;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *pd = &xd->plane[plane];
+ memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
+ (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
+ memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
+ (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
+ }
+
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
if (cyclic_refresh_segment_id_boosted(mi->segment_id))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
@@ -3052,6 +3066,14 @@
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ struct macroblockd_plane *pd = &xd->plane[plane];
+ memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
+ (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
+ memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
+ (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
+ }
+
if (rd_cost->rate == INT_MAX)
vp9_rd_cost_reset(rd_cost);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index f3147e9..713b5f7 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -16,7 +16,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
-#include "vpx/internal/vpx_psnr.h"
+#include "vpx_dsp/psnr.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#if CONFIG_INTERNAL_STATS
@@ -1538,8 +1538,12 @@
realloc_segmentation_maps(cpi);
cpi->initial_width = cpi->initial_height = 0;
cpi->external_resize = 0;
+ } else if (cm->mi_alloc_size == new_mi_size &&
+ (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) {
+ vp9_alloc_loop_filter(cm);
}
}
+
update_frame_size(cpi);
if ((last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) &&
@@ -2136,262 +2140,6 @@
#endif
}
-/* TODO(yaowu): The block_variance calls the unoptimized versions of variance()
- * and highbd_8_variance(). It should not.
- */
-static void encoder_variance(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int w, int h, unsigned int *sse, int *sum) {
- int i, j;
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
-
- a += a_stride;
- b += b_stride;
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, uint64_t *sse,
- uint64_t *sum) {
- int i, j;
-
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
- a += a_stride;
- b += b_stride;
- }
-}
-
-static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h,
- unsigned int *sse, int *sum) {
- uint64_t sse_long = 0;
- uint64_t sum_long = 0;
- encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
- &sse_long, &sum_long);
- *sse = (unsigned int)sse_long;
- *sum = (int)sum_long;
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-static int64_t get_sse(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int width, int height) {
- const int dw = width % 16;
- const int dh = height % 16;
- int64_t total_sse = 0;
- unsigned int sse = 0;
- int sum = 0;
- int x, y;
-
- if (dw > 0) {
- encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
- dw, height, &sse, &sum);
- total_sse += sse;
- }
-
- if (dh > 0) {
- encoder_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
- total_sse += sse;
- }
-
- for (y = 0; y < height / 16; ++y) {
- const uint8_t *pa = a;
- const uint8_t *pb = b;
- for (x = 0; x < width / 16; ++x) {
- vpx_mse16x16(pa, a_stride, pb, b_stride, &sse);
- total_sse += sse;
-
- pa += 16;
- pb += 16;
- }
-
- a += 16 * a_stride;
- b += 16 * b_stride;
- }
-
- return total_sse;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int width, int height,
- unsigned int input_shift) {
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- int64_t total_sse = 0;
- int x, y;
- for (y = 0; y < height; ++y) {
- for (x = 0; x < width; ++x) {
- int64_t diff;
- diff = (a[x] >> input_shift) - (b[x] >> input_shift);
- total_sse += diff * diff;
- }
- a += a_stride;
- b += b_stride;
- }
- return total_sse;
-}
-
-static int64_t highbd_get_sse(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int width, int height) {
- int64_t total_sse = 0;
- int x, y;
- const int dw = width % 16;
- const int dh = height % 16;
- unsigned int sse = 0;
- int sum = 0;
- if (dw > 0) {
- encoder_highbd_8_variance(&a[width - dw], a_stride,
- &b[width - dw], b_stride,
- dw, height, &sse, &sum);
- total_sse += sse;
- }
- if (dh > 0) {
- encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
- total_sse += sse;
- }
- for (y = 0; y < height / 16; ++y) {
- const uint8_t *pa = a;
- const uint8_t *pb = b;
- for (x = 0; x < width / 16; ++x) {
- vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);
- total_sse += sse;
- pa += 16;
- pb += 16;
- }
- a += 16 * a_stride;
- b += 16 * b_stride;
- }
- return total_sse;
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-typedef struct {
- double psnr[4]; // total/y/u/v
- uint64_t sse[4]; // total/y/u/v
- uint32_t samples[4]; // total/y/u/v
-} PSNR_STATS;
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b,
- PSNR_STATS *psnr,
- unsigned int bit_depth,
- unsigned int in_bit_depth) {
- const int widths[3] =
- {a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
- const int heights[3] =
- {a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
- const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer };
- const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
- const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer };
- const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
- int i;
- uint64_t total_sse = 0;
- uint32_t total_samples = 0;
- const double peak = (double)((1 << in_bit_depth) - 1);
- const unsigned int input_shift = bit_depth - in_bit_depth;
-
- for (i = 0; i < 3; ++i) {
- const int w = widths[i];
- const int h = heights[i];
- const uint32_t samples = w * h;
- uint64_t sse;
- if (a->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (input_shift) {
- sse = highbd_get_sse_shift(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i], w, h,
- input_shift);
- } else {
- sse = highbd_get_sse(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i], w, h);
- }
- } else {
- sse = get_sse(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i],
- w, h);
- }
- psnr->sse[1 + i] = sse;
- psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
-
- total_sse += sse;
- total_samples += samples;
- }
-
- psnr->sse[0] = total_sse;
- psnr->samples[0] = total_samples;
- psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
- (double)total_sse);
-}
-
-#else // !CONFIG_VP9_HIGHBITDEPTH
-
-static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
- PSNR_STATS *psnr) {
- static const double peak = 255.0;
- const int widths[3] = {
- a->y_crop_width, a->uv_crop_width, a->uv_crop_width};
- const int heights[3] = {
- a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
- const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer};
- const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
- const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer};
- const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
- int i;
- uint64_t total_sse = 0;
- uint32_t total_samples = 0;
-
- for (i = 0; i < 3; ++i) {
- const int w = widths[i];
- const int h = heights[i];
- const uint32_t samples = w * h;
- const uint64_t sse = get_sse(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i],
- w, h);
- psnr->sse[1 + i] = sse;
- psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
-
- total_sse += sse;
- total_samples += samples;
- }
-
- psnr->sse[0] = total_sse;
- psnr->samples[0] = total_samples;
- psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
- (double)total_sse);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
static void generate_psnr_packet(VP9_COMP *cpi) {
struct vpx_codec_cx_pkt pkt;
int i;
@@ -3057,7 +2805,7 @@
vpx_clear_system_state();
- recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
@@ -3567,12 +3315,12 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- kf_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
} else {
- kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
}
#else
- kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif // CONFIG_VP9_HIGHBITDEPTH
// Prevent possible divide by zero error below for perfect KF
@@ -3963,13 +3711,13 @@
if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- cpi->ambient_err = vp9_highbd_get_y_sse(cpi->Source,
+ cpi->ambient_err = vpx_highbd_get_y_sse(cpi->Source,
get_frame_new_buffer(cm));
} else {
- cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
}
#else
- cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif // CONFIG_VP9_HIGHBITDEPTH
}
@@ -4141,13 +3889,22 @@
int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
- VP9_COMMON *cm = &cpi->common;
+ VP9_COMMON *volatile const cm = &cpi->common;
struct vpx_usec_timer timer;
- int res = 0;
+ volatile int res = 0;
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
#if CONFIG_VP9_HIGHBITDEPTH
const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
+#endif
+
+ if (setjmp(cm->error.jmp)) {
+ cm->error.setjmp = 0;
+ return -1;
+ }
+ cm->error.setjmp = 1;
+
+#if CONFIG_VP9_HIGHBITDEPTH
check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
#else
check_initial_width(cpi, subsampling_x, subsampling_y);
@@ -4180,6 +3937,7 @@
res = -1;
}
+ cm->error.setjmp = 0;
return res;
}
@@ -4830,28 +4588,6 @@
return;
}
-int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->y_crop_width == b->y_crop_width);
- assert(a->y_crop_height == b->y_crop_height);
-
- return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
- a->y_crop_width, a->y_crop_height);
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b) {
- assert(a->y_crop_width == b->y_crop_width);
- assert(a->y_crop_height == b->y_crop_height);
- assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
- assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
-
- return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
- a->y_crop_width, a->y_crop_height);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
int vp9_get_quantizer(VP9_COMP *cpi) {
return cpi->common.base_qindex;
}
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 8759cbe..017fa61 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -608,12 +608,6 @@
return get_token_alloc(tile_mb_rows, tile_mb_cols);
}
-int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
-#if CONFIG_VP9_HIGHBITDEPTH
-int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
- const YV12_BUFFER_CONFIG *b);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
void vp9_scale_references(VP9_COMP *cpi);
void vp9_update_reference_frames(VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index f6b1dfc..80ab238 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -12,7 +12,7 @@
#include <limits.h>
#include "./vpx_scale_rtcd.h"
-
+#include "vpx_dsp/psnr.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -52,12 +52,12 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_highbd_get_y_sse(sd, cm->frame_to_show);
} else {
- filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
}
#else
- filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Re-instate the unfiltered frame
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 1480ea4..193c9d3 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -787,9 +787,9 @@
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t best_dst16[8 * 8];
#endif
+ memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
+ memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
- memcpy(ta, a, sizeof(ta));
- memcpy(tl, l, sizeof(tl));
xd->mi[0]->tx_size = TX_4X4;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -810,8 +810,8 @@
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -874,8 +874,8 @@
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
@@ -914,8 +914,8 @@
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -976,8 +976,8 @@
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
@@ -1013,12 +1013,8 @@
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
- ENTROPY_CONTEXT t_above[4], t_left[4];
const int *bmode_costs = cpi->mbmode_cost;
- memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
- memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
-
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1034,8 +1030,11 @@
}
this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
- bmode_costs, t_above + idx, t_left + idy,
+ bmode_costs,
+ xd->plane[0].above_context + idx,
+ xd->plane[0].left_context + idy,
&r, &ry, &d, bsize, best_rd - total_rd);
+
if (this_rd >= best_rd - total_rd)
return INT64_MAX;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 8a34fd9..f684507 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -303,14 +303,26 @@
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
- // Disable reference masking if using spatial scaling or for dynamic
- // resizing (internal or external) since pred_mv_sad will not be set
- // (since vp9_mv_pred will not be called).
- // TODO(marpan): Fix this condition to allow reference masking for when
- // all references have same resolution as source frame.
- sf->reference_masking = (cpi->external_resize == 0 &&
- cpi->oxcf.resize_mode != RESIZE_DYNAMIC &&
- cpi->svc.number_spatial_layers == 1) ? 1 : 0;
+ // Reference masking only enabled for 1 spatial layer, and if none of the
+ // references have been scaled. The latter condition needs to be checked
+ // for external or internal dynamic resize.
+ sf->reference_masking = (cpi->svc.number_spatial_layers == 1);
+ if (sf->reference_masking == 1 &&
+ (cpi->external_resize == 1 ||
+ cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) {
+ MV_REFERENCE_FRAME ref_frame;
+ static const int flag_list[4] =
+ {0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG};
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
+ const struct scale_factors *const scale_fac =
+ &cm->frame_refs[ref_frame - 1].sf;
+ if (vp9_is_scaled(scale_fac))
+ sf->reference_masking = 0;
+ }
+ }
+ }
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
diff --git a/vpx/internal/vpx_psnr.h b/vpx/internal/vpx_psnr.h
deleted file mode 100644
index 0e90085..0000000
--- a/vpx/internal/vpx_psnr.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VPX_INTERNAL_VPX_PSNR_H_
-#define VPX_INTERNAL_VPX_PSNR_H_
-
-#define MAX_PSNR 100.0
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t
-
-/*!\brief Converts SSE to PSNR
- *
- * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR).
- *
- * \param[in] samples Number of samples
- * \param[in] peak Max sample value
- * \param[in] sse Sum of squared errors
- */
-double vpx_sse_to_psnr(double samples, double peak, double sse);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VPX_INTERNAL_VPX_PSNR_H_
diff --git a/vpx/src/vpx_psnr.c b/vpx/src/vpx_psnr.c
deleted file mode 100644
index 27a6180..0000000
--- a/vpx/src/vpx_psnr.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-
-#include "vpx/internal/vpx_psnr.h"
-
-
-double vpx_sse_to_psnr(double samples, double peak, double sse) {
- if (sse > 0.0) {
- const double psnr = 10.0 * log10(samples * peak * peak / sse);
- return psnr > MAX_PSNR ? MAX_PSNR : psnr;
- } else {
- return MAX_PSNR;
- }
-}
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index ccdef04..b77f458 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -36,10 +36,8 @@
API_SRCS-yes += src/vpx_encoder.c
API_SRCS-yes += vpx_encoder.h
API_SRCS-yes += internal/vpx_codec_internal.h
-API_SRCS-yes += internal/vpx_psnr.h
API_SRCS-yes += src/vpx_codec.c
API_SRCS-yes += src/vpx_image.c
-API_SRCS-yes += src/vpx_psnr.c
API_SRCS-yes += vpx_codec.h
API_SRCS-yes += vpx_codec.mk
API_SRCS-yes += vpx_frame_buffer.h
diff --git a/vpx_dsp/intrapred.c b/vpx_dsp/intrapred.c
index dcc9b30..b1076f8 100644
--- a/vpx_dsp/intrapred.c
+++ b/vpx_dsp/intrapred.c
@@ -320,6 +320,7 @@
const int K = above[2];
const int L = above[3];
const int M = above[4];
+ (void)left;
dst[0] = AVG3(H, I, J);
dst[1] = AVG3(I, J, K);
diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c
index a0f59bf..402fd9a 100644
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -2057,8 +2057,8 @@
}
}
-static void highbd_idct32_c(const tran_low_t *input,
- tran_low_t *output, int bd) {
+void vpx_highbd_idct32_c(const tran_low_t *input,
+ tran_low_t *output, int bd) {
tran_low_t step1[32], step2[32];
tran_high_t temp1, temp2;
(void) bd;
@@ -2447,7 +2447,7 @@
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
if (zero_coeff[0] | zero_coeff[1])
- highbd_idct32_c(input, outptr, bd);
+ vpx_highbd_idct32_c(input, outptr, bd);
else
memset(outptr, 0, sizeof(tran_low_t) * 32);
input += 32;
@@ -2458,7 +2458,7 @@
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- highbd_idct32_c(temp_in, temp_out, bd);
+ vpx_highbd_idct32_c(temp_in, temp_out, bd);
for (j = 0; j < 32; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
@@ -2477,7 +2477,7 @@
// Rows
// Only upper-left 8x8 has non-zero coeff.
for (i = 0; i < 8; ++i) {
- highbd_idct32_c(input, outptr, bd);
+ vpx_highbd_idct32_c(input, outptr, bd);
input += 32;
outptr += 32;
}
@@ -2485,7 +2485,7 @@
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- highbd_idct32_c(temp_in, temp_out, bd);
+ vpx_highbd_idct32_c(temp_in, temp_out, bd);
for (j = 0; j < 32; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
diff --git a/vpx_dsp/inv_txfm.h b/vpx_dsp/inv_txfm.h
index 2358813..adbb838 100644
--- a/vpx_dsp/inv_txfm.h
+++ b/vpx_dsp/inv_txfm.h
@@ -100,6 +100,7 @@
void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
diff --git a/vpx_dsp/psnr.c b/vpx_dsp/psnr.c
new file mode 100644
index 0000000..1b92e2a
--- /dev/null
+++ b/vpx_dsp/psnr.c
@@ -0,0 +1,297 @@
+/*
+* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+*
+* Use of this source code is governed by a BSD-style license
+* that can be found in the LICENSE file in the root of the source
+* tree. An additional intellectual property rights grant can be found
+* in the file PATENTS. All contributing project authors may
+* be found in the AUTHORS file in the root of the source tree.
+*/
+
+#include <math.h>
+#include <assert.h>
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/psnr.h"
+#include "vpx_scale/yv12config.h"
+
+
+double vpx_sse_to_psnr(double samples, double peak, double sse) {
+ if (sse > 0.0) {
+ const double psnr = 10.0 * log10(samples * peak * peak / sse);
+ return psnr > MAX_PSNR ? MAX_PSNR : psnr;
+ } else {
+ return MAX_PSNR;
+ }
+}
+
+/* TODO(yaowu): The block_variance calls the unoptimized versions of variance()
+* and highbd_8_variance(). It should not.
+*/
+static void encoder_variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, uint64_t *sse,
+ uint64_t *sum) {
+ int i, j;
+
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h,
+ unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
+ &sse_long, &sum_long);
+ *sse = (unsigned int)sse_long;
+ *sum = (int)sum_long;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static int64_t get_sse(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int width, int height) {
+ const int dw = width % 16;
+ const int dh = height % 16;
+ int64_t total_sse = 0;
+ unsigned int sse = 0;
+ int sum = 0;
+ int x, y;
+
+ if (dw > 0) {
+ encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
+ dw, height, &sse, &sum);
+ total_sse += sse;
+ }
+
+ if (dh > 0) {
+ encoder_variance(&a[(height - dh) * a_stride], a_stride,
+ &b[(height - dh) * b_stride], b_stride,
+ width - dw, dh, &sse, &sum);
+ total_sse += sse;
+ }
+
+ for (y = 0; y < height / 16; ++y) {
+ const uint8_t *pa = a;
+ const uint8_t *pb = b;
+ for (x = 0; x < width / 16; ++x) {
+ vpx_mse16x16(pa, a_stride, pb, b_stride, &sse);
+ total_sse += sse;
+
+ pa += 16;
+ pb += 16;
+ }
+
+ a += 16 * a_stride;
+ b += 16 * b_stride;
+ }
+
+ return total_sse;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int width, int height,
+ unsigned int input_shift) {
+ const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ int64_t total_sse = 0;
+ int x, y;
+ for (y = 0; y < height; ++y) {
+ for (x = 0; x < width; ++x) {
+ int64_t diff;
+ diff = (a[x] >> input_shift) - (b[x] >> input_shift);
+ total_sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+ return total_sse;
+}
+
+int64_t highbd_get_sse(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int width, int height) {
+ int64_t total_sse = 0;
+ int x, y;
+ const int dw = width % 16;
+ const int dh = height % 16;
+ unsigned int sse = 0;
+ int sum = 0;
+ if (dw > 0) {
+ encoder_highbd_8_variance(&a[width - dw], a_stride,
+ &b[width - dw], b_stride,
+ dw, height, &sse, &sum);
+ total_sse += sse;
+ }
+ if (dh > 0) {
+ encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,
+ &b[(height - dh) * b_stride], b_stride,
+ width - dw, dh, &sse, &sum);
+ total_sse += sse;
+ }
+ for (y = 0; y < height / 16; ++y) {
+ const uint8_t *pa = a;
+ const uint8_t *pb = b;
+ for (x = 0; x < width / 16; ++x) {
+ vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);
+ total_sse += sse;
+ pa += 16;
+ pb += 16;
+ }
+ a += 16 * a_stride;
+ b += 16 * b_stride;
+ }
+ return total_sse;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+
+int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b) {
+ assert(a->y_crop_width == b->y_crop_width);
+ assert(a->y_crop_height == b->y_crop_height);
+
+ return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
+ a->y_crop_width, a->y_crop_height);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b) {
+ assert(a->y_crop_width == b->y_crop_width);
+ assert(a->y_crop_height == b->y_crop_height);
+ assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
+ assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
+
+ return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
+ a->y_crop_width, a->y_crop_height);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b,
+ PSNR_STATS *psnr,
+ unsigned int bit_depth,
+ unsigned int in_bit_depth) {
+ const int widths[3] =
+ { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
+ const int heights[3] =
+ { a->y_crop_height, a->uv_crop_height, a->uv_crop_height };
+ const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };
+ const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
+ const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };
+ const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
+ int i;
+ uint64_t total_sse = 0;
+ uint32_t total_samples = 0;
+ const double peak = (double)((1 << in_bit_depth) - 1);
+ const unsigned int input_shift = bit_depth - in_bit_depth;
+
+ for (i = 0; i < 3; ++i) {
+ const int w = widths[i];
+ const int h = heights[i];
+ const uint32_t samples = w * h;
+ uint64_t sse;
+ if (a->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (input_shift) {
+ sse = highbd_get_sse_shift(a_planes[i], a_strides[i],
+ b_planes[i], b_strides[i], w, h,
+ input_shift);
+ } else {
+ sse = highbd_get_sse(a_planes[i], a_strides[i],
+ b_planes[i], b_strides[i], w, h);
+ }
+ } else {
+ sse = get_sse(a_planes[i], a_strides[i],
+ b_planes[i], b_strides[i],
+ w, h);
+ }
+ psnr->sse[1 + i] = sse;
+ psnr->samples[1 + i] = samples;
+ psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
+
+ total_sse += sse;
+ total_samples += samples;
+ }
+
+ psnr->sse[0] = total_sse;
+ psnr->samples[0] = total_samples;
+ psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
+ (double)total_sse);
+}
+
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
+ PSNR_STATS *psnr) {
+ static const double peak = 255.0;
+ const int widths[3] = {
+ a->y_crop_width, a->uv_crop_width, a->uv_crop_width };
+ const int heights[3] = {
+ a->y_crop_height, a->uv_crop_height, a->uv_crop_height };
+ const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };
+ const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };
+ const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };
+ const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };
+ int i;
+ uint64_t total_sse = 0;
+ uint32_t total_samples = 0;
+
+ for (i = 0; i < 3; ++i) {
+ const int w = widths[i];
+ const int h = heights[i];
+ const uint32_t samples = w * h;
+ const uint64_t sse = get_sse(a_planes[i], a_strides[i],
+ b_planes[i], b_strides[i],
+ w, h);
+ psnr->sse[1 + i] = sse;
+ psnr->samples[1 + i] = samples;
+ psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
+
+ total_sse += sse;
+ total_samples += samples;
+ }
+
+ psnr->sse[0] = total_sse;
+ psnr->samples[0] = total_samples;
+ psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
+ (double)total_sse);
+}
diff --git a/vpx_dsp/psnr.h b/vpx_dsp/psnr.h
new file mode 100644
index 0000000..c8da94f
--- /dev/null
+++ b/vpx_dsp/psnr.h
@@ -0,0 +1,65 @@
+/*
+* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+*
+* Use of this source code is governed by a BSD-style license
+* that can be found in the LICENSE file in the root of the source
+* tree. An additional intellectual property rights grant can be found
+* in the file PATENTS. All contributing project authors may
+* be found in the AUTHORS file in the root of the source tree.
+*/
+
+#ifndef VPX_DSP_PSNR_H_
+#define VPX_DSP_PSNR_H_
+
+
+#include "vpx_scale/yv12config.h"
+
+#define MAX_PSNR 100.0
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ double psnr[4]; // total/y/u/v
+ uint64_t sse[4]; // total/y/u/v
+ uint32_t samples[4]; // total/y/u/v
+} PSNR_STATS;
+
+// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t
+
+/*!\brief Converts SSE to PSNR
+*
+* Converts sum of squared errors (SSE) to peak signal-to-noise ratio (PSNR).
+*
+* \param[in] samples Number of samples
+* \param[in] peak Max sample value
+* \param[in] sse Sum of squared errors
+*/
+double vpx_sse_to_psnr(double samples, double peak, double sse);
+int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b);
+#if CONFIG_VP9_HIGHBITDEPTH
+int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b);
+void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b,
+ PSNR_STATS *psnr,
+ unsigned int bit_depth,
+ unsigned int in_bit_depth);
+int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int width, int height,
+ unsigned int input_shift);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+void calc_psnr(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b,
+ PSNR_STATS *psnr);
+
+int64_t highbd_get_sse(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int width, int height);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // VPX_DSP_PSNR_H_
diff --git a/vpx_dsp/psnrhvs.c b/vpx_dsp/psnrhvs.c
index 4d3d6ee..9b70c6a 100644
--- a/vpx_dsp/psnrhvs.c
+++ b/vpx_dsp/psnrhvs.c
@@ -19,7 +19,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ssim.h"
#include "vpx_ports/system_state.h"
-#include "vpx/internal/vpx_psnr.h"
+#include "vpx_dsp/psnr.h"
#if !defined(M_PI)
# define M_PI (3.141592653589793238462643)
diff --git a/vpx_dsp/txfm_common.h b/vpx_dsp/txfm_common.h
index 442e6a5..9b0e990 100644
--- a/vpx_dsp/txfm_common.h
+++ b/vpx_dsp/txfm_common.h
@@ -57,10 +57,13 @@
static const tran_high_t cospi_30_64 = 1606;
static const tran_high_t cospi_31_64 = 804;
-// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
+// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
static const tran_high_t sinpi_1_9 = 5283;
static const tran_high_t sinpi_2_9 = 9929;
static const tran_high_t sinpi_3_9 = 13377;
static const tran_high_t sinpi_4_9 = 15212;
+// 16384 * sqrt(2)
+static const tran_high_t Sqrt2 = 23170;
+
#endif // VPX_DSP_TXFM_COMMON_H_
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index a44f948..dbb41aa 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -22,6 +22,8 @@
DSP_SRCS-yes += bitwriter.c
DSP_SRCS-yes += bitwriter_buffer.c
DSP_SRCS-yes += bitwriter_buffer.h
+DSP_SRCS-yes += psnr.c
+DSP_SRCS-yes += psnr.h
DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c
DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h
DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c
diff --git a/vpxenc.c b/vpxenc.c
index c61d060..f14470a 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1470,6 +1470,8 @@
global->codec->fourcc,
pixel_aspect_ratio);
}
+#else
+ (void)pixel_aspect_ratio;
#endif
if (!stream->config.write_webm) {