Merge "Raise the probability resolution for rANS tokens to 10-bits per symbol" into nextgenv2
diff --git a/test/vp10_fwd_txfm2d_sse4_test.cc b/test/vp10_fwd_txfm2d_sse4_test.cc
index 84b1d0f..52e2d38 100644
--- a/test/vp10_fwd_txfm2d_sse4_test.cc
+++ b/test/vp10_fwd_txfm2d_sse4_test.cc
@@ -51,6 +51,7 @@
int func_idx = get_max_bit(txfm_size) - 2;
Fwd_Txfm2d_Func txfm2d_func_c = txfm2d_func_c_list[func_idx];
Fwd_Txfm2d_Func txfm2d_func_sse4_1 = txfm2d_func_sse4_1_list[func_idx];
+ int tx_type = libvpx_test::get_tx_type(&cfg);
ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -61,8 +62,8 @@
}
}
- txfm2d_func_c(input, output_c, cfg.txfm_size, &cfg, bd);
- txfm2d_func_sse4_1(input, output_sse4_1, cfg.txfm_size, &cfg, bd);
+ txfm2d_func_c(input, output_c, cfg.txfm_size, tx_type, bd);
+ txfm2d_func_sse4_1(input, output_sse4_1, cfg.txfm_size, tx_type, bd);
for (int r = 0; r < txfm_size; r++) {
for (int c = 0; c < txfm_size; c++) {
EXPECT_EQ(output_c[r * txfm_size + c],
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc
index d5a6737..8e90dc2 100644
--- a/test/vp10_fwd_txfm2d_test.cc
+++ b/test/vp10_fwd_txfm2d_test.cc
@@ -71,6 +71,7 @@
fwd_txfm_cfg->shift[2];
double amplify_factor =
amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
+ int tx_type = libvpx_test::get_tx_type(fwd_txfm_cfg);
ACMRandom rnd(ACMRandom::DeterministicSeed());
int count = 500;
@@ -83,7 +84,7 @@
ref_output[ni] = 0;
}
- fwd_txfm_func(input, output, txfm_size, fwd_txfm_cfg, bd);
+ fwd_txfm_func(input, output, txfm_size, tx_type, bd);
reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
for (int ni = 0; ni < sqr_txfm_size; ++ni) {
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc
index fb42bc4..c3552dc 100644
--- a/test/vp10_inv_txfm2d_test.cc
+++ b/test/vp10_inv_txfm2d_test.cc
@@ -30,17 +30,7 @@
#if CONFIG_VP9_HIGHBITDEPTH
const int txfm_size_num = 5;
const int txfm_size_ls[5] = {4, 8, 16, 32, 64};
-const TXFM_2D_CFG* fwd_txfm_cfg_ls[5][4] = {
- {&fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_adst_4,
- &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_dct_4},
- {&fwd_txfm_2d_cfg_dct_dct_8, &fwd_txfm_2d_cfg_dct_adst_8,
- &fwd_txfm_2d_cfg_adst_adst_8, &fwd_txfm_2d_cfg_adst_dct_8},
- {&fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_adst_16,
- &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_dct_16},
- {&fwd_txfm_2d_cfg_dct_dct_32, &fwd_txfm_2d_cfg_dct_adst_32,
- &fwd_txfm_2d_cfg_adst_adst_32, &fwd_txfm_2d_cfg_adst_dct_32},
- {&fwd_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}};
-
+const int txfm_type[4] = {DCT_DCT, DCT_ADST, ADST_ADST, ADST_DCT};
const TXFM_2D_CFG* inv_txfm_cfg_ls[5][4] = {
{&inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_adst_4,
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_dct_4},
@@ -72,11 +62,10 @@
for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
++txfm_type_idx) {
- const TXFM_2D_CFG* fwd_txfm_cfg =
- fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
const TXFM_2D_CFG* inv_txfm_cfg =
inv_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
- if (fwd_txfm_cfg != NULL) {
+ if (inv_txfm_cfg != NULL) {
+ int tx_type = txfm_type[txfm_type_idx];
const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx];
const int count = 1000;
@@ -94,7 +83,7 @@
}
}
- fwd_txfm_func(input, output, txfm_size, fwd_txfm_cfg, bd);
+ fwd_txfm_func(input, output, txfm_size, tx_type, bd);
inv_txfm_func(output, ref_input, txfm_size, inv_txfm_cfg, bd);
for (int ni = 0; ni < sqr_txfm_size; ++ni) {
diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h
index 6fcc3bb..6b0bd0a 100644
--- a/test/vp10_txfm_test.h
+++ b/test/vp10_txfm_test.h
@@ -21,6 +21,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/acm_random.h"
+#include "vp10/common/enums.h"
#include "vp10/common/vp10_txfm.h"
namespace libvpx_test {
@@ -104,11 +105,29 @@
const int8_t* range_bit);
typedef void (*Fwd_Txfm2d_Func)(const int16_t*, int32_t*, const int,
- const TXFM_2D_CFG*, const int);
+ int tx_type, const int);
typedef void (*Inv_Txfm2d_Func)(const int32_t*, uint16_t*, const int,
const TXFM_2D_CFG*, const int);
static const int bd = 10;
static const int input_base = (1 << bd);
+
+static INLINE int get_tx_type(const TXFM_2D_CFG *cfg) {  // Derives a tx_type value from the col/row 1D transform types stored in cfg.
+ int tx_type;
+ if (cfg->txfm_type_col <= TXFM_TYPE_DCT64) {  // col counted as DCT; assumes every DCT enumerator sorts at or below TXFM_TYPE_DCT64 - TODO confirm in vp10_txfm.h
+ if (cfg->txfm_type_row <= TXFM_TYPE_DCT64) {
+ tx_type = DCT_DCT;  // col <= DCT64, row <= DCT64
+ } else {
+ tx_type = DCT_ADST;  // col <= DCT64, row > DCT64
+ }
+ } else {
+ if (cfg->txfm_type_row <= TXFM_TYPE_DCT64) {
+ tx_type = ADST_DCT;  // col > DCT64, row <= DCT64
+ } else {
+ tx_type = ADST_ADST;  // col > DCT64, row > DCT64
+ }
+ }
+ return tx_type;
+}
} // namespace libvpx_test
#endif // VP10_TXFM_TEST_H_
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index 23c131d..b0f100e 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -731,9 +731,11 @@
} else {
const int w = num_8x8_blocks_wide_lookup[block_size];
const int h = num_8x8_blocks_high_lookup[block_size];
- for (i = 0; i < h; i++) {
- memset(&lfm->lfl_y[i][shift_y], filter_level, w);
- }
+ const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+ const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+
+ for (i = 0; i < h; i++)
+ memset(&lfm->lfl_y[row + i][col], filter_level, w);
}
// These set 1 in the current block size for the block size edges.
@@ -811,9 +813,11 @@
} else {
const int w = num_8x8_blocks_wide_lookup[block_size];
const int h = num_8x8_blocks_high_lookup[block_size];
- for (i = 0; i < h; i++) {
- memset(&lfm->lfl_y[i][shift_y], filter_level, w);
- }
+ const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+ const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+
+ for (i = 0; i < h; i++)
+ memset(&lfm->lfl_y[row + i][col], filter_level, w);
}
*above_y |= above_prediction_mask[block_size] << shift_y;
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index 674b037..2d2563e 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -454,7 +454,7 @@
const MACROBLOCKD *xd) {
const MODE_INFO *mi = xd->mi[0];
#if CONFIG_VP9_HIGHBITDEPTH
- uint8_t tmp_dst_[2 * MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
uint8_t *tmp_dst =
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
@@ -487,7 +487,7 @@
mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX
#else // CONFIG_VP9_HIGHBITDEPTH
- uint8_t tmp_dst[MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
subpel_x, subpel_y, sf, w, h, 0,
interp_filter, xs, ys, xd);
@@ -2020,7 +2020,7 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
{
- uint8_t intrapredictor[MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
vp10_build_intra_predictors_for_interintra(
xd, bsize, 0, intrapredictor, MAX_SB_SIZE);
vp10_combine_interintra(xd, bsize, 0, ypred, ystride,
@@ -2045,7 +2045,7 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
{
- uint8_t uintrapredictor[MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]);
vp10_build_intra_predictors_for_interintra(
xd, bsize, plane, uintrapredictor, MAX_SB_SIZE);
vp10_combine_interintra(xd, bsize, plane, upred, ustride,
@@ -2204,12 +2204,12 @@
if (ref && get_wedge_bits(mi->mbmi.sb_type)
&& mi->mbmi.use_wedge_interinter) {
#if CONFIG_VP9_HIGHBITDEPTH
- uint8_t tmp_dst_[2 * MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
uint8_t *tmp_dst =
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
#else
- uint8_t tmp_dst[MAX_SB_SQUARE];
+ DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c
index d54a174..00f8834 100644
--- a/vp10/common/vp10_fwd_txfm2d.c
+++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -8,8 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp10/common/vp10_txfm.h"
+#include <assert.h>
+
+#include "vp10/common/enums.h"
#include "vp10/common/vp10_fwd_txfm1d.h"
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+#include "vp10/common/vp10_txfm.h"
static inline TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
switch (txfm_type) {
@@ -83,41 +87,145 @@
}
void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
+ const int stride, int tx_type,
const int bd) {
int32_t txfm_buf[4 * 4];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_4x4_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
+ const int stride, int tx_type,
const int bd) {
int32_t txfm_buf[8 * 8];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_8x8_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
+ const int stride, int tx_type,
const int bd) {
int32_t txfm_buf[16 * 16];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_16x16_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
+ const int stride, int tx_type,
const int bd) {
int32_t txfm_buf[32 * 32];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_32x32_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
+ const int stride, int tx_type,
const int bd) {
int32_t txfm_buf[64 * 64];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_64x64_cfg(tx_type);  // the 64x64 getter only selects a config for DCT_DCT; other tx_type values reach its assert
(void)bd;
fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
}
+
+const TXFM_2D_CFG* vp10_get_txfm_4x4_cfg(int tx_type) {  // Maps tx_type to the matching 4x4 forward 2D transform config.
+ const TXFM_2D_CFG* cfg = NULL;  // stays NULL if tx_type matches none of the cases below
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &fwd_txfm_2d_cfg_dct_dct_4;
+ break;
+ case ADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_4;
+ break;
+ case DCT_ADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_4;
+ break;
+ case ADST_ADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+ break;
+ default:
+ assert(0);  // unhandled tx_type: aborts when asserts are enabled; with NDEBUG the function returns NULL
+ }
+ return cfg;
+}
+
+const TXFM_2D_CFG* vp10_get_txfm_8x8_cfg(int tx_type) {  // Maps tx_type to the matching 8x8 forward 2D transform config.
+ const TXFM_2D_CFG* cfg = NULL;  // stays NULL if tx_type matches none of the cases below
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &fwd_txfm_2d_cfg_dct_dct_8;
+ break;
+ case ADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_8;
+ break;
+ case DCT_ADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_8;
+ break;
+ case ADST_ADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+ break;
+ default:
+ assert(0);  // unhandled tx_type: aborts when asserts are enabled; with NDEBUG the function returns NULL
+ }
+ return cfg;
+}
+
+const TXFM_2D_CFG* vp10_get_txfm_16x16_cfg(int tx_type) {  // Maps tx_type to the matching 16x16 forward 2D transform config.
+ const TXFM_2D_CFG* cfg = NULL;  // stays NULL if tx_type matches none of the cases below
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &fwd_txfm_2d_cfg_dct_dct_16;
+ break;
+ case ADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_16;
+ break;
+ case DCT_ADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_16;
+ break;
+ case ADST_ADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+ break;
+ default:
+ assert(0);  // unhandled tx_type: aborts when asserts are enabled; with NDEBUG the function returns NULL
+ }
+ return cfg;
+}
+
+const TXFM_2D_CFG* vp10_get_txfm_32x32_cfg(int tx_type) {  // Maps tx_type to the matching 32x32 forward 2D transform config.
+ const TXFM_2D_CFG* cfg = NULL;  // stays NULL if tx_type matches none of the cases below
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &fwd_txfm_2d_cfg_dct_dct_32;
+ break;
+ case ADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_32;
+ break;
+ case DCT_ADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_32;
+ break;
+ case ADST_ADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_32;
+ break;
+ default:
+ assert(0);  // unhandled tx_type: aborts when asserts are enabled; with NDEBUG the function returns NULL
+ }
+ return cfg;
+}
+
+const TXFM_2D_CFG* vp10_get_txfm_64x64_cfg(int tx_type) {  // Returns the 64x64 forward 2D transform config; only DCT_DCT selects one.
+ const TXFM_2D_CFG* cfg = NULL;  // stays NULL for every tx_type other than DCT_DCT
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &fwd_txfm_2d_cfg_dct_dct_64;
+ break;
+ case ADST_DCT:  // the three ADST variants are listed explicitly but share the default path
+ case DCT_ADST:
+ case ADST_ADST:
+ default:
+ assert(0);  // aborts when asserts are enabled; with NDEBUG the function returns NULL
+ }
+ return cfg;
+}
diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h
index e8c82fd..ed976df 100644
--- a/vp10/common/vp10_fwd_txfm2d_cfg.h
+++ b/vp10/common/vp10_fwd_txfm2d_cfg.h
@@ -10,6 +10,7 @@
#ifndef VP10_FWD_TXFM2D_CFG_H_
#define VP10_FWD_TXFM2D_CFG_H_
+#include "vp10/common/enums.h"
#include "vp10/common/vp10_fwd_txfm1d.h"
// ---------------- config fwd_dct_dct_4 ----------------
static const int8_t fwd_shift_dct_dct_4[3] = {2, 0, 0};
@@ -399,4 +400,10 @@
TXFM_TYPE_ADST32, // .txfm_type_col
TXFM_TYPE_DCT32}; // .txfm_type_row
+const TXFM_2D_CFG* vp10_get_txfm_4x4_cfg(int tx_type);  // Per-size lookups from tx_type to a forward 2D transform config.
+const TXFM_2D_CFG* vp10_get_txfm_8x8_cfg(int tx_type);
+const TXFM_2D_CFG* vp10_get_txfm_16x16_cfg(int tx_type);
+const TXFM_2D_CFG* vp10_get_txfm_32x32_cfg(int tx_type);
+const TXFM_2D_CFG* vp10_get_txfm_64x64_cfg(int tx_type);  // NOTE(review): the definition in this patch only selects a config for DCT_DCT
+
#endif // VP10_FWD_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index 1d227dd..ae0d2cb 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -614,15 +614,15 @@
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#fwd txfm
- add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_4x4 sse4_1/;
- add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_8x8 sse4_1/;
- add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_16x16 sse4_1/;
- add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_32x32 sse4_1/;
- add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, const int stride, const TXFM_2D_CFG *cfg, const int bd";
+ add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, const int stride, int tx_type, const int bd";
specialize qw/vp10_fwd_txfm2d_64x64 sse4_1/;
#inv txfm
diff --git a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
index 6664bd5..d884571 100644
--- a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
+++ b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
@@ -1,3 +1,14 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
#include "vp10/common/x86/vp10_txfm1d_sse4.h"
static inline void int16_array_with_stride_to_int32_array_without_stride(
@@ -49,8 +60,8 @@
}
static inline void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- int32_t *txfm_buf) {
+ const int stride, const TXFM_2D_CFG *cfg,
+ int32_t *txfm_buf) {
const int txfm_size = cfg->txfm_size;
const int8_t *shift = cfg->shift;
const int8_t *stage_range_col = cfg->stage_range_col;
@@ -77,41 +88,46 @@
}
void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[16];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_4x4_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[64];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_8x8_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[256];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_16x16_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[1024];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_32x32_cfg(tx_type);  // cfg is now looked up from tx_type rather than passed in by the caller
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
void vp10_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[4096];
+ const TXFM_2D_CFG* cfg = vp10_get_txfm_64x64_cfg(tx_type);  // the 64x64 getter only selects a config for DCT_DCT; other tx_type values reach its assert
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
}
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index b192164..491f2ac 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -195,20 +195,10 @@
switch (tx_type) {
case DCT_DCT:
- vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_dct_dct_4, bd);
- break;
case ADST_DCT:
- vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_adst_dct_4, bd);
- break;
case DCT_ADST:
- vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_dct_adst_4, bd);
- break;
case ADST_ADST:
- vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_adst_adst_4, bd);
+ vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@@ -242,20 +232,10 @@
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
- vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_dct_dct_8, bd);
- break;
case ADST_DCT:
- vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_adst_dct_8, bd);
- break;
case DCT_ADST:
- vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_dct_adst_8, bd);
- break;
case ADST_ADST:
- vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_adst_adst_8, bd);
+ vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@@ -290,20 +270,10 @@
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
- vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_dct_dct_16, bd);
- break;
case ADST_DCT:
- vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_adst_dct_16, bd);
- break;
case DCT_ADST:
- vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_dct_adst_16, bd);
- break;
case ADST_ADST:
- vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_adst_adst_16, bd);
+ vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@@ -340,8 +310,7 @@
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
- vp10_fwd_txfm2d_32x32(src_diff, coeff, diff_stride,
- &fwd_txfm_2d_cfg_dct_dct_32, bd);
+ vp10_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case ADST_DCT:
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 9423ed2..823095e 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -99,8 +99,8 @@
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
int sad_per_bit) {
#if CONFIG_REF_MV
- const MV diff = { (mv->row - ref->row) << 3,
- (mv->col - ref->col) << 3 };
+ const MV diff = { (mv->row - ref->row) * 8,
+ (mv->col - ref->col) * 8 };
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) *
sad_per_bit,
@@ -171,15 +171,6 @@
* could reduce the area.
*/
-/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
- * from the same math as in mv_err_cost(). */
-#define MVC(r, c) \
- (mvcost ? \
- ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
- mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
- error_per_bit + 8192) >> 14 : 0)
-
-
// convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) {
return x & 7;
@@ -192,13 +183,16 @@
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = {r, c}; \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
if (second_pred == NULL) \
thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
src_stride, &sse); \
else \
thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
z, src_stride, &sse, second_pred); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
+ v += thismse; \
+ if (v < besterr) { \
besterr = v; \
br = r; \
bc = c; \
@@ -219,10 +213,13 @@
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER1(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = {r, c}; \
thismse = upsampled_pref_error(xd, vfp, z, src_stride, \
upre(y, y_stride, r, c), y_stride, \
second_pred, w, h, &sse); \
- if ((v = MVC(r, c) + thismse) < besterr) { \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ v += thismse; \
+ if (v < besterr) { \
besterr = v; \
br = r; \
bc = c; \
@@ -334,8 +331,6 @@
const int offset = bestmv->row * y_stride + bestmv->col; \
const uint8_t *const y = xd->plane[0].pre[0].buf; \
\
- int rr = ref_mv->row; \
- int rc = ref_mv->col; \
int br = bestmv->row * 8; \
int bc = bestmv->col * 8; \
int hstep = 4; \
@@ -762,8 +757,6 @@
const int offset = bestmv->row * y_stride + bestmv->col;
const uint8_t *const y = xd->plane[0].pre[0].buf;
- int rr = ref_mv->row;
- int rc = ref_mv->col;
int br = bestmv->row * 8;
int bc = bestmv->col * 8;
int hstep = 4;
@@ -790,7 +783,7 @@
if (use_upsampled_ref)
besterr = upsampled_setup_center_error(xd, bestmv, ref_mv, error_per_bit,
vfp, z, src_stride, y, y_stride,
- second_pred, w, h, (offset << 3),
+ second_pred, w, h, (offset * 8),
mvjcost, mvcost, sse1, distortion);
else
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
@@ -920,7 +913,6 @@
return besterr;
}
-#undef MVC
#undef PRE
#undef CHECK_BETTER
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index c8f5715..6ffa790 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1829,6 +1829,8 @@
if (centroids[i] == centroids[i - 1]) {
j = i;
while (j < k - 1) {
+ assert((j + 1) < PALETTE_MAX_SIZE);
+ assert(j > 0);
centroids[j] = centroids[j + 1];
++j;
}
@@ -5046,7 +5048,8 @@
if (!has_second_rf &&
#if CONFIG_EXT_INTER
have_newmv_in_inter_mode(this_mode) &&
- seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV
+ (seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+ vp10_use_mv_hp(&bsi->ref_mv[0]->as_mv) == 0)
#else
this_mode == NEWMV &&
(seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV ||
@@ -8873,7 +8876,6 @@
else if (best_mbmode.mv[0].as_int == 0)
best_mbmode.mode = ZEROMV;
} else {
- int i;
int_mv nearestmv[2] = { frame_mv[NEARESTMV][refs[0]],
frame_mv[NEARESTMV][refs[1]] };
int_mv nearmv[2] = { frame_mv[NEARMV][refs[0]],
@@ -8885,6 +8887,7 @@
nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
}
#else
+ int i;
int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) ?
VPXMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2) : INT_MAX;