Merge "Use reduced transform set for 16x16" into nextgenv2
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 78aabe6..79f4e10 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -923,6 +923,15 @@
make_tuple(2, 3, &vpx_highbd_8_variance4x8_c, 8),
make_tuple(2, 2, &vpx_highbd_8_variance4x4_c, 8)));
+#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, VpxHBDVarianceTest,
+ ::testing::Values(
+ make_tuple(2, 2, &vpx_highbd_8_variance4x4_sse4_1, 8),
+ make_tuple(2, 2, &vpx_highbd_10_variance4x4_sse4_1, 10),
+ make_tuple(2, 2, &vpx_highbd_12_variance4x4_sse4_1, 12)));
+#endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+
INSTANTIATE_TEST_CASE_P(
C, VpxHBDSubpelVarianceTest,
::testing::Values(
@@ -1125,6 +1134,22 @@
make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse, 0)));
#endif // CONFIG_USE_X86INC
+#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, VpxSubpelVarianceTest,
+ ::testing::Values(
+ make_tuple(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_sse4_1, 8),
+ make_tuple(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_sse4_1, 10),
+ make_tuple(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_sse4_1, 12)));
+
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, VpxSubpelAvgVarianceTest,
+ ::testing::Values(
+ make_tuple(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_sse4_1, 8),
+ make_tuple(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_sse4_1, 10),
+ make_tuple(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_sse4_1, 12)));
+#endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+
#if CONFIG_VP9_HIGHBITDEPTH
/* TODO(debargha): This test does not support the highbd version
INSTANTIATE_TEST_CASE_P(
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index e144a45..86a7efc 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -20,7 +20,7 @@
#undef MAX_SB_SIZE
-// Pixels per max superblock size
+// Max superblock size
#if CONFIG_EXT_PARTITION
# define MAX_SB_SIZE_LOG2 7
#else
@@ -29,6 +29,9 @@
#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)
#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
+// Min superblock size
+#define MIN_SB_SIZE_LOG2 6
+
// Pixels per Mode Info (MI) unit
#define MI_SIZE_LOG2 3
#define MI_SIZE (1 << MI_SIZE_LOG2)
@@ -37,6 +40,9 @@
#define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)
#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2)
+// MI-units per min superblock
+#define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2)
+
// Mask to extract MI offset within max MIB
#define MAX_MIB_MASK (MAX_MIB_SIZE - 1)
#define MAX_MIB_MASK_2 (MAX_MIB_SIZE * 2 - 1)
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c
index b7c8260..41155c9 100644
--- a/vp10/encoder/context_tree.c
+++ b/vp10/encoder/context_tree.c
@@ -244,8 +244,16 @@
}
++square_index;
}
- td->pc_root = &td->pc_tree[tree_nodes - 1];
- td->pc_root[0].none.best_mode_index = 2;
+
+ // Set up the root node for the largest superblock size
+ i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
+ td->pc_root[i] = &td->pc_tree[tree_nodes - 1];
+ td->pc_root[i]->none.best_mode_index = 2;
+ // Set up the root nodes for the rest of the possible superblock sizes
+ while (--i >= 0) {
+ td->pc_root[i] = td->pc_root[i+1]->split[0];
+ td->pc_root[i]->none.best_mode_index = 2;
+ }
}
void vp10_free_pc_tree(ThreadData *td) {
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 6aba475..88e9486 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -4235,6 +4235,7 @@
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO **mi = cm->mi_grid_visible + idx_str;
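+ // Pick the pre-built partition search root that matches the current
+ // superblock size.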
+ PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2];
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < leaf_nodes; ++i)
@@ -4249,7 +4250,7 @@
}
vp10_zero(x->pred_mv);
- td->pc_root->index = 0;
+ pc_root->index = 0;
if (seg->enabled) {
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
@@ -4269,7 +4270,7 @@
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
- 1, td->pc_root);
+ 1, pc_root);
} else if (cpi->partition_search_skippable_frame) {
BLOCK_SIZE bsize;
set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
@@ -4280,7 +4281,7 @@
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
- 1, td->pc_root);
+ 1, pc_root);
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
cm->frame_type != KEY_FRAME) {
choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
@@ -4289,7 +4290,7 @@
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
- 1, td->pc_root);
+ 1, pc_root);
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
@@ -4303,9 +4304,7 @@
#if CONFIG_SUPERTX
&dummy_rate_nocoef,
#endif // CONFIG_SUPERTX
- INT64_MAX,
- cm->sb_size == BLOCK_LARGEST ? td->pc_root
- : td->pc_root->split[0]);
+ INT64_MAX, pc_root);
}
}
#if CONFIG_ENTROPY
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 2098378..bf7815f 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -266,7 +266,7 @@
PICK_MODE_CONTEXT *leaf_tree;
PC_TREE *pc_tree;
- PC_TREE *pc_root;
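+ // One partition search root per supported superblock size, indexed by
+ // (mib_size_log2 - MIN_MIB_SIZE_LOG2).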
+ PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
} ThreadData;
struct EncWorkerData;
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c
index dd3e437..5936a24 100644
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -491,7 +491,8 @@
TileInfo tile;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
- const PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;
+ const PICK_MODE_CONTEXT *ctx =
+ &cpi->td.pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2]->none;
int i;
int recon_yoffset, recon_uvoffset;
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 0c8ec43..9423ed2 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -24,6 +24,7 @@
#include "vp10/encoder/encoder.h"
#include "vp10/encoder/mcomp.h"
+#include "vp10/encoder/rdopt.h"
// #define NEW_DIAMOND_SEARCH
@@ -367,8 +368,8 @@
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
- vpx_highbd_comp_avg_pred_c(comp_pred16, second_pred, w, h, y + offset,
- y_stride);
+ vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
+ y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride,
sse1);
} else {
@@ -2655,6 +2656,29 @@
v = INT_MAX; \
}
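+// CHECK_BETTER0 maps to the existing CHECK_BETTER; CHECK_BETTER1 instead
+// re-measures the candidate position against the upsampled reference. They
+// are selected through SECOND_LEVEL_CHECKS_BEST(0)/(1) in the search below.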
+#undef CHECK_BETTER0
+#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
+
+#undef CHECK_BETTER1
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ thismse = upsampled_masked_pref_error(xd, \
+ mask, mask_stride, \
+ vfp, z, src_stride, \
+ upre(y, y_stride, r, c), \
+ y_stride, \
+ w, h, &sse); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
int vp10_find_best_masked_sub_pixel_tree(const MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
MV *bestmv, const MV *ref_mv,
@@ -2671,8 +2695,8 @@
const MACROBLOCKD *xd = &x->e_mbd;
unsigned int besterr = INT_MAX;
unsigned int sse;
- unsigned int whichdir;
int thismse;
+ unsigned int whichdir;
unsigned int halfiters = iters_per_step;
unsigned int quarteriters = iters_per_step;
unsigned int eighthiters = iters_per_step;
@@ -2747,6 +2771,276 @@
return besterr;
}
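+// Returns the masked variance at the starting (integer-pel) position plus
+// the rate cost of the starting MV; also reports distortion and SSE.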
+static unsigned int setup_masked_center_error(const uint8_t *mask,
+ int mask_stride,
+ const MV *bestmv,
+ const MV *ref_mv,
+ int error_per_bit,
+ const vp10_variance_fn_ptr_t *vfp,
+ const uint8_t *const src,
+ const int src_stride,
+ const uint8_t *const y,
+ int y_stride,
+ int offset,
+ int *mvjcost, int *mvcost[2],
+ unsigned int *sse1,
+ int *distortion) {
+ unsigned int besterr;
+ besterr = vfp->mvf(y + offset, y_stride, src, src_stride,
+ mask, mask_stride, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ return besterr;
+}
+
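+// Computes the masked prediction error of a block taken from the upsampled
+// reference against the source, for both regular and high-bitdepth buffers.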
+static int upsampled_masked_pref_error(const MACROBLOCKD *xd,
+ const uint8_t *mask,
+ int mask_stride,
+ const vp10_variance_fn_ptr_t *vfp,
+ const uint8_t *const src,
+ const int src_stride,
+ const uint8_t *const y, int y_stride,
+ int w, int h, unsigned int *sse) {
+ unsigned int besterr;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
+ vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
+
+ besterr = vfp->mvf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride,
+ mask, mask_stride, sse);
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+ (void) xd;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_upsampled_pred(pred, w, h, y, y_stride);
+
+ besterr = vfp->mvf(pred, w, src, src_stride,
+ mask, mask_stride, sse);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+ return besterr;
+}
+
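+// Upsampled-reference counterpart of setup_masked_center_error(): seeds the
+// search with the error at the starting position plus its MV rate cost.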
+static unsigned int upsampled_setup_masked_center_error(
+ const MACROBLOCKD *xd,
+ const uint8_t *mask, int mask_stride,
+ const MV *bestmv, const MV *ref_mv,
+ int error_per_bit, const vp10_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride,
+ const uint8_t *const y, int y_stride,
+ int w, int h, int offset, int *mvjcost, int *mvcost[2],
+ unsigned int *sse1, int *distortion) {
+ unsigned int besterr = upsampled_masked_pref_error(
+ xd, mask, mask_stride, vfp, src, src_stride,
+ y + offset, y_stride, w, h, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ return besterr;
+}
+
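+// Like vp10_find_best_masked_sub_pixel_tree(), but can measure candidate
+// positions against an upsampled reference when use_upsampled_ref is set.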
+int vp10_find_best_masked_sub_pixel_tree_up(VP10_COMP *cpi,
+ MACROBLOCK *x,
+ const uint8_t *mask,
+ int mask_stride,
+ int mi_row, int mi_col,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp,
+ int error_per_bit,
+ const vp10_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1,
+ int is_second,
+ int use_upsampled_ref) {
+ const uint8_t *const z = x->plane[0].src.buf;
+ const uint8_t *const src_address = z;
+ const int src_stride = x->plane[0].src.stride;
+ MACROBLOCKD *xd = &x->e_mbd;
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ unsigned int thismse;
+
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ int iter;
+ int round = 3 - forced_stop;
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+ int tr = br;
+ int tc = bc;
+ const MV *search_step = search_step_table;
+ int idx, best_idx = -1;
+ unsigned int cost_array[5];
+ int kr, kc;
+ const int w = 4 * num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int h = 4 * num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int offset;
+ int y_stride;
+ const uint8_t *y;
+
+ const struct buf_2d backup_pred = pd->pre[is_second];
+ if (use_upsampled_ref) {
+ int ref = xd->mi[0]->mbmi.ref_frame[is_second];
+ const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+ setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer,
+ upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+ NULL, pd->subsampling_x, pd->subsampling_y);
+ }
+ y = pd->pre[is_second].buf;
+ y_stride = pd->pre[is_second].stride;
+ offset = bestmv->row * y_stride + bestmv->col;
+
+ if (!(allow_hp && vp10_use_mv_hp(ref_mv)))
+ if (round == 3)
+ round = 2;
+
+ bestmv->row *= 8;
+ bestmv->col *= 8;
+
+ // use_upsampled_ref can be 0 or 1
+ if (use_upsampled_ref)
+ besterr = upsampled_setup_masked_center_error(
+ xd, mask, mask_stride, bestmv, ref_mv, error_per_bit,
+ vfp, z, src_stride, y, y_stride,
+ w, h, (offset << 3),
+ mvjcost, mvcost, sse1, distortion);
+ else
+ besterr = setup_masked_center_error(
+ mask, mask_stride, bestmv, ref_mv, error_per_bit,
+ vfp, z, src_stride, y, y_stride,
+ offset, mvjcost, mvcost, sse1, distortion);
+
+ for (iter = 0; iter < round; ++iter) {
+ // Check vertical and horizontal sub-pixel positions.
+ for (idx = 0; idx < 4; ++idx) {
+ tr = br + search_step[idx].row;
+ tc = bc + search_step[idx].col;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = {tr, tc};
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_masked_pref_error(xd,
+ mask, mask_stride,
+ vfp, src_address, src_stride,
+ pre_address, y_stride,
+ w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
+ (tc >> 3);
+ thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride,
+ mask, mask_stride, &sse);
+ }
+
+ cost_array[idx] = thismse +
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ if (cost_array[idx] < besterr) {
+ best_idx = idx;
+ besterr = cost_array[idx];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;
+ }
+ }
+
+ // Check diagonal sub-pixel position
+ kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
+ kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
+
+ tc = bc + kc;
+ tr = br + kr;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = {tr, tc};
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_masked_pref_error(xd,
+ mask, mask_stride,
+ vfp, src_address, src_stride,
+ pre_address, y_stride,
+ w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+
+ thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, mask, mask_stride, &sse);
+ }
+
+ cost_array[4] = thismse +
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ if (cost_array[4] < besterr) {
+ best_idx = 4;
+ besterr = cost_array[4];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;
+ }
+
+ if (best_idx < 4 && best_idx >= 0) {
+ br += search_step[best_idx].row;
+ bc += search_step[best_idx].col;
+ } else if (best_idx == 4) {
+ br = tr;
+ bc = tc;
+ }
+
+ if (iters_per_step > 1 && best_idx != -1) {
+ if (use_upsampled_ref) {
+ SECOND_LEVEL_CHECKS_BEST(1);
+ } else {
+ SECOND_LEVEL_CHECKS_BEST(0);
+ }
+ }
+
+ tr = br;
+ tc = bc;
+
+ search_step += 4;
+ hstep >>= 1;
+ best_idx = -1;
+ }
+
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void) tr;
+ (void) tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
+ if (use_upsampled_ref) {
+ pd->pre[is_second] = backup_pred;
+ }
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
#undef DIST
#undef MVC
#undef CHECK_BETTER
diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h
index f99cd8b..c12e7af 100644
--- a/vp10/encoder/mcomp.h
+++ b/vp10/encoder/mcomp.h
@@ -169,7 +169,24 @@
int iters_per_step,
int *mvjcost, int *mvcost[2],
int *distortion,
- unsigned int *sse1, int is_second);
+ unsigned int *sse1,
+ int is_second);
+int vp10_find_best_masked_sub_pixel_tree_up(struct VP10_COMP *cpi,
+ MACROBLOCK *x,
+ const uint8_t *mask,
+ int mask_stride,
+ int mi_row, int mi_col,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp,
+ int error_per_bit,
+ const vp10_variance_fn_ptr_t *vfp,
+ int forced_stop,
+ int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion,
+ unsigned int *sse1,
+ int is_second,
+ int use_upsampled_ref);
int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
MV *mvp_full, int step_param,
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 87836cb..d4538af 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -87,8 +87,8 @@
const double ext_tx_th = 0.99;
#endif
-const double ADST_FLIP_SVM[8] = {-7.3283, -3.0450, -3.2450, 3.6403, // vert
- -9.4204, -3.1821, -4.6851, 4.1469}; // horz
+const double ADST_FLIP_SVM[8] = {-6.6623, -2.8062, -3.2531, 3.1671, // vert
+ -7.7051, -3.2234, -3.6193, 3.4533}; // horz
typedef struct {
PREDICTION_MODE mode;
@@ -355,14 +355,14 @@
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
-#define FAST_EXT_TX_CORR_MARGIN 0.5
-#define FAST_EXT_TX_EDST_MARGIN 0.05
+#define FAST_EXT_TX_CORR_MARGIN 0.3
+#define FAST_EXT_TX_EDST_MARGIN 0.5
typedef enum {
DCT_1D = 0,
ADST_1D = 1,
FLIPADST_1D = 2,
- DST_1D = 3,
+ IDTX_1D = 3,
TX_TYPES_1D = 4,
} TX_TYPE_1D;
@@ -568,18 +568,18 @@
}
}
-int dct_vs_dst(int16_t *diff, int stride, int w, int h,
- double *hcorr, double *vcorr) {
+int dct_vs_idtx(int16_t *diff, int stride, int w, int h,
+ double *hcorr, double *vcorr) {
int prune_bitmask = 0;
get_horver_correlation(diff, stride, w, h, hcorr, vcorr);
if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
- prune_bitmask |= 1 << DST_1D;
+ prune_bitmask |= 1 << IDTX_1D;
else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
prune_bitmask |= 1 << DCT_1D;
if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
- prune_bitmask |= 1 << (DST_1D + 8);
+ prune_bitmask |= 1 << (IDTX_1D + 8);
else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
prune_bitmask |= 1 << (DCT_1D + 8);
return prune_bitmask;
@@ -600,7 +600,7 @@
vp10_subtract_plane(x, bsize, 0);
return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
pd->dst.stride, hdist, vdist) |
- dct_vs_dst(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
+ dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
}
#endif // CONFIG_EXT_TX
@@ -653,13 +653,13 @@
FLIPADST_1D,
ADST_1D,
FLIPADST_1D,
- DST_1D,
+ IDTX_1D,
DCT_1D,
- DST_1D,
+ IDTX_1D,
ADST_1D,
- DST_1D,
+ IDTX_1D,
FLIPADST_1D,
- DST_1D,
+ IDTX_1D,
};
static TX_TYPE_1D htx_tab[TX_TYPES] = {
DCT_1D,
@@ -671,16 +671,14 @@
FLIPADST_1D,
FLIPADST_1D,
ADST_1D,
+ IDTX_1D,
+ IDTX_1D,
DCT_1D,
- DST_1D,
+ IDTX_1D,
ADST_1D,
- DST_1D,
+ IDTX_1D,
FLIPADST_1D,
- DST_1D,
- DST_1D,
};
- if (tx_type >= IDTX)
- return 1;
return !(((prune >> vtx_tab[tx_type]) & 1) |
((prune >> (htx_tab[tx_type] + 8)) & 1));
#else
@@ -5978,15 +5976,18 @@
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
- vp10_find_best_masked_sub_pixel_tree(x, mask, mask_stride,
- &tmp_mv->as_mv, &ref_mv,
- cm->allow_high_precision_mv,
- x->errorperbit,
- &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
- x->nmvjointcost, x->mvcost,
- &dis, &x->pred_sse[ref], ref_idx);
+ vp10_find_best_masked_sub_pixel_tree_up(cpi, x, mask, mask_stride,
+ mi_row, mi_col,
+ &tmp_mv->as_mv, &ref_mv,
+ cm->allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ x->nmvjointcost, x->mvcost,
+ &dis, &x->pred_sse[ref],
+ ref_idx,
+ cpi->sf.use_upsampled_references);
}
*rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index 90c8bed..e6be1dd 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -433,7 +433,7 @@
return *sse; \
}
-void highbd_var_filter_block2d_bil_first_pass(
+void vpx_highbd_var_filter_block2d_bil_first_pass(
const uint8_t *src_ptr8,
uint16_t *output_ptr,
unsigned int src_pixels_per_line,
@@ -459,7 +459,7 @@
}
}
-void highbd_var_filter_block2d_bil_second_pass(
+void vpx_highbd_var_filter_block2d_bil_second_pass(
const uint16_t *src_ptr,
uint16_t *output_ptr,
unsigned int src_pixels_per_line,
@@ -492,13 +492,14 @@
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
- dst_stride, sse); \
+ dst_stride, sse); \
} \
\
uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \
@@ -509,10 +510,11 @@
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -526,10 +528,11 @@
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
@@ -546,16 +549,17 @@
uint16_t temp2[H * W]; \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
\
return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
- dst_stride, sse); \
+ dst_stride, sse); \
} \
\
uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
@@ -568,10 +572,11 @@
uint16_t temp2[H * W]; \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -590,10 +595,11 @@
uint16_t temp2[H * W]; \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass( \
+ src, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
vpx_highbd_comp_avg_pred_c(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
@@ -914,11 +920,11 @@
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
- H + 1, W, \
- bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+ H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
@@ -934,11 +940,11 @@
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
- H + 1, W, \
- bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+ H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_10_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
@@ -954,11 +960,11 @@
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
- H + 1, W, \
- bilinear_filters_2t[xoffset]); \
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- bilinear_filters_2t[yoffset]); \
+ vpx_highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, \
+ H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
\
return vpx_highbd_12_masked_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, \
diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h
index 4ad23f8..1759854 100644
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -130,7 +130,7 @@
} vp10_variance_fn_ptr_t;
#endif // CONFIG_VP10
-void highbd_var_filter_block2d_bil_first_pass(
+void vpx_highbd_var_filter_block2d_bil_first_pass(
const uint8_t *src_ptr8,
uint16_t *output_ptr,
unsigned int src_pixels_per_line,
@@ -139,7 +139,7 @@
unsigned int output_width,
const uint8_t *filter);
-void highbd_var_filter_block2d_bil_second_pass(
+void vpx_highbd_var_filter_block2d_bil_second_pass(
const uint16_t *src_ptr,
uint16_t *output_ptr,
unsigned int src_pixels_per_line,
diff --git a/vpx_dsp/x86/highbd_variance_sse4.c b/vpx_dsp/x86/highbd_variance_sse4.c
index 18ecc7e..5c1dfe4 100644
--- a/vpx_dsp/x86/highbd_variance_sse4.c
+++ b/vpx_dsp/x86/highbd_variance_sse4.c
@@ -119,10 +119,12 @@
uint16_t fdata3[(4 + 1) * 4];
uint16_t temp2[4 * 4];
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
- 4, bilinear_filters_2t[xoffset]);
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
+ vpx_highbd_var_filter_block2d_bil_first_pass(
+ src, fdata3, src_stride, 1, 4 + 1,
+ 4, bilinear_filters_2t[xoffset]);
+ vpx_highbd_var_filter_block2d_bil_second_pass(
+ fdata3, temp2, 4, 4, 4, 4,
+ bilinear_filters_2t[yoffset]);
return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp2),
4, dst, dst_stride, sse);
@@ -137,10 +139,12 @@
uint16_t fdata3[(4 + 1) * 4];
uint16_t temp2[4 * 4];
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
- 4, bilinear_filters_2t[xoffset]);
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
+ vpx_highbd_var_filter_block2d_bil_first_pass(
+ src, fdata3, src_stride, 1, 4 + 1,
+ 4, bilinear_filters_2t[xoffset]);
+ vpx_highbd_var_filter_block2d_bil_second_pass(
+ fdata3, temp2, 4, 4, 4, 4,
+ bilinear_filters_2t[yoffset]);
return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp2),
4, dst, dst_stride, sse);
@@ -155,10 +159,12 @@
uint16_t fdata3[(4 + 1) * 4];
uint16_t temp2[4 * 4];
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
- 4, bilinear_filters_2t[xoffset]);
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
+ vpx_highbd_var_filter_block2d_bil_first_pass(
+ src, fdata3, src_stride, 1, 4 + 1,
+ 4, bilinear_filters_2t[xoffset]);
+ vpx_highbd_var_filter_block2d_bil_second_pass(
+ fdata3, temp2, 4, 4, 4, 4,
+ bilinear_filters_2t[yoffset]);
return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp2),
4, dst, dst_stride, sse);
@@ -177,13 +183,15 @@
uint16_t temp2[4 * 4];
DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
- 4, bilinear_filters_2t[xoffset]);
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
+ vpx_highbd_var_filter_block2d_bil_first_pass(
+ src, fdata3, src_stride, 1, 4 + 1,
+ 4, bilinear_filters_2t[xoffset]);
+ vpx_highbd_var_filter_block2d_bil_second_pass(
+ fdata3, temp2, 4, 4, 4, 4,
+ bilinear_filters_2t[yoffset]);
- vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4,
- CONVERT_TO_BYTEPTR(temp2), 4);
+ vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4,
+ CONVERT_TO_BYTEPTR(temp2), 4);
return vpx_highbd_8_variance4x4(CONVERT_TO_BYTEPTR(temp3),
4, dst, dst_stride, sse);
@@ -200,13 +208,15 @@
uint16_t temp2[4 * 4];
DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
- 4, bilinear_filters_2t[xoffset]);
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
+ vpx_highbd_var_filter_block2d_bil_first_pass(
+ src, fdata3, src_stride, 1, 4 + 1,
+ 4, bilinear_filters_2t[xoffset]);
+ vpx_highbd_var_filter_block2d_bil_second_pass(
+ fdata3, temp2, 4, 4, 4, 4,
+ bilinear_filters_2t[yoffset]);
- vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4,
- CONVERT_TO_BYTEPTR(temp2), 4);
+ vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4,
+ CONVERT_TO_BYTEPTR(temp2), 4);
return vpx_highbd_10_variance4x4(CONVERT_TO_BYTEPTR(temp3),
4, dst, dst_stride, sse);
@@ -223,13 +233,15 @@
uint16_t temp2[4 * 4];
DECLARE_ALIGNED(16, uint16_t, temp3[4 * 4]);
- highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, 4 + 1,
- 4, bilinear_filters_2t[xoffset]);
- highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
+ vpx_highbd_var_filter_block2d_bil_first_pass(
+ src, fdata3, src_stride, 1, 4 + 1,
+ 4, bilinear_filters_2t[xoffset]);
+ vpx_highbd_var_filter_block2d_bil_second_pass(
+ fdata3, temp2, 4, 4, 4, 4,
+ bilinear_filters_2t[yoffset]);
- vpx_highbd_comp_avg_pred_c(temp3, second_pred, 4, 4,
- CONVERT_TO_BYTEPTR(temp2), 4);
+ vpx_highbd_comp_avg_pred(temp3, second_pred, 4, 4,
+ CONVERT_TO_BYTEPTR(temp2), 4);
return vpx_highbd_12_variance4x4(CONVERT_TO_BYTEPTR(temp3),
4, dst, dst_stride, sse);