Merge "mb_lpf_horizontal_edge AVX2 optimization"
diff --git a/test/datarate_test.cc b/test/datarate_test.cc
index 6d50644..85f4bb6 100644
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -176,14 +176,32 @@
}
}
-class DatarateTestVP9 : public DatarateTest {
+class DatarateTestVP9 : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ public:
+ DatarateTestVP9() : EncoderTest(GET_PARAM(0)) {}
+
protected:
virtual ~DatarateTestVP9() {}
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GET_PARAM(1));
+ set_cpu_used_ = GET_PARAM(2);
+ ResetModel();
+ }
+
+ virtual void ResetModel() {
+ last_pts_ = 0;
+ frame_number_ = 0;
+ bits_total_ = 0;
+ duration_ = 0.0;
+ }
+
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
- encoder->Control(VP8E_SET_CPUUSED, 2);
+ encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
}
const vpx_rational_t tb = video->timebase();
timebase_ = static_cast<double>(tb.num) / tb.den;
@@ -205,6 +223,14 @@
effective_datarate_ = ((bits_total_) / 1000.0) / duration_;
}
}
+
+ vpx_codec_pts_t last_pts_;
+ double timebase_;
+ int frame_number_;
+ int64_t bits_total_;
+ double duration_;
+ double effective_datarate_;
+ int set_cpu_used_;
};
// There is no buffer model/frame dropper in VP9 currently, so for now we
@@ -218,7 +244,7 @@
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 140);
- for (int i = 200; i < 800; i += 200) {
+ for (int i = 150; i < 800; i += 200) {
cfg_.rc_target_bitrate = i;
ResetModel();
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
@@ -231,5 +257,6 @@
VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES);
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9,
- ::testing::Values(::libvpx_test::kOnePassGood));
+ ::testing::Values(::libvpx_test::kOnePassGood),
+ ::testing::Range(1, 5));
} // namespace
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 3347b35..21c91d6 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -309,8 +309,8 @@
192, 128, 64
};
-static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTERS+1]
- [SWITCHABLE_FILTERS-1] = {
+static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+ [SWITCHABLE_FILTERS - 1] = {
{ 235, 162, },
{ 36, 255, },
{ 34, 3, },
@@ -416,7 +416,7 @@
fc->partition_prob[INTER_FRAME][i], 0);
if (cm->mcomp_filter_type == SWITCHABLE) {
- for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
update_mode_probs(SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
counts->switchable_interp[i],
pre_fc->switchable_interp_prob[i],
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index ab37b75..ea96555 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -16,6 +16,7 @@
#define TX_SIZE_CONTEXTS 2
#define SWITCHABLE_FILTERS 3 // number of switchable filters
+#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
// #define MODE_STATS
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index a823de8..ba2e9d8 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -43,7 +43,7 @@
vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
vp9_prob partition_prob[FRAME_TYPES][PARTITION_CONTEXTS][PARTITION_TYPES - 1];
vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
- vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
+ vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1];
vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
@@ -62,7 +62,7 @@
vp9_coeff_count_model coef[TX_SIZES][BLOCK_TYPES];
unsigned int eob_branch[TX_SIZES][BLOCK_TYPES][REF_TYPES]
[COEF_BANDS][PREV_COEF_CONTEXTS];
- unsigned int switchable_interp[SWITCHABLE_FILTERS + 1]
+ unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS];
unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 53b9003..1c96788 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -161,13 +161,12 @@
// scaling case. It needs to be done on the scaled MV, not the pre-scaling
// MV. Note however that it performs the subsampling aware scaling so
// that the result is always q4.
- const MV res_mv = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
- pd->subsampling_x,
- pd->subsampling_y);
+ // The resulting MV precision is MV_PRECISION_Q4.
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
+ pd->subsampling_x,
+ pd->subsampling_y);
uint8_t *pre;
- // mv_precision precision is MV_PRECISION_Q4.
- const MV mv_q4 = {res_mv.row, res_mv.col };
MV32 scaled_mv;
int xs, ys;
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 30d5b6d..bf3a101 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -109,7 +109,7 @@
static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
int i, j;
- for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j)
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
}
@@ -382,15 +382,11 @@
static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
int mi_row, int mi_col,
- vp9_reader *r, BLOCK_SIZE bsize, int index) {
+ vp9_reader *r, BLOCK_SIZE bsize) {
const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi;
int eobtotal;
- if (less8x8)
- if (index > 0)
- return;
-
set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r);
@@ -448,54 +444,50 @@
static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
int mi_row, int mi_col,
- vp9_reader* r, BLOCK_SIZE bsize, int index) {
+ vp9_reader* r, BLOCK_SIZE bsize) {
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
- PARTITION_TYPE partition = PARTITION_NONE;
+ PARTITION_TYPE partition;
BLOCK_SIZE subsize;
+ int ctx;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- if (bsize < BLOCK_8X8) {
- if (index > 0)
- return;
- } else {
- const int ctx = partition_plane_context(xd->above_seg_context,
- xd->left_seg_context,
- mi_row, mi_col, bsize);
- partition = read_partition(hbs, cm->mi_rows, cm->mi_cols, mi_row, mi_col,
- cm->fc.partition_prob[cm->frame_type][ctx], r);
+ ctx = partition_plane_context(xd->above_seg_context, xd->left_seg_context,
+ mi_row, mi_col, bsize);
+ partition = read_partition(hbs, cm->mi_rows, cm->mi_cols, mi_row, mi_col,
+ cm->fc.partition_prob[cm->frame_type][ctx], r);
- if (!cm->frame_parallel_decoding_mode)
- ++cm->counts.partition[ctx][partition];
- }
+ if (!cm->frame_parallel_decoding_mode)
+ ++cm->counts.partition[ctx][partition];
subsize = get_subsize(bsize, partition);
-
- switch (partition) {
- case PARTITION_NONE:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, 0);
- break;
- case PARTITION_HORZ:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, 0);
- if (mi_row + hbs < cm->mi_rows)
- decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize, 1);
- break;
- case PARTITION_VERT:
- decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, 0);
- if (mi_col + hbs < cm->mi_cols)
- decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize, 1);
- break;
- case PARTITION_SPLIT: {
- int n;
- for (n = 0; n < 4; n++) {
- const int j = n >> 1, i = n & 1;
- decode_modes_sb(cm, xd, tile, mi_row + j * hbs, mi_col + i * hbs,
- r, subsize, n);
- }
- } break;
- default:
- assert(!"Invalid partition type");
+ if (subsize < BLOCK_8X8) {
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+ } else {
+ switch (partition) {
+ case PARTITION_NONE:
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+ break;
+ case PARTITION_HORZ:
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+ if (mi_row + hbs < cm->mi_rows)
+ decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
+ break;
+ case PARTITION_VERT:
+ decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+ if (mi_col + hbs < cm->mi_cols)
+ decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
+ break;
+ case PARTITION_SPLIT:
+ decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize);
+ decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
+ decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
+ decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
+ break;
+ default:
+ assert(!"Invalid partition type");
+ }
}
// update partition context
@@ -780,7 +772,7 @@
vp9_zero(xd->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
- decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64, 0);
+ decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64);
if (pbi->do_loopfilter_inline) {
const int lf_start = mi_row - MI_BLOCK_SIZE;
@@ -935,7 +927,7 @@
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
- mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64, 0);
+ mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
}
return !tile_data->xd.corrupted;
}
@@ -963,7 +955,8 @@
vp9_worker_init(worker);
worker->hook = (VP9WorkerHook)tile_worker_hook;
- CHECK_MEM_ERROR(cm, worker->data1, vpx_malloc(sizeof(TileWorkerData)));
+ CHECK_MEM_ERROR(cm, worker->data1,
+ vpx_memalign(32, sizeof(TileWorkerData)));
CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
if (i < num_workers - 1 && !vp9_worker_reset(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index c677907..a996e0e 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -53,8 +53,7 @@
int64_t tx_count_32x32p_stats[TX_SIZE_CONTEXTS][TX_SIZES];
int64_t tx_count_16x16p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 1];
int64_t tx_count_8x8p_stats[TX_SIZE_CONTEXTS][TX_SIZES - 2];
-int64_t switchable_interp_stats[SWITCHABLE_FILTERS+1]
- [SWITCHABLE_FILTERS];
+int64_t switchable_interp_stats[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
void init_tx_count_stats() {
vp9_zero(tx_count_32x32p_stats);
@@ -87,10 +86,9 @@
static void update_switchable_interp_stats(VP9_COMMON *cm) {
int i, j;
- for (i = 0; i < SWITCHABLE_FILTERS+1; ++i)
- for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+ for (j = 0; j < SWITCHABLE_FILTERS; ++j)
switchable_interp_stats[i][j] += cm->fc.switchable_interp_count[i][j];
- }
}
void write_tx_count_stats() {
@@ -140,9 +138,9 @@
fclose(fp);
printf(
- "vp9_default_switchable_filter_count[SWITCHABLE_FILTERS+1]"
+ "vp9_default_switchable_filter_count[SWITCHABLE_FILTER_CONTEXTS]"
"[SWITCHABLE_FILTERS] = {\n");
- for (i = 0; i < SWITCHABLE_FILTERS+1; i++) {
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
printf(" { ");
for (j = 0; j < SWITCHABLE_FILTERS; j++) {
printf("%"PRId64", ", switchable_interp_stats[i][j]);
@@ -236,17 +234,16 @@
static void update_switchable_interp_probs(VP9_COMP *const cpi,
vp9_writer* const bc) {
VP9_COMMON *const cm = &cpi->common;
- unsigned int branch_ct[SWITCHABLE_FILTERS + 1]
- [SWITCHABLE_FILTERS - 1][2];
- vp9_prob new_prob[SWITCHABLE_FILTERS + 1][SWITCHABLE_FILTERS - 1];
+ unsigned int branch_ct[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1][2];
+ vp9_prob new_prob[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1];
int i, j;
- for (j = 0; j <= SWITCHABLE_FILTERS; ++j) {
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
vp9_tree_probs_from_distribution(
vp9_switchable_interp_tree,
new_prob[j], branch_ct[j],
cm->counts.switchable_interp[j], 0);
}
- for (j = 0; j <= SWITCHABLE_FILTERS; ++j) {
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) {
vp9_cond_prob_diff_update(bc, &cm->fc.switchable_interp_prob[j][i],
branch_ct[j][i]);
@@ -1142,7 +1139,7 @@
int i, j, c = 0;
for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
count[i] = 0;
- for (j = 0; j <= SWITCHABLE_FILTERS; ++j)
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
count[i] += cm->counts.switchable_interp[j][i];
c += (count[i] > 0);
}
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index db2564b..583c6c8 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -42,7 +42,7 @@
int comp_pred_diff;
int single_pred_diff;
int64_t tx_rd_diff[TX_MODES];
- int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
+ int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
// motion vector cache for adaptive motion search control in partition
// search loop
@@ -118,8 +118,7 @@
unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
- int switchable_interp_costs[SWITCHABLE_FILTERS + 1]
- [SWITCHABLE_FILTERS];
+ int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 9408e54..44ade18 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -465,7 +465,7 @@
cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
- for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
}
}
@@ -2279,7 +2279,7 @@
cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
}
- for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs;
cpi->rd_filter_threshes[frame_type][i] =
(cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 91546e8..e52e8ec 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -418,6 +418,7 @@
struct encode_b_args *const args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
+ struct optimize_ctx *const ctx = args->ctx;
struct macroblockd_plane *const pd = &xd->plane[plane];
const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
block);
@@ -429,14 +430,18 @@
// TODO(jingning): per transformed block zero forcing only enabled for
// luma component. will integrate chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+ int x, y;
pd->eobs[block] = 0;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
+ ctx->ta[plane][x] = 0;
+ ctx->tl[plane][y] = 0;
return;
}
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
if (x->optimize)
- vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
+ vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
if (x->skip_encode || pd->eobs[block] == 0)
return;
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index b867d8b..7eb6592 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -36,7 +36,7 @@
vp9_kf_uv_mode_prob[INTRA_MODES - 1],
vp9_intra_mode_tree);
- for (i = 0; i <= SWITCHABLE_FILTERS; ++i)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
cm->fc.switchable_interp_prob[i],
vp9_switchable_interp_tree);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index ad214c7..b664f1e 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -2866,7 +2866,7 @@
cpi->active_best_quality = inter_minq[q];
// 1-pass: for now, use the average Q for the active_best, if its lower
// than active_worst.
- if (cpi->pass == 0 && (cpi->avg_frame_qindex < cpi->active_worst_quality))
+ if (cpi->pass == 0 && (cpi->avg_frame_qindex < q))
cpi->active_best_quality = inter_minq[cpi->avg_frame_qindex];
#endif
@@ -2902,7 +2902,14 @@
if (cm->frame_type == KEY_FRAME && !cpi->this_key_frame_forced) {
*top_index =
(cpi->active_worst_quality + cpi->active_best_quality * 3) / 4;
+ // If this is the first (key) frame in 1-pass, set active best to the
+ // user best-allowed q and top_index to the user worst-allowed q.
+ if (cpi->pass == 0 && cpi->common.current_video_frame == 0) {
+ cpi->active_best_quality = cpi->oxcf.best_allowed_q;
+ *top_index = cpi->oxcf.worst_allowed_q;
+ }
} else if (!cpi->is_src_frame_alt_ref &&
+ (cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER) &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
*top_index =
(cpi->active_worst_quality + cpi->active_best_quality) / 2;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 20831be..0498043 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -396,9 +396,9 @@
// FIXME(rbultje) can this overflow?
int rd_tx_select_threshes[4][TX_MODES];
- int64_t rd_filter_diff[SWITCHABLE_FILTERS + 1];
- int64_t rd_filter_threshes[4][SWITCHABLE_FILTERS + 1];
- int64_t rd_filter_cache[SWITCHABLE_FILTERS + 1];
+ int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+ int64_t rd_filter_threshes[4][SWITCHABLE_FILTER_CONTEXTS];
+ int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS];
int RDMULT;
int RDDIV;
@@ -641,7 +641,7 @@
int dummy_packing; /* flag to indicate if packing is dummy */
- unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1]
+ unsigned int switchable_interp_count[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS];
unsigned int tx_stepdown_count[TX_SIZES];
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c134208..f9de78b 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -611,7 +611,7 @@
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
if (plane == 0)
- x->zcoeff_blk[tx_size][block] = rd1 > rd2;
+ x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];
args->this_rate += args->rate;
args->this_dist += args->dist;
@@ -1654,6 +1654,7 @@
MB_PREDICTION_MODE this_mode;
MODE_INFO *mi = x->e_mbd.mi_8x8[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
+ struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
const int label_count = 4;
int64_t this_segment_rd = 0;
int label_mv_thresh;
@@ -1668,8 +1669,8 @@
int subpelmv = 1, have_ref = 0;
const int has_second_rf = has_second_ref(mbmi);
- vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
- vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
+ vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
+ vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
v_fn_ptr = &cpi->fn_ptr[bsize];
@@ -1747,7 +1748,7 @@
}
}
- vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
+ vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
sizeof(bsi->rdstat[i][mode_idx].ta));
vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
@@ -1951,6 +1952,13 @@
ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
sizeof(SEG_RDSTAT));
+ if (num_4x4_blocks_wide > 1)
+ bsi->rdstat[i + 1][mode_idx].eobs =
+ ref_bsi->rdstat[i + 1][mode_idx].eobs;
+ if (num_4x4_blocks_high > 1)
+ bsi->rdstat[i + 2][mode_idx].eobs =
+ ref_bsi->rdstat[i + 2][mode_idx].eobs;
+
if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
mode_selected = this_mode;
best_rd = bsi->rdstat[i][mode_idx].brdcost;
@@ -1971,7 +1979,11 @@
bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
bsi->rdstat[i][mode_idx].brate, 0);
bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
- bsi->rdstat[i][mode_idx].eobs = x->e_mbd.plane[0].eobs[i];
+ bsi->rdstat[i][mode_idx].eobs = pd->eobs[i];
+ if (num_4x4_blocks_wide > 1)
+ bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1];
+ if (num_4x4_blocks_high > 1)
+ bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2];
}
if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
@@ -2207,7 +2219,7 @@
int_mv *second_ref_mv,
int64_t comp_pred_diff[NB_PREDICTION_TYPES],
int64_t tx_size_diff[TX_MODES],
- int64_t best_filter_diff[SWITCHABLE_FILTERS + 1]) {
+ int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
MACROBLOCKD *const xd = &x->e_mbd;
// Take a snapshot of the coding context so it can be
@@ -2225,7 +2237,7 @@
vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
- sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
+ sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}
static void setup_pred_block(const MACROBLOCKD *xd,
@@ -2271,12 +2283,8 @@
// set up scaling factors
scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
- scale[frame_type].x_offset_q4 =
- ROUND_POWER_OF_TWO(mi_col * MI_SIZE * scale[frame_type].sfc->x_scale_fp,
- REF_SCALE_SHIFT) & 0xf;
- scale[frame_type].y_offset_q4 =
- ROUND_POWER_OF_TWO(mi_row * MI_SIZE * scale[frame_type].sfc->y_scale_fp,
- REF_SCALE_SHIFT) & 0xf;
+ scale[frame_type].sfc->set_scaled_offsets(&scale[frame_type],
+ mi_row * MI_SIZE, mi_col * MI_SIZE);
// TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
// use the UV scaling factors.
@@ -3119,8 +3127,8 @@
int64_t best_tx_diff[TX_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
int64_t best_pred_rd[NB_PREDICTION_TYPES];
- int64_t best_filter_rd[SWITCHABLE_FILTERS + 1];
- int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
+ int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
+ int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode = { 0 };
int j;
int mode_index, best_mode_index = 0;
@@ -3158,7 +3166,7 @@
best_pred_rd[i] = INT64_MAX;
for (i = 0; i < TX_MODES; i++)
best_tx_rd[i] = INT64_MAX;
- for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
best_filter_rd[i] = INT64_MAX;
for (i = 0; i < TX_SIZES; i++)
rate_uv_intra[i] = INT_MAX;
@@ -3547,7 +3555,7 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < NB_PREDICTION_TYPES; ++i)
best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
- for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
}
@@ -3630,7 +3638,7 @@
cm->mcomp_filter_type != BILINEAR) {
int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
SWITCHABLE_FILTERS : cm->mcomp_filter_type];
- for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
int64_t adj_rd;
// In cases of poor prediction, filter_cache[] can contain really big
// values, which actually are bigger than this_rd itself. This can
@@ -3752,7 +3760,7 @@
}
if (!x->skip) {
- for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
if (best_filter_rd[i] == INT64_MAX)
best_filter_diff[i] = 0;
else
@@ -3817,8 +3825,8 @@
int64_t best_tx_diff[TX_MODES];
int64_t best_pred_diff[NB_PREDICTION_TYPES];
int64_t best_pred_rd[NB_PREDICTION_TYPES];
- int64_t best_filter_rd[SWITCHABLE_FILTERS + 1];
- int64_t best_filter_diff[SWITCHABLE_FILTERS + 1];
+ int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
+ int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode = { 0 };
int mode_index, best_mode_index = 0;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
@@ -3840,7 +3848,7 @@
int best_skip2 = 0;
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
- vp9_zero(x->zcoeff_blk);
+ vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
for (i = 0; i < 4; i++) {
int j;
@@ -3855,7 +3863,7 @@
best_pred_rd[i] = INT64_MAX;
for (i = 0; i < TX_MODES; i++)
best_tx_rd[i] = INT64_MAX;
- for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
best_filter_rd[i] = INT64_MAX;
for (i = 0; i < TX_SIZES; i++)
rate_uv_intra[i] = INT_MAX;
@@ -4141,8 +4149,10 @@
tmp_best_sse = total_sse;
tmp_best_skippable = skippable;
tmp_best_mbmode = *mbmi;
- for (i = 0; i < 4; i++)
+ for (i = 0; i < 4; i++) {
tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
+ x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i];
+ }
pred_exists = 1;
if (switchable_filter_index == 0 &&
cpi->sf.use_rd_breakout &&
@@ -4297,7 +4307,7 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < NB_PREDICTION_TYPES; ++i)
best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
- for (i = 0; i <= SWITCHABLE_FILTERS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
}
@@ -4375,7 +4385,7 @@
cm->mcomp_filter_type != BILINEAR) {
int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
SWITCHABLE_FILTERS : cm->mcomp_filter_type];
- for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
int64_t adj_rd;
// In cases of poor prediction, filter_cache[] can contain really big
// values, which actually are bigger than this_rd itself. This can
@@ -4491,7 +4501,7 @@
}
if (!x->skip) {
- for (i = 0; i <= SWITCHABLE_FILTERS; i++) {
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
if (best_filter_rd[i] == INT64_MAX)
best_filter_diff[i] = 0;
else