Merge "Pass AV1_COMMON into get_scan" into nextgenv2
diff --git a/aom_dsp/answriter.h b/aom_dsp/answriter.h
index 298b255..370472a 100644
--- a/aom_dsp/answriter.h
+++ b/aom_dsp/answriter.h
@@ -20,8 +20,23 @@
#include "aom_dsp/ans.h"
#include "aom_dsp/prob.h"
#include "aom_ports/mem_ops.h"
+#include "av1/common/odintrin.h"
-#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
+#if RANS_PRECISION <= OD_DIVU_DMAX
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+ do { \
+ quotient = OD_DIVU_SMALL((dividend), (divisor)); \
+ remainder = (dividend) - (quotient) * (divisor); \
+ } while (0)
+#else
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+ do { \
+ quotient = (dividend) / (divisor); \
+ remainder = (dividend) % (divisor); \
+ } while (0)
+#endif
+
+#define ANS_DIV8(dividend, divisor) OD_DIVU_SMALL((dividend), (divisor))
#ifdef __cplusplus
extern "C" {
@@ -72,9 +87,9 @@
ans->state /= IO_BASE;
}
if (!val)
- ans->state = ANS_DIV(ans->state * ANS_P8_PRECISION, p0);
+ ans->state = ANS_DIV8(ans->state * ANS_P8_PRECISION, p0);
else
- ans->state = ANS_DIV((ans->state + 1) * ANS_P8_PRECISION + p - 1, p) - 1;
+ ans->state = ANS_DIV8((ans->state + 1) * ANS_P8_PRECISION + p - 1, p) - 1;
}
struct rans_sym {
@@ -88,15 +103,17 @@
static INLINE void rans_write(struct AnsCoder *ans,
const struct rans_sym *const sym) {
const aom_cdf_prob p = sym->prob;
+ unsigned quot, rem;
while (ans->state >= L_BASE / RANS_PRECISION * IO_BASE * p) {
ans->buf[ans->buf_offset++] = ans->state % IO_BASE;
ans->state /= IO_BASE;
}
- ans->state =
- (ans->state / p) * RANS_PRECISION + ans->state % p + sym->cum_prob;
+ ANS_DIVREM(quot, rem, ans->state, p);
+ ans->state = quot * RANS_PRECISION + rem + sym->cum_prob;
}
-#undef ANS_DIV
+#undef ANS_DIV8
+#undef ANS_DIVREM
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
index 3f2feab..18563b2 100644
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -206,6 +206,7 @@
od_ec_window dif;
od_ec_window vw;
unsigned r;
+ unsigned r_new;
unsigned v;
int ret;
OD_ASSERT(0 < fz);
@@ -216,10 +217,14 @@
OD_ASSERT(32768U <= r);
v = fz * (uint32_t)r >> 15;
vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
- ret = dif >= vw;
- if (ret) dif -= vw;
- r = ret ? r - v : v;
- return od_ec_dec_normalize(dec, dif, r, ret);
+ ret = 0;
+ r_new = v;
+ if (dif >= vw) {
+ r_new = r - v;
+ dif -= vw;
+ ret = 1;
+ }
+ return od_ec_dec_normalize(dec, dif, r_new, ret);
}
/*Decodes a symbol given a cumulative distribution function (CDF) table.
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 4d9bff9..bc1970c 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -191,7 +191,6 @@
TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
#endif
int8_t skip;
- int8_t has_no_coeffs;
int8_t segment_id;
#if CONFIG_SUPERTX
// Minimum of all segment IDs under the current supertx block.
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index 4165e35..f068ee7 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -444,7 +444,12 @@
static const int tx_size_1d[TX_SIZES] = { 4, 8, 16, 32 };
-static const int tx_size_2d[TX_SIZES] = { 16, 64, 256, 1024 };
+static const int tx_size_2d[TX_SIZES_ALL] = {
+ 16, 64, 256, 1024,
+#if CONFIG_EXT_TX
+ 32, 32, 128, 128, 512, 512,
+#endif
+};
static const uint8_t tx_size_1d_log2[TX_SIZES] = { 2, 3, 4, 5 };
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
index c8022f2..d0b897c 100644
--- a/av1/common/loopfilter.c
+++ b/av1/common/loopfilter.c
@@ -753,7 +753,7 @@
// If the block has no coefficients and is not intra we skip applying
// the loop filter on block edges.
- if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;
+ if (mbmi->skip && is_inter_block(mbmi)) return;
// Here we are adding a mask for the transform size. The transform
// size mask is set to be correct for a 64x64 prediction block size. We
@@ -818,7 +818,7 @@
*above_y |= above_prediction_mask[block_size] << shift_y;
*left_y |= left_prediction_mask[block_size] << shift_y;
- if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;
+ if (mbmi->skip && is_inter_block(mbmi)) return;
*above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
<< shift_y;
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index c0fc494..b07a8bd 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -647,6 +647,87 @@
}
#endif
+#if CONFIG_SUB8X8_MC
+ if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0) {
+ // block size in log2
+ const int b4_wl = b_width_log2_lookup[mi->mbmi.sb_type];
+ const int b4_hl = b_height_log2_lookup[mi->mbmi.sb_type];
+ const int b8_sl = b_width_log2_lookup[BLOCK_8X8];
+
+ // block size
+ const int b4_w = 1 << b4_wl;
+ const int b4_h = 1 << b4_hl;
+ const int b8_s = 1 << b8_sl;
+ int idx, idy;
+
+ const int x_base = x;
+ const int y_base = y;
+
+ // processing unit size
+ const int x_step = w >> (b8_sl - b4_wl);
+ const int y_step = h >> (b8_sl - b4_hl);
+
+ for (idy = 0; idy < b8_s; idy += b4_h) {
+ for (idx = 0; idx < b8_s; idx += b4_w) {
+ const int chr_idx = (idy * 2) + idx;
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ struct buf_2d *const dst_buf = &pd->dst;
+ uint8_t *dst = dst_buf->buf;
+ const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv;
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(
+ xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+ uint8_t *pre;
+ MV32 scaled_mv;
+ int xs, ys, subpel_x, subpel_y;
+ const int is_scaled = av1_is_scaled(sf);
+
+ x = x_base + idx * x_step;
+ y = y_base + idy * y_step;
+
+ dst += dst_buf->stride * y + x;
+
+ if (is_scaled) {
+ pre =
+ pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+ scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+ xs = sf->x_step_q4;
+ ys = sf->y_step_q4;
+ } else {
+ pre = pre_buf->buf + y * pre_buf->stride + x;
+ scaled_mv.row = mv_q4.row;
+ scaled_mv.col = mv_q4.col;
+ xs = ys = 16;
+ }
+
+ subpel_x = scaled_mv.col & SUBPEL_MASK;
+ subpel_y = scaled_mv.row & SUBPEL_MASK;
+ pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+ (scaled_mv.col >> SUBPEL_BITS);
+
+#if CONFIG_AOM_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+ subpel_x, subpel_y, sf, x_step, y_step, ref,
+ &mi->mbmi.interp_filter, xs, ys, xd->bd);
+ } else {
+ inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+ subpel_x, subpel_y, sf, x_step, y_step, ref,
+ &mi->mbmi.interp_filter, xs, ys);
+ }
+#else
+ inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x,
+ subpel_y, sf, x_step, y_step, ref,
+ &mi->mbmi.interp_filter, xs, ys);
+#endif
+ }
+ }
+ }
+ return;
+ }
+#endif
+
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index bfa7e95..5f62f0a 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -50,7 +50,7 @@
const int16_t *kernel_y =
av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
#else
- if (interp_filter_params.taps == SUBPEL_TAPS) {
+ if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2) {
const int16_t *kernel_x =
av1_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
const int16_t *kernel_y =
@@ -109,7 +109,7 @@
const int16_t *kernel_y =
av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
#else
- if (interp_filter_params.taps == SUBPEL_TAPS) {
+ if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2) {
const int16_t *kernel_x =
av1_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
const int16_t *kernel_y =
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index bbd788b..2e6e744 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -253,22 +253,12 @@
}
#endif // CONFIG_AOM_HIGHBITDEPTH
- if (eob == 1) {
+ // TODO(jingning): This cleans up different reset requests from various
+ // experiments, but incurs unnecessary memset size.
+ if (eob == 1)
dqcoeff[0] = 0;
- } else {
- if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
- memset(dqcoeff, 0, 4 * 4 * num_4x4_blocks_wide_txsize_lookup[tx_size] *
- sizeof(dqcoeff[0]));
-#if CONFIG_EXT_TX
- else
- memset(dqcoeff, 0, get_tx2d_size(tx_size) * sizeof(dqcoeff[0]));
-#else
- else if (tx_size == TX_32X32 && eob <= 34)
- memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
- else
- memset(dqcoeff, 0, get_tx2d_size(tx_size) * sizeof(dqcoeff[0]));
-#endif
- }
+ else
+ memset(dqcoeff, 0, tx_size_2d[tx_size] * sizeof(dqcoeff[0]));
}
}
@@ -1154,7 +1144,6 @@
#endif // CONFIG_EXT_PARTITION_TYPES
BLOCK_SIZE bsize, int bwl, int bhl) {
AV1_COMMON *const cm = &pbi->common;
- const int less8x8 = bsize < BLOCK_8X8;
const int bw = 1 << (bwl - 1);
const int bh = 1 << (bhl - 1);
const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
@@ -1374,9 +1363,6 @@
plane, row, col, tx_size);
#endif
}
-
- if (!less8x8 && eobtotal == 0)
- mbmi->has_no_coeffs = 1; // skip loopfilter
}
}
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index fdbb4dd..9dc6a2e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4489,6 +4489,10 @@
#define MIN_TRANS_THRESH 8
#define GLOBAL_MOTION_ADVANTAGE_THRESH 0.60
#define GLOBAL_MOTION_MODEL ROTZOOM
+// TODO(sarahparker) This function needs to be adjusted
+// to accommodate changes in the parameter integerization.
+// Commenting it out until the fix is made.
+/*
static void refine_integerized_param(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
int use_hbd, int bd,
@@ -4565,6 +4569,7 @@
*param = best_param;
}
}
+*/
static void convert_to_params(const double *params, TransformationType type,
int16_t *model) {
@@ -4579,7 +4584,7 @@
GM_TRANS_DECODE_FACTOR;
for (i = 2; i < n_params; ++i) {
- diag_value = ((i && 1) ? (1 << GM_ALPHA_PREC_BITS) : 0);
+ diag_value = ((i & 1) ? (1 << GM_ALPHA_PREC_BITS) : 0);
model[i] = (int16_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
model[i] =
(int16_t)(clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX) +
@@ -4643,14 +4648,6 @@
convert_model_to_params(params, GLOBAL_MOTION_MODEL,
&cm->global_motion[frame]);
if (get_gmtype(&cm->global_motion[frame]) > GLOBAL_ZERO) {
- refine_integerized_param(
- &cm->global_motion[frame].motion_params,
-#if CONFIG_AOM_HIGHBITDEPTH
- xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
-#endif // CONFIG_AOM_HIGHBITDEPTH
- ref_buf->y_buffer, ref_buf->y_width, ref_buf->y_height,
- ref_buf->y_stride, cpi->Source->y_buffer, cpi->Source->y_width,
- cpi->Source->y_height, cpi->Source->y_stride, 3);
// compute the advantage of using gm parameters over 0 motion
erroradvantage = av1_warp_erroradv(
&cm->global_motion[frame].motion_params,