Fix conflicts between ext-inter and cb4x4 modes
Resolve the broken coding pipeline in the ext-inter experiment when
cb4x4 mode is enabled. Turn off the rectangular inter-intra mode:
it still needs more work to hook up under cb4x4, and given its
fairly limited coding performance gains, disable it for the moment.
BUG=aomedia:309
Change-Id: I9b406df6183f75697bfd4eed5125a6e9436d84b0
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index d2e9950..3d90b88 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5391,8 +5391,12 @@
DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_AOM_HIGHBITDEPTH
+#if CONFIG_EXT_INTER && CONFIG_CB4X4
+ (void)ref_mv_sub8x8;
+#endif
+
for (ref = 0; ref < 2; ++ref) {
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && !CONFIG_CB4X4
if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
else
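
The (void)ref_mv_sub8x8 cast added above is the standard C idiom for
silencing an unused-parameter warning: with CB4X4 enabled, every use
of ref_mv_sub8x8 in this function is compiled out, so the parameter
would otherwise be flagged by -Wunused-parameter. A minimal
standalone sketch of the pattern (DEMO_DISABLE_SUB8X8 and the helper
are illustrative, not from the patch):

    /* Silence an unused-parameter warning when all real uses of the
     * argument sit on a compiled-out path. */
    static int mv_cost_sketch(const int *ref_mv_sub8x8, int base_cost) {
    #if defined(DEMO_DISABLE_SUB8X8)
      (void)ref_mv_sub8x8; /* no other references on this path */
      return base_cost;
    #else
      return base_cost + ref_mv_sub8x8[0]; /* sub-8x8 path uses it */
    #endif
    }
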
@@ -5569,13 +5573,13 @@
#if CONFIG_REF_MV
av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
#endif
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && !CONFIG_CB4X4
if (bsize >= BLOCK_8X8)
#endif // CONFIG_EXT_INTER
*rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
&x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && !CONFIG_CB4X4
else
*rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
&ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
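
With both experiments enabled, the guards above compile the sub-8x8
special case out entirely, so sub-8x8 blocks under cb4x4 charge their
MV bits against the same ref_mvs list as larger blocks. After
preprocessing, the rate accumulation reduces to (the patch's own
identifiers, shown with the conditionals resolved):

    /* Effective code when CONFIG_EXT_INTER && CONFIG_CB4X4: the
     * bsize >= BLOCK_8X8 test and the ref_mv_sub8x8 branch vanish. */
    *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
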
@@ -7146,8 +7150,8 @@
const uint8_t *src = p->src.buf;
int src_stride = p->src.stride;
const int f_index = bsize - BLOCK_8X8;
- const int bw = 4 << (b_width_log2_lookup[bsize]);
- const int bh = 4 << (b_height_log2_lookup[bsize]);
+ const int bw = block_size_wide[bsize];
+ const int bh = block_size_high[bsize];
uint32_t esq[2][4], var;
int64_t tl, br;
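
The table swap above matters because 4 << b_width_log2_lookup[bsize]
can only express dimensions of 4 pixels and up, while
block_size_wide[] and block_size_high[] store the pixel dimensions
directly and cover every size cb4x4 can produce. Where both forms are
defined they agree; a standalone sketch with illustrative table
excerpts (the real tables live in av1/common/common_data.h):

    #include <assert.h>

    /* Two entries each from the two lookup styles: 8x8 and 16x8. */
    enum { DEMO_BLOCK_8X8, DEMO_BLOCK_16X8, DEMO_BLOCK_SIZES };
    static const int demo_block_size_wide[DEMO_BLOCK_SIZES] = { 8, 16 };
    static const int demo_b_width_log2[DEMO_BLOCK_SIZES] = { 1, 2 };

    static void demo_check(void) {
      int bs;
      for (bs = 0; bs < DEMO_BLOCK_SIZES; ++bs)
        assert(demo_block_size_wide[bs] == (4 << demo_b_width_log2[bs]));
    }
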
@@ -7340,14 +7344,25 @@
(int64_t)aom_sum_squares_i16(r1, N)) *
(1 << WEDGE_WEIGHT_BITS) / 2;
- av1_wedge_compute_delta_squares(ds, r0, r1, N);
+ if (N < 64)
+ av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
+ else
+ av1_wedge_compute_delta_squares(ds, r0, r1, N);
for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
- wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
+
+ // TODO(jingning): Make the sse2 functions support the N = 16 case.
+ if (N < 64)
+ wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
+ else
+ wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
- sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
+ if (N < 64)
+ sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
+ else
+ sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
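
The three N < 64 guards in this hunk follow one pattern: the SSE2
wedge kernels consume residuals in 64-sample batches, so blocks with
fewer samples than 8x8 (N = bw * bh, e.g. N = 16 for 4x4) must fall
back to the plain C kernels, which the _c suffix names directly. A
sketch of the dispatch as a helper (the wrapper is illustrative; the
patch inlines the check at each call site):

    /* Route small blocks to the C kernel, keep SIMD for the rest.
     * Signatures match the patch; the wrapper name is hypothetical. */
    static uint64_t wedge_sse_dispatch(const int16_t *r1, const int16_t *d,
                                       const uint8_t *m, int N) {
      if (N < 64) /* SSE2 kernel assumes N is a multiple of 64 */
        return av1_wedge_sse_from_residuals_c(r1, d, m, N);
      return av1_wedge_sse_from_residuals(r1, d, m, N);
    }
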
@@ -7405,7 +7420,10 @@
for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
- sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
+ if (N < 64)
+ sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
+ else
+ sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
sse = ROUND_POWER_OF_TWO(sse, bd_round);
model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
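
For context on the 64 threshold: N counts the residual samples in the
block, bw * bh. Before cb4x4 the smallest inter block was 8x8, so N
was always a multiple of 64 and the SIMD kernels (and the asserts
removed below) could rely on that; cb4x4 adds sub-8x8 partitions such
as 4x4, where N = 16. A one-line sketch using the lookup tables from
the earlier hunk:

    /* N = bw * bh; cb4x4 makes values below 64 possible (4x4 -> 16). */
    static int num_residual_samples(BLOCK_SIZE bsize) {
      return block_size_wide[bsize] * block_size_high[bsize];
    }
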
diff --git a/av1/encoder/wedge_utils.c b/av1/encoder/wedge_utils.c
index e45a6a0..e6edbb6 100644
--- a/av1/encoder/wedge_utils.c
+++ b/av1/encoder/wedge_utils.c
@@ -53,7 +53,7 @@
const uint8_t *m, int N) {
uint64_t csse = 0;
int i;
- assert(N % 64 == 0);
+
for (i = 0; i < N; i++) {
int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
t = clamp(t, INT16_MIN, INT16_MAX);
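
For reference, the loop above computes the wedge-blended SSE entirely
in residual space. With r0 and r1 the residuals of the two
predictors, d = r0 - r1 (the d10 buffer at the call sites), and
MAX_MASK_VALUE = (1 << WEDGE_WEIGHT_BITS) = 64, the quantities relate
as follows (a worked sketch from the surrounding definitions; the
clamp merely bounds intermediates to int16 range):

    64 * r1[i] + m[i] * d[i] = m[i] * r0[i] + (64 - m[i]) * r1[i]
    blended residual  e[i]   = (m[i] * r0[i] + (64 - m[i]) * r1[i]) / 64
    returned SSE             = sum over i of e[i]^2
                             = ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS)
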
@@ -97,8 +97,6 @@
int64_t limit) {
int64_t acc = 0;
- assert(N % 64 == 0);
-
do {
acc += *ds++ * *m++;
} while (--N);
@@ -122,8 +120,6 @@
const int16_t *b, int N) {
int i;
- assert(N % 64 == 0);
-
for (i = 0; i < N; i++)
d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
}
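
Dropping the asserts is the flip side of the rdopt.c changes: the C
kernels are now the designated fallback for N < 64, so N % 64 == 0 no
longer holds at every call site. If a guard is still wanted, a weaker
invariant would match the new callers (a sketch, not part of the
patch; it assumes the smallest cb4x4 luma block is 4x4, i.e. N >= 16):

    assert(N > 0 && N % 16 == 0); /* 4x4 -> 16, 4x8 -> 32, 8x8 -> 64 */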