Merge "Refactor wedge codebooks and add explicit wedge sign" into nextgenv2
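
Replaces the get_wedge_bits() helper with the per-block-size table
get_wedge_bits_lookup[], reorganizes the wedge codebooks into
{type, transpose, reverse, x, y} parameter tuples chosen by block shape,
and splits the coded wedge into an index plus a one-bit sign, so mirrored
masks no longer need their own codebook entries (each WEDGE_BITS_*
constant drops by one).

For orientation, a standalone sketch (not part of the patch) of how the
encoder's joint wedge search enumerates (index, sign) pairs, mirroring
the rdopt.c loops below:

    #include <stdio.h>

    int main(void) {
      const int wedge_bits = 4;        /* WEDGE_BITS_BIG after this patch */
      const int wedge_types = 1 << wedge_bits;
      int cand;
      /* The search visits 2 * wedge_types candidates: bit 0 carries the
         sign, the upper bits the codebook index. */
      for (cand = 0; cand < 2 * wedge_types; ++cand) {
        const int index = cand >> 1;   /* coded with wedge_bits literal bits */
        const int sign = cand & 1;     /* coded with one extra bit */
        if (cand < 4)                  /* print the first few candidates */
          printf("candidate %d -> index %d, sign %d\n", cand, index, sign);
      }
      return 0;
    }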
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index f7eb141..377d199 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -53,31 +53,41 @@
}
#if CONFIG_EXT_INTER
-#define WEDGE_BITS_SML 3
-#define WEDGE_BITS_MED 4
-#define WEDGE_BITS_BIG 5
+#define WEDGE_BITS_SML 2
+#define WEDGE_BITS_MED 3
+#define WEDGE_BITS_BIG 4
#define WEDGE_NONE -1
#define WEDGE_WEIGHT_BITS 6
-static INLINE int get_wedge_bits(BLOCK_SIZE sb_type) {
- if (sb_type < BLOCK_8X8)
- return 0;
- if (sb_type <= BLOCK_8X8)
- return WEDGE_BITS_SML;
- else if (sb_type <= BLOCK_32X32)
- return WEDGE_BITS_MED;
- else
- return WEDGE_BITS_BIG;
-}
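+// Number of bits used to code the wedge index at each block size; zero
+// means wedge prediction is not available for that size.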
+static const int get_wedge_bits_lookup[BLOCK_SIZES] = {
+ 0,
+ 0,
+ 0,
+ WEDGE_BITS_SML,
+ WEDGE_BITS_MED,
+ WEDGE_BITS_MED,
+ WEDGE_BITS_MED,
+ WEDGE_BITS_MED,
+ WEDGE_BITS_MED,
+ WEDGE_BITS_MED,
+ WEDGE_BITS_BIG,
+ WEDGE_BITS_BIG,
+ WEDGE_BITS_BIG,
+#if CONFIG_EXT_PARTITION
+ WEDGE_BITS_BIG,
+ WEDGE_BITS_BIG,
+ WEDGE_BITS_BIG,
+#endif // CONFIG_EXT_PARTITION
+};
static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
(void) sb_type;
- return get_wedge_bits(sb_type) > 0;
+ return get_wedge_bits_lookup[sb_type] > 0;
}
static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
(void) sb_type;
- return 0; // get_wedge_bits(sb_type) > 0;
+ return get_wedge_bits_lookup[sb_type] > 0;
}
static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
@@ -205,9 +215,10 @@
// TODO(debargha): Consolidate these flags
int use_wedge_interintra;
int interintra_wedge_index;
- int interintra_uv_wedge_index;
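+  // Sign of the interintra wedge mask; currently always coded as 0.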
+ int interintra_wedge_sign;
int use_wedge_interinter;
int interinter_wedge_index;
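+  // One coded bit choosing which side of the wedge takes the first predictor.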
+ int interinter_wedge_sign;
#endif // CONFIG_EXT_INTER
#if CONFIG_OBMC
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index e5e2442..69857ea 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -44,6 +44,11 @@
return smoothfn[m + SMOOTHER_LEN];
}
+#define WEDGE_OBLIQUE 1
+#define WEDGE_STRAIGHT 0
+
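+// Each codebook entry carries WEDGE_PARMS values:
+// {WEDGE_OBLIQUE/WEDGE_STRAIGHT, transpose, reverse, x, y}, where the
+// wedge boundary passes through the point (x * w / 4, y * h / 4).
+// The reverse flag is unused for straight wedges.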
+#define WEDGE_PARMS 5
+
// [negative][transpose][reverse]
DECLARE_ALIGNED(16, static uint8_t,
wedge_mask_obl[2][2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
@@ -51,6 +56,7 @@
DECLARE_ALIGNED(16, static uint8_t,
wedge_mask_str[2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
+// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
+// The soft mask is obtained by computing f(x, y) and then calling
+// get_masked_weight(f(x, y)).
void vp10_init_wedge_masks() {
int i, j;
const int w = MASK_MASTER_SIZE;
@@ -62,289 +68,208 @@
int x = (2 * j + 1 - (a[2] * w) / 2);
int y = (2 * i + 1 - (a[3] * h) / 2);
int m = (a[0] * x + a[1] * y) / 2;
- wedge_mask_obl[0][0][0][i * stride + j] =
- wedge_mask_obl[0][1][0][j * stride + i] =
- wedge_mask_obl[0][0][1][i * stride + w - 1 - j] =
- wedge_mask_obl[0][1][1][(w - 1 - j) * stride + i] =
- get_masked_weight(m);
wedge_mask_obl[1][0][0][i * stride + j] =
- wedge_mask_obl[1][1][0][j * stride + i] =
- wedge_mask_obl[1][0][1][i * stride + w - 1 - j] =
- wedge_mask_obl[1][1][1][(w - 1 - j) * stride + i] =
+ wedge_mask_obl[1][1][0][j * stride + i] =
+ get_masked_weight(m);
+ wedge_mask_obl[1][0][1][i * stride + w - 1 - j] =
+ wedge_mask_obl[1][1][1][(w - 1 - j) * stride + i] =
(1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m);
- wedge_mask_str[0][0][i * stride + j] =
- wedge_mask_str[0][1][j * stride + i] =
- get_masked_weight(x);
+ wedge_mask_obl[0][0][0][i * stride + j] =
+ wedge_mask_obl[0][1][0][j * stride + i] =
+ (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m);
+ wedge_mask_obl[0][0][1][i * stride + w - 1 - j] =
+ wedge_mask_obl[0][1][1][(w - 1 - j) * stride + i] =
+ get_masked_weight(m);
wedge_mask_str[1][0][i * stride + j] =
- wedge_mask_str[1][1][j * stride + i] =
+ wedge_mask_str[1][1][j * stride + i] =
+ get_masked_weight(x);
+ wedge_mask_str[0][0][i * stride + j] =
+ wedge_mask_str[0][1][j * stride + i] =
(1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x);
}
}
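+// Wedge codebooks, selected by block shape: _hgtw applies when h > w,
+// _hltw when h < w, and _heqw when h == w.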
+static const int wedge_params_sml[1 << WEDGE_BITS_SML]
+ [WEDGE_PARMS] = {
+ {WEDGE_OBLIQUE, 1, 1, 2, 2},
+ {WEDGE_OBLIQUE, 1, 0, 2, 2},
+ {WEDGE_OBLIQUE, 0, 1, 2, 2},
+ {WEDGE_OBLIQUE, 0, 0, 2, 2},
+};
+
+static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED]
+ [WEDGE_PARMS] = {
+ {WEDGE_OBLIQUE, 1, 1, 2, 2},
+ {WEDGE_OBLIQUE, 1, 0, 2, 2},
+ {WEDGE_OBLIQUE, 0, 1, 2, 2},
+ {WEDGE_OBLIQUE, 0, 0, 2, 2},
+
+ {WEDGE_OBLIQUE, 1, 1, 2, 1},
+ {WEDGE_OBLIQUE, 1, 1, 2, 3},
+ {WEDGE_OBLIQUE, 1, 0, 2, 1},
+ {WEDGE_OBLIQUE, 1, 0, 2, 3},
+};
+
+static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED]
+ [WEDGE_PARMS] = {
+ {WEDGE_OBLIQUE, 1, 1, 2, 2},
+ {WEDGE_OBLIQUE, 1, 0, 2, 2},
+ {WEDGE_OBLIQUE, 0, 1, 2, 2},
+ {WEDGE_OBLIQUE, 0, 0, 2, 2},
+
+ {WEDGE_OBLIQUE, 0, 1, 1, 2},
+ {WEDGE_OBLIQUE, 0, 1, 3, 2},
+ {WEDGE_OBLIQUE, 0, 0, 1, 2},
+ {WEDGE_OBLIQUE, 0, 0, 3, 2},
+};
+
+static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED]
+ [WEDGE_PARMS] = {
+ {WEDGE_OBLIQUE, 1, 1, 2, 2},
+ {WEDGE_OBLIQUE, 1, 0, 2, 2},
+ {WEDGE_OBLIQUE, 0, 1, 2, 2},
+ {WEDGE_OBLIQUE, 0, 0, 2, 2},
+
+ {WEDGE_STRAIGHT, 1, 0, 2, 1},
+ {WEDGE_STRAIGHT, 1, 0, 2, 3},
+ {WEDGE_STRAIGHT, 0, 0, 1, 2},
+ {WEDGE_STRAIGHT, 0, 0, 3, 2},
+};
+
+static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG]
+ [WEDGE_PARMS] = {
+ {WEDGE_OBLIQUE, 1, 1, 2, 2},
+ {WEDGE_OBLIQUE, 1, 0, 2, 2},
+ {WEDGE_OBLIQUE, 0, 1, 2, 2},
+ {WEDGE_OBLIQUE, 0, 0, 2, 2},
+
+ {WEDGE_OBLIQUE, 1, 1, 2, 1},
+ {WEDGE_OBLIQUE, 1, 1, 2, 3},
+ {WEDGE_OBLIQUE, 1, 0, 2, 1},
+ {WEDGE_OBLIQUE, 1, 0, 2, 3},
+
+ {WEDGE_OBLIQUE, 0, 1, 1, 2},
+ {WEDGE_OBLIQUE, 0, 1, 3, 2},
+ {WEDGE_OBLIQUE, 0, 0, 1, 2},
+ {WEDGE_OBLIQUE, 0, 0, 3, 2},
+
+ {WEDGE_STRAIGHT, 1, 0, 2, 1},
+ {WEDGE_STRAIGHT, 1, 0, 2, 2},
+ {WEDGE_STRAIGHT, 1, 0, 2, 3},
+ {WEDGE_STRAIGHT, 0, 0, 2, 2},
+};
+
+static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG]
+ [WEDGE_PARMS] = {
+ {WEDGE_OBLIQUE, 1, 1, 2, 2},
+ {WEDGE_OBLIQUE, 1, 0, 2, 2},
+ {WEDGE_OBLIQUE, 0, 1, 2, 2},
+ {WEDGE_OBLIQUE, 0, 0, 2, 2},
+
+ {WEDGE_OBLIQUE, 1, 1, 2, 1},
+ {WEDGE_OBLIQUE, 1, 1, 2, 3},
+ {WEDGE_OBLIQUE, 1, 0, 2, 1},
+ {WEDGE_OBLIQUE, 1, 0, 2, 3},
+
+ {WEDGE_OBLIQUE, 0, 1, 1, 2},
+ {WEDGE_OBLIQUE, 0, 1, 3, 2},
+ {WEDGE_OBLIQUE, 0, 0, 1, 2},
+ {WEDGE_OBLIQUE, 0, 0, 3, 2},
+
+ {WEDGE_STRAIGHT, 0, 0, 1, 2},
+ {WEDGE_STRAIGHT, 0, 0, 2, 2},
+ {WEDGE_STRAIGHT, 0, 0, 3, 2},
+ {WEDGE_STRAIGHT, 1, 0, 2, 2},
+};
+
+static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG]
+ [WEDGE_PARMS] = {
+ {WEDGE_OBLIQUE, 1, 1, 2, 2},
+ {WEDGE_OBLIQUE, 1, 0, 2, 2},
+ {WEDGE_OBLIQUE, 0, 1, 2, 2},
+ {WEDGE_OBLIQUE, 0, 0, 2, 2},
+
+ {WEDGE_OBLIQUE, 1, 1, 2, 1},
+ {WEDGE_OBLIQUE, 1, 1, 2, 3},
+ {WEDGE_OBLIQUE, 1, 0, 2, 1},
+ {WEDGE_OBLIQUE, 1, 0, 2, 3},
+
+ {WEDGE_OBLIQUE, 0, 1, 1, 2},
+ {WEDGE_OBLIQUE, 0, 1, 3, 2},
+ {WEDGE_OBLIQUE, 0, 0, 1, 2},
+ {WEDGE_OBLIQUE, 0, 0, 3, 2},
+
+ {WEDGE_STRAIGHT, 1, 0, 2, 1},
+ {WEDGE_STRAIGHT, 1, 0, 2, 3},
+ {WEDGE_STRAIGHT, 0, 0, 1, 2},
+ {WEDGE_STRAIGHT, 0, 0, 3, 2},
+};
+
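+// Per-block-size codebook pointers; NULL for sizes with no wedge coding.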
+static const int *get_wedge_params_lookup[BLOCK_SIZES] = {
+ NULL,
+ NULL,
+ NULL,
+ &wedge_params_sml[0][0],
+ &wedge_params_med_hgtw[0][0],
+ &wedge_params_med_hltw[0][0],
+ &wedge_params_med_heqw[0][0],
+ &wedge_params_med_hgtw[0][0],
+ &wedge_params_med_hltw[0][0],
+ &wedge_params_med_heqw[0][0],
+ &wedge_params_big_hgtw[0][0],
+ &wedge_params_big_hltw[0][0],
+ &wedge_params_big_heqw[0][0],
+#if CONFIG_EXT_PARTITION
+ &wedge_params_big_hgtw[0][0],
+ &wedge_params_big_hltw[0][0],
+ &wedge_params_big_heqw[0][0],
+#endif // CONFIG_EXT_PARTITION
+};
+
static const uint8_t *get_wedge_mask_inplace(const int *a,
+ int neg,
int h, int w) {
- const int woff = (a[2] * w) >> 2;
- const int hoff = (a[3] * h) >> 2;
- const int oblique = (abs(a[0]) + abs(a[1]) == 3);
const uint8_t *master;
- int transpose, reverse, negative;
- if (oblique) {
- negative = (a[0] < 0);
- transpose = (abs(a[0]) == 1);
- reverse = (a[0] < 0) ^ (a[1] < 0);
- } else {
- negative = (a[0] < 0 || a[1] < 0);
- transpose = (a[0] == 0);
- reverse = 0;
- }
- master = (oblique ?
- wedge_mask_obl[negative][transpose][reverse] :
- wedge_mask_str[negative][transpose]) +
+  int woff, hoff;
+  if (!a) return NULL;  // WEDGE_NONE has no codebook entry
+  woff = (a[3] * w) >> 2;
+  hoff = (a[4] * h) >> 2;
+ master = (a[0] ?
+ wedge_mask_obl[neg][a[1]][a[2]] :
+ wedge_mask_str[neg][a[1]]) +
MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
MASK_MASTER_SIZE / 2 - woff;
return master;
}
-// Equation of line: f(x, y) = a[0]*(x - a[2]*w/4) + a[1]*(y - a[3]*h/4) = 0
-// The soft mask is obtained by computing f(x, y) and then calling
-// get_masked_weight(f(x, y)).
-static const int wedge_params_sml[1 << WEDGE_BITS_SML][4] = {
- {-1, 2, 2, 2},
- { 1, -2, 2, 2},
- {-2, 1, 2, 2},
- { 2, -1, 2, 2},
- {-2, -1, 2, 2},
- { 2, 1, 2, 2},
- {-1, -2, 2, 2},
- { 1, 2, 2, 2},
-};
-
-static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED][4] = {
- {-1, 2, 2, 2},
- { 1, -2, 2, 2},
- {-2, 1, 2, 2},
- { 2, -1, 2, 2},
- {-2, -1, 2, 2},
- { 2, 1, 2, 2},
- {-1, -2, 2, 2},
- { 1, 2, 2, 2},
-
- {-1, 2, 2, 1},
- { 1, -2, 2, 1},
- {-1, 2, 2, 3},
- { 1, -2, 2, 3},
- {-1, -2, 2, 1},
- { 1, 2, 2, 1},
- {-1, -2, 2, 3},
- { 1, 2, 2, 3},
-};
-
-static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED][4] = {
- {-1, 2, 2, 2},
- { 1, -2, 2, 2},
- {-2, 1, 2, 2},
- { 2, -1, 2, 2},
- {-2, -1, 2, 2},
- { 2, 1, 2, 2},
- {-1, -2, 2, 2},
- { 1, 2, 2, 2},
-
- {-2, 1, 1, 2},
- { 2, -1, 1, 2},
- {-2, 1, 3, 2},
- { 2, -1, 3, 2},
- {-2, -1, 1, 2},
- { 2, 1, 1, 2},
- {-2, -1, 3, 2},
- { 2, 1, 3, 2},
-};
-
-static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED][4] = {
- {-1, 2, 2, 2},
- { 1, -2, 2, 2},
- {-2, 1, 2, 2},
- { 2, -1, 2, 2},
- {-2, -1, 2, 2},
- { 2, 1, 2, 2},
- {-1, -2, 2, 2},
- { 1, 2, 2, 2},
-
- { 0, -2, 0, 1},
- { 0, 2, 0, 1},
- { 0, -2, 0, 3},
- { 0, 2, 0, 3},
- {-2, 0, 1, 0},
- { 2, 0, 1, 0},
- {-2, 0, 3, 0},
- { 2, 0, 3, 0},
-};
-
-static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG][4] = {
- {-1, 2, 2, 2},
- { 1, -2, 2, 2},
- {-2, 1, 2, 2},
- { 2, -1, 2, 2},
- {-2, -1, 2, 2},
- { 2, 1, 2, 2},
- {-1, -2, 2, 2},
- { 1, 2, 2, 2},
-
- {-1, 2, 2, 1},
- { 1, -2, 2, 1},
- {-1, 2, 2, 3},
- { 1, -2, 2, 3},
- {-1, -2, 2, 1},
- { 1, 2, 2, 1},
- {-1, -2, 2, 3},
- { 1, 2, 2, 3},
-
- {-2, 1, 1, 2},
- { 2, -1, 1, 2},
- {-2, 1, 3, 2},
- { 2, -1, 3, 2},
- {-2, -1, 1, 2},
- { 2, 1, 1, 2},
- {-2, -1, 3, 2},
- { 2, 1, 3, 2},
-
- { 0, -2, 0, 1},
- { 0, 2, 0, 1},
- { 0, -2, 0, 2},
- { 0, 2, 0, 2},
- { 0, -2, 0, 3},
- { 0, 2, 0, 3},
- {-2, 0, 2, 0},
- { 2, 0, 2, 0},
-};
-
-static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG][4] = {
- {-1, 2, 2, 2},
- { 1, -2, 2, 2},
- {-2, 1, 2, 2},
- { 2, -1, 2, 2},
- {-2, -1, 2, 2},
- { 2, 1, 2, 2},
- {-1, -2, 2, 2},
- { 1, 2, 2, 2},
-
- {-1, 2, 2, 1},
- { 1, -2, 2, 1},
- {-1, 2, 2, 3},
- { 1, -2, 2, 3},
- {-1, -2, 2, 1},
- { 1, 2, 2, 1},
- {-1, -2, 2, 3},
- { 1, 2, 2, 3},
-
- {-2, 1, 1, 2},
- { 2, -1, 1, 2},
- {-2, 1, 3, 2},
- { 2, -1, 3, 2},
- {-2, -1, 1, 2},
- { 2, 1, 1, 2},
- {-2, -1, 3, 2},
- { 2, 1, 3, 2},
-
- { 0, -2, 0, 2},
- { 0, 2, 0, 2},
- {-2, 0, 1, 0},
- { 2, 0, 1, 0},
- {-2, 0, 2, 0},
- { 2, 0, 2, 0},
- {-2, 0, 3, 0},
- { 2, 0, 3, 0},
-};
-
-static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG][4] = {
- {-1, 2, 2, 2},
- { 1, -2, 2, 2},
- {-2, 1, 2, 2},
- { 2, -1, 2, 2},
- {-2, -1, 2, 2},
- { 2, 1, 2, 2},
- {-1, -2, 2, 2},
- { 1, 2, 2, 2},
-
- {-1, 2, 2, 1},
- { 1, -2, 2, 1},
- {-1, 2, 2, 3},
- { 1, -2, 2, 3},
- {-1, -2, 2, 1},
- { 1, 2, 2, 1},
- {-1, -2, 2, 3},
- { 1, 2, 2, 3},
-
- {-2, 1, 1, 2},
- { 2, -1, 1, 2},
- {-2, 1, 3, 2},
- { 2, -1, 3, 2},
- {-2, -1, 1, 2},
- { 2, 1, 1, 2},
- {-2, -1, 3, 2},
- { 2, 1, 3, 2},
-
- { 0, -2, 0, 1},
- { 0, 2, 0, 1},
- { 0, -2, 0, 3},
- { 0, 2, 0, 3},
- {-2, 0, 1, 0},
- { 2, 0, 1, 0},
- {-2, 0, 3, 0},
- { 2, 0, 3, 0},
-};
-
static const int *get_wedge_params(int wedge_index,
- BLOCK_SIZE sb_type,
- int h, int w) {
+ BLOCK_SIZE sb_type) {
const int *a = NULL;
- const int wedge_bits = get_wedge_bits(sb_type);
-
- if (wedge_index == WEDGE_NONE)
- return NULL;
-
- if (wedge_bits == WEDGE_BITS_SML) {
- a = wedge_params_sml[wedge_index];
- } else if (wedge_bits == WEDGE_BITS_MED) {
- if (h > w)
- a = wedge_params_med_hgtw[wedge_index];
- else if (h < w)
- a = wedge_params_med_hltw[wedge_index];
- else
- a = wedge_params_med_heqw[wedge_index];
- } else if (wedge_bits == WEDGE_BITS_BIG) {
- if (h > w)
- a = wedge_params_big_hgtw[wedge_index];
- else if (h < w)
- a = wedge_params_big_hltw[wedge_index];
- else
- a = wedge_params_big_heqw[wedge_index];
- } else {
- assert(0);
+ if (wedge_index != WEDGE_NONE) {
+ return get_wedge_params_lookup[sb_type] + WEDGE_PARMS * wedge_index;
}
return a;
}
const uint8_t *vp10_get_soft_mask(int wedge_index,
+ int wedge_sign,
BLOCK_SIZE sb_type,
int h, int w) {
- const int *a = get_wedge_params(wedge_index, sb_type, h, w);
- if (a) {
- return get_wedge_mask_inplace(a, h, w);
- } else {
- return NULL;
- }
+ const int *a = get_wedge_params(wedge_index, sb_type);
+ return get_wedge_mask_inplace(a, wedge_sign, h, w);
}
#if CONFIG_SUPERTX
-const uint8_t *get_soft_mask_extend(int wedge_index, int plane,
+const uint8_t *get_soft_mask_extend(int wedge_index,
+ int wedge_sign,
+ int plane,
BLOCK_SIZE sb_type,
int wedge_offset_y,
int wedge_offset_x) {
int subh = (plane ? 2 : 4) << b_height_log2_lookup[sb_type];
int subw = (plane ? 2 : 4) << b_width_log2_lookup[sb_type];
- const int *a = get_wedge_params(wedge_index, sb_type, subh, subw);
+ const int *a = get_wedge_params(wedge_index, sb_type);
if (a) {
- const uint8_t *mask = get_wedge_mask_inplace(a, subh, subw);
+ const uint8_t *mask = get_wedge_mask_inplace(a, wedge_sign, subh, subw);
mask -= (wedge_offset_x + wedge_offset_y * MASK_MASTER_STRIDE);
return mask;
} else {
@@ -355,12 +280,14 @@
static void build_masked_compound_extend(uint8_t *dst, int dst_stride,
uint8_t *dst2, int dst2_stride,
int plane,
- int wedge_index, BLOCK_SIZE sb_type,
+ int wedge_index,
+ int wedge_sign,
+ BLOCK_SIZE sb_type,
int wedge_offset_y, int wedge_offset_x,
int h, int w) {
int i, j;
const uint8_t *mask = get_soft_mask_extend(
- wedge_index, plane, sb_type, wedge_offset_y, wedge_offset_x);
+ wedge_index, wedge_sign, plane, sb_type, wedge_offset_y, wedge_offset_x);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int m = mask[i * MASK_MASTER_STRIDE + j];
@@ -376,12 +303,12 @@
static void build_masked_compound_extend_highbd(
uint8_t *dst_8, int dst_stride,
uint8_t *dst2_8, int dst2_stride, int plane,
- int wedge_index, BLOCK_SIZE sb_type,
+ int wedge_index, int wedge_sign, BLOCK_SIZE sb_type,
int wedge_offset_y, int wedge_offset_x,
int h, int w) {
int i, j;
const uint8_t *mask = get_soft_mask_extend(
- wedge_index, plane, sb_type, wedge_offset_y, wedge_offset_x);
+ wedge_index, wedge_sign, plane, sb_type, wedge_offset_y, wedge_offset_x);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8);
for (i = 0; i < h; ++i)
@@ -400,10 +327,12 @@
static void build_masked_compound(uint8_t *dst, int dst_stride,
uint8_t *dst2, int dst2_stride,
- int wedge_index, BLOCK_SIZE sb_type,
+ int wedge_index, int wedge_sign,
+ BLOCK_SIZE sb_type,
int h, int w) {
int i, j;
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, sb_type, h, w);
+ const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
+ sb_type, h, w);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
int m = mask[i * MASK_MASTER_STRIDE + j];
@@ -418,10 +347,12 @@
#if CONFIG_VP9_HIGHBITDEPTH
static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride,
uint8_t *dst2_8, int dst2_stride,
- int wedge_index, BLOCK_SIZE sb_type,
+ int wedge_index, int wedge_sign,
+ BLOCK_SIZE sb_type,
int h, int w) {
int i, j;
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, sb_type, h, w);
+ const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
+ sb_type, h, w);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8);
for (i = 0; i < h; ++i)
@@ -466,12 +397,14 @@
build_masked_compound_extend_highbd(
dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
else
build_masked_compound_extend(
dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
#else
@@ -479,11 +412,13 @@
build_masked_compound_highbd(
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type, h, w);
else
build_masked_compound(
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX
#else // CONFIG_VP9_HIGHBITDEPTH
@@ -495,12 +430,14 @@
build_masked_compound_extend(
dst, dst_stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
#else
build_masked_compound(
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -614,7 +551,7 @@
+ (scaled_mv.col >> SUBPEL_BITS);
#if CONFIG_EXT_INTER
- if (ref && get_wedge_bits(mi->mbmi.sb_type) &&
+ if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
mi->mbmi.use_wedge_interinter)
vp10_make_masked_inter_predictor(
pre, pre_buf->stride, dst, dst_buf->stride,
@@ -1406,7 +1343,7 @@
void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
if (is_interintra_pred(mbmi)) {
mbmi->ref_frame[1] = NONE;
- } else if (has_second_ref(mbmi) && get_wedge_bits(mbmi->sb_type) &&
+ } else if (has_second_ref(mbmi) && is_interinter_wedge_used(mbmi->sb_type) &&
mbmi->use_wedge_interinter) {
mbmi->use_wedge_interinter = 0;
mbmi->ref_frame[1] = NONE;
@@ -1663,6 +1600,7 @@
static void combine_interintra(INTERINTRA_MODE mode,
int use_wedge_interintra,
int wedge_index,
+ int wedge_sign,
BLOCK_SIZE bsize,
BLOCK_SIZE plane_bsize,
uint8_t *comppred,
@@ -1680,8 +1618,9 @@
int i, j;
if (use_wedge_interintra) {
- if (get_wedge_bits(bsize)) {
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw);
+    if (is_interintra_wedge_used(bsize)) {
+ const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
+ bsize, bh, bw);
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int m = mask[i * MASK_MASTER_STRIDE + j];
@@ -1790,6 +1729,7 @@
static void combine_interintra_highbd(INTERINTRA_MODE mode,
int use_wedge_interintra,
int wedge_index,
+ int wedge_sign,
BLOCK_SIZE bsize,
BLOCK_SIZE plane_bsize,
uint8_t *comppred8,
@@ -1812,8 +1752,9 @@
(void) bd;
if (use_wedge_interintra) {
- if (get_wedge_bits(bsize)) {
- const uint8_t *mask = vp10_get_soft_mask(wedge_index, bsize, bh, bw);
+    if (is_interintra_wedge_used(bsize)) {
+ const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
+ bsize, bh, bw);
for (i = 0; i < bh; ++i) {
for (j = 0; j < bw; ++j) {
int m = mask[i * MASK_MASTER_STRIDE + j];
@@ -2019,6 +1960,7 @@
combine_interintra_highbd(xd->mi[0]->mbmi.interintra_mode,
xd->mi[0]->mbmi.use_wedge_interintra,
xd->mi[0]->mbmi.interintra_wedge_index,
+ xd->mi[0]->mbmi.interintra_wedge_sign,
bsize,
plane_bsize,
xd->plane[plane].dst.buf,
@@ -2032,6 +1974,7 @@
combine_interintra(xd->mi[0]->mbmi.interintra_mode,
xd->mi[0]->mbmi.use_wedge_interintra,
xd->mi[0]->mbmi.interintra_wedge_index,
+ xd->mi[0]->mbmi.interintra_wedge_sign,
bsize,
plane_bsize,
xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
@@ -2236,7 +2179,7 @@
struct buf_2d *const dst_buf = &pd->dst;
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
- if (ref && get_wedge_bits(mi->mbmi.sb_type)
+ if (ref && is_interinter_wedge_used(mi->mbmi.sb_type)
&& mi->mbmi.use_wedge_interinter) {
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
@@ -2273,12 +2216,14 @@
build_masked_compound_extend_highbd(
dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
} else {
build_masked_compound_extend(
dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
}
@@ -2286,6 +2231,7 @@
build_masked_compound_extend(dst, dst_buf->stride, tmp_dst,
MAX_SB_SIZE, plane,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
wedge_offset_y, wedge_offset_x, h, w);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -2295,11 +2241,13 @@
build_masked_compound_highbd(dst, dst_buf->stride, tmp_dst,
MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type, h, w);
else
#endif // CONFIG_VP9_HIGHBITDEPTH
build_masked_compound(dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type, h, w);
#endif // CONFIG_SUPERTX
} else {
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 9067c4b..c5b455e 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -410,6 +410,7 @@
void vp10_init_wedge_masks();
const uint8_t *vp10_get_soft_mask(int wedge_index,
+ int wedge_sign,
BLOCK_SIZE sb_type,
int h, int w);
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 14af4b3..9b6aacb 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -1566,8 +1566,8 @@
xd->counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
if (mbmi->use_wedge_interintra) {
mbmi->interintra_wedge_index =
- mbmi->interintra_uv_wedge_index =
- vp10_read_literal(r, get_wedge_bits(bsize));
+ vp10_read_literal(r, get_wedge_bits_lookup[bsize]);
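+        // No sign bit is coded for interintra; the sign is fixed to 0.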
+ mbmi->interintra_wedge_sign = 0;
}
}
}
@@ -1599,7 +1599,8 @@
xd->counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
if (mbmi->use_wedge_interinter) {
mbmi->interinter_wedge_index =
- vp10_read_literal(r, get_wedge_bits(bsize));
+ vp10_read_literal(r, get_wedge_bits_lookup[bsize]);
+ mbmi->interinter_wedge_sign = vp10_read_bit(r);
}
}
#endif // CONFIG_EXT_INTER
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 7b036f3..c8fb2ca 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -1316,7 +1316,8 @@
cm->fc->wedge_interintra_prob[bsize]);
if (mbmi->use_wedge_interintra) {
vp10_write_literal(w, mbmi->interintra_wedge_index,
- get_wedge_bits(bsize));
+ get_wedge_bits_lookup[bsize]);
+ assert(mbmi->interintra_wedge_sign == 0);
}
}
}
@@ -1343,9 +1344,11 @@
is_interinter_wedge_used(bsize)) {
vp10_write(w, mbmi->use_wedge_interinter,
cm->fc->wedge_interinter_prob[bsize]);
- if (mbmi->use_wedge_interinter)
+ if (mbmi->use_wedge_interinter) {
vp10_write_literal(w, mbmi->interinter_wedge_index,
- get_wedge_bits(bsize));
+ get_wedge_bits_lookup[bsize]);
+ vp10_write_bit(w, mbmi->interinter_wedge_sign);
+ }
}
#endif // CONFIG_EXT_INTER
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 4866f71..f1d203d 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -6108,6 +6108,7 @@
static void do_masked_motion_search_indexed(VP10_COMP *cpi, MACROBLOCK *x,
int wedge_index,
+ int wedge_sign,
BLOCK_SIZE bsize,
int mi_row, int mi_col,
int_mv *tmp_mv, int *rate_mv,
@@ -6121,7 +6122,7 @@
int h = (4 << b_height_log2_lookup[sb_type]);
const uint8_t *mask;
const int mask_stride = MASK_MASTER_STRIDE;
- mask = vp10_get_soft_mask(wedge_index, sb_type, h, w);
+ mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type, h, w);
if (which == 0 || which == 2)
do_masked_motion_search(cpi, x, mask, mask_stride, bsize,
@@ -6130,7 +6131,7 @@
if (which == 1 || which == 2) {
// get the negative mask
- mask = vp10_get_soft_mask(wedge_index ^ 1, sb_type, h, w);
+ mask = vp10_get_soft_mask(wedge_index, !wedge_sign, sb_type, h, w);
do_masked_motion_search(cpi, x, mask, mask_stride, bsize,
mi_row, mi_col, &tmp_mv[1], &rate_mv[1],
1, mv_idx[1]);
@@ -6790,9 +6791,9 @@
rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
best_rd_nowedge = rd;
mbmi->use_wedge_interinter = 1;
- rs = get_wedge_bits(bsize) * 256 +
+ rs = (1 + get_wedge_bits_lookup[bsize]) * 256 +
vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
- wedge_types = (1 << get_wedge_bits(bsize));
+ wedge_types = (1 << get_wedge_bits_lookup[bsize]);
if (have_newmv_in_inter_mode(this_mode)) {
int_mv tmp_mv[2];
int rate_mvs[2], tmp_rate_mv = 0;
@@ -6810,8 +6811,9 @@
vp10_build_inter_predictors_for_planes_single_buf(
xd, bsize, mi_row, mi_col, 1, preds1, strides);
- for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- mbmi->interinter_wedge_index = wedge_index;
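+    // Jointly search index and sign: bit 0 of the loop counter is the
+    // wedge sign, the remaining bits the codebook index.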
+ for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) {
+ mbmi->interinter_wedge_index = wedge_index >> 1;
+ mbmi->interinter_wedge_sign = wedge_index & 1;
vp10_build_wedge_inter_predictor_from_buf(xd, bsize, mi_row, mi_col,
preds0, strides,
preds1, strides);
@@ -6823,10 +6825,13 @@
best_rd_wedge = rd;
}
}
- mbmi->interinter_wedge_index = best_wedge_index;
+ mbmi->interinter_wedge_index = best_wedge_index >> 1;
+ mbmi->interinter_wedge_sign = best_wedge_index & 1;
if (this_mode == NEW_NEWMV) {
int mv_idxs[2] = {0, 0};
- do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index,
+ do_masked_motion_search_indexed(cpi, x,
+ mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign,
bsize, mi_row, mi_col, tmp_mv, rate_mvs,
mv_idxs, 2);
tmp_rate_mv = rate_mvs[0] + rate_mvs[1];
@@ -6834,14 +6839,18 @@
mbmi->mv[1].as_int = tmp_mv[1].as_int;
} else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
int mv_idxs[2] = {0, 0};
- do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index,
+ do_masked_motion_search_indexed(cpi, x,
+ mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign,
bsize, mi_row, mi_col, tmp_mv, rate_mvs,
mv_idxs, 0);
tmp_rate_mv = rate_mvs[0];
mbmi->mv[0].as_int = tmp_mv[0].as_int;
} else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
int mv_idxs[2] = {0, 0};
- do_masked_motion_search_indexed(cpi, x, mbmi->interinter_wedge_index,
+ do_masked_motion_search_indexed(cpi, x,
+ mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign,
bsize, mi_row, mi_col, tmp_mv, rate_mvs,
mv_idxs, 1);
tmp_rate_mv = rate_mvs[1];
@@ -6860,7 +6869,8 @@
}
if (best_rd_wedge < best_rd_nowedge) {
mbmi->use_wedge_interinter = 1;
- mbmi->interinter_wedge_index = best_wedge_index;
+ mbmi->interinter_wedge_index = best_wedge_index >> 1;
+ mbmi->interinter_wedge_sign = best_wedge_index & 1;
xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
*rate2 += tmp_rate_mv - rate_mv;
@@ -6884,8 +6894,9 @@
xd, bsize, mi_row, mi_col, 0, preds0, strides);
vp10_build_inter_predictors_for_planes_single_buf(
xd, bsize, mi_row, mi_col, 1, preds1, strides);
- for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
- mbmi->interinter_wedge_index = wedge_index;
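+    // As above, enumerate (index, sign) pairs jointly.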
+ for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) {
+ mbmi->interinter_wedge_index = wedge_index >> 1;
+ mbmi->interinter_wedge_sign = wedge_index & 1;
vp10_build_wedge_inter_predictor_from_buf(xd, bsize, mi_row, mi_col,
preds0, strides,
preds1, strides);
@@ -6899,7 +6910,8 @@
}
if (best_rd_wedge < best_rd_nowedge) {
mbmi->use_wedge_interinter = 1;
- mbmi->interinter_wedge_index = best_wedge_index;
+ mbmi->interinter_wedge_index = best_wedge_index >> 1;
+ mbmi->interinter_wedge_sign = best_wedge_index & 1;
} else {
mbmi->use_wedge_interinter = 0;
}
@@ -6911,7 +6923,7 @@
pred_exists = 0;
tmp_rd = VPXMIN(best_rd_wedge, best_rd_nowedge);
if (mbmi->use_wedge_interinter)
- *compmode_wedge_cost = get_wedge_bits(bsize) * 256 +
+ *compmode_wedge_cost = (1 + get_wedge_bits_lookup[bsize]) * 256 +
vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
else
*compmode_wedge_cost =
@@ -6924,7 +6936,7 @@
int rmode, rate_sum;
int64_t dist_sum;
int j;
- int wedge_bits, wedge_types, wedge_index, best_wedge_index = -1;
+ int wedge_types, wedge_index, best_wedge_index = -1;
int64_t best_interintra_rd_nowedge = INT64_MAX;
int64_t best_interintra_rd_wedge = INT64_MAX;
int rwedge;
@@ -6992,7 +7004,6 @@
rmode = interintra_mode_cost[mbmi->interintra_mode];
if (is_interintra_wedge_used(bsize)) {
- wedge_bits = get_wedge_bits(bsize);
vp10_combine_interintra(xd, bsize, 0, tmp_buf, MAX_SB_SIZE,
intrapred, MAX_SB_SIZE);
vp10_combine_interintra(xd, bsize, 1,
@@ -7009,12 +7020,12 @@
best_interintra_rd_nowedge = rd;
mbmi->use_wedge_interintra = 1;
- rwedge = wedge_bits * 256 +
+ wedge_types = (1 << get_wedge_bits_lookup[bsize]);
+ rwedge = get_wedge_bits_lookup[bsize] * 256 +
vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
- wedge_types = (1 << wedge_bits);
for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
mbmi->interintra_wedge_index = wedge_index;
- mbmi->interintra_uv_wedge_index = wedge_index;
+ mbmi->interintra_wedge_sign = 0;
vp10_combine_interintra(xd, bsize, 0,
tmp_buf, MAX_SB_SIZE,
intrapred, MAX_SB_SIZE);
@@ -7037,9 +7048,9 @@
if (have_newmv_in_inter_mode(this_mode)) {
// get negative of mask
const uint8_t* mask = vp10_get_soft_mask(
- best_wedge_index ^ 1, bsize, bh, bw);
+ best_wedge_index, 1, bsize, bh, bw);
mbmi->interintra_wedge_index = best_wedge_index;
- mbmi->interintra_uv_wedge_index = best_wedge_index;
+ mbmi->interintra_wedge_sign = 0;
do_masked_motion_search(cpi, x, mask, MASK_MASTER_STRIDE, bsize,
mi_row, mi_col, &tmp_mv, &tmp_rate_mv,
0, mv_idx);
@@ -7062,7 +7073,7 @@
if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
mbmi->use_wedge_interintra = 1;
mbmi->interintra_wedge_index = best_wedge_index;
- mbmi->interintra_uv_wedge_index = best_wedge_index;
+ mbmi->interintra_wedge_sign = 0;
best_interintra_rd = best_interintra_rd_wedge;
mbmi->mv[0].as_int = tmp_mv.as_int;
*rate2 += tmp_rate_mv - rate_mv;
@@ -7083,7 +7094,7 @@
*compmode_interintra_cost += vp10_cost_bit(
cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
if (mbmi->use_wedge_interintra) {
- *compmode_interintra_cost += get_wedge_bits(bsize) * 256;
+ *compmode_interintra_cost += get_wedge_bits_lookup[bsize] * 256;
}
}
} else if (is_interintra_allowed(mbmi)) {
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c
index f3fa210..29cf04d 100644
--- a/vp10/encoder/segmentation.c
+++ b/vp10/encoder/segmentation.c
@@ -168,14 +168,12 @@
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_TYPE partition;
#else
- const int bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
- const int bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
+ int bw, bh;
#endif // CONFIG_EXT_PARTITION_TYPES
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
-
#if CONFIG_EXT_PARTITION_TYPES
if (bsize == BLOCK_8X8)
partition = PARTITION_NONE;
@@ -263,6 +261,9 @@
assert(0);
}
#else
+ bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
+ bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
+
if (bw == bs && bh == bs) {
count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, bs, bs, mi_row, mi_col);