ARBITRARY_WEDGE: save raw mask instead of smoothed
Also:
- For RLE, also compute the rate in bits scaled by 512 (av1_cost_literal
  units).
- Bitstream writing is partial: RLE is computed, but actually writing it to
  the bitstream is still pending.
- Use av1_cost_literal for the rate computation.
- Fix the build when DUMP_SEGMENT_MASKS is enabled.
Change-Id: I46efdf9f22f781c4eb5e43bd2dfaebdab018cc80
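For context on the "bits scaled by 512" wording: libaom keeps rates in
1/512-bit units (AV1_PROB_COST_SHIFT == 9), and av1_cost_literal(n) scales n
raw bits into those units. A minimal standalone sketch of that conversion
(the local names below mirror av1/encoder/cost.h but are not the library's):

    #include <assert.h>

    #define PROB_COST_SHIFT 9 /* Mirrors AV1_PROB_COST_SHIFT. */

    /* Cost of n uniformly-distributed literal bits, in 1/512-bit units;
     * mirrors av1_cost_literal(). */
    static int cost_literal(int n) { return n * (1 << PROB_COST_SHIFT); }

    int main(void) {
      assert(cost_literal(1) == 512);   /* One raw bit costs 512 units. */
      assert(cost_literal(13) == 6656); /* One (symbol + 12-bit run). */
      return 0;
    }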
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 7428fc2..5d6af0b 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -206,6 +206,9 @@
// sent together in functions related to interinter compound modes
typedef struct {
uint8_t *seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+ uint8_t *seg_mask_smoothed;
+#endif // CONFIG_ARBITRARY_WEDGE
int8_t wedge_index;
int8_t wedge_sign;
DIFFWTD_MASK_TYPE mask_type;
@@ -831,6 +834,13 @@
*/
DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
+#if CONFIG_ARBITRARY_WEDGE
+ // Only used for arbitrary wedge. Derived from 'seg_mask' by extending the
+ // binary mask range and then smoothing it into a contiguous soft mask.
+ // TODO(urvang): Does this need to be 2 * MAX_SB_SQUARE like 'seg_mask'?
+ DECLARE_ALIGNED(16, uint8_t, seg_mask_smoothed[2 * MAX_SB_SQUARE]);
+#endif // CONFIG_ARBITRARY_WEDGE
+
/*!
* CFL (chroma from luma) related parameters.
*/
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 93b737d..5a00c63 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -26,6 +26,10 @@
#include "av1/common/obmc.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
+// TODO(urvang): Move common parts to av1/common.
+#if CONFIG_ARBITRARY_WEDGE
+#include "av1/encoder/segment_patch.h"
+#endif // CONFIG_ARBITRARY_WEDGE
// This function will determine whether or not to create a warped
// prediction.
@@ -330,8 +334,23 @@
case COMPOUND_WEDGE:
#if CONFIG_ARBITRARY_WEDGE
if (av1_wedge_params_lookup[sb_type].codebook == NULL) {
- // We are using an arbitrary mask, stored earlier.
- return comp_data->seg_mask;
+ // We are using an arbitrary mask, stored earlier. But we still need to
+ // derive the smooth mask from the raw binary mask.
+ const int bw = block_size_wide[sb_type];
+ const int bh = block_size_high[sb_type];
+ const int N = bw * bh;
+
+ memcpy(comp_data->seg_mask_smoothed, comp_data->seg_mask,
+ N * sizeof(*comp_data->seg_mask_smoothed));
+
+ // TODO(urvang): Refactor part below.
+
+ // Convert binary mask with values {0, 1} to one with values {0, 64}.
+ av1_extend_binary_mask_range(comp_data->seg_mask_smoothed, bw, bh);
+
+ // Get a smooth mask from the binary mask.
+ av1_apply_box_blur(comp_data->seg_mask_smoothed, bw, bh);
+ return comp_data->seg_mask_smoothed;
}
#endif // CONFIG_ARBITRARY_WEDGE
return av1_get_contiguous_soft_mask(comp_data->wedge_index,
@@ -951,6 +970,9 @@
}
// Assign physical buffer.
inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+ inter_pred_params.mask_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+#endif // CONFIG_ARBITRARY_WEDGE
}
av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
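The raw-to-smooth derivation above (copy, extend the value range, blur) can be
pictured with the standalone sketch below. av1_extend_binary_mask_range() and
av1_apply_box_blur() are not shown in this patch, so the blur here is a guessed
3x3 box filter; only the overall pipeline shape is taken from the hunk above.

    #include <stdint.h>
    #include <string.h>

    /* Scale a binary {0, 1} mask to {0, 64}; 64 is full weight in the
     * 6-bit compound-mask convention. */
    static void extend_mask_range(uint8_t *mask, int w, int h) {
      for (int i = 0; i < w * h; ++i) mask[i] = (uint8_t)(mask[i] * 64);
    }

    /* Hypothetical 3x3 box blur with edge clamping; the real
     * av1_apply_box_blur() may use a different kernel and borders. */
    static void box_blur(uint8_t *mask, int w, int h) {
      static uint8_t tmp[128 * 128]; /* Largest arbitrary-wedge block. */
      for (int r = 0; r < h; ++r) {
        for (int c = 0; c < w; ++c) {
          int sum = 0, cnt = 0;
          for (int dr = -1; dr <= 1; ++dr) {
            for (int dc = -1; dc <= 1; ++dc) {
              const int rr = r + dr, cc = c + dc;
              if (rr < 0 || rr >= h || cc < 0 || cc >= w) continue;
              sum += mask[rr * w + cc];
              ++cnt;
            }
          }
          tmp[r * w + c] = (uint8_t)((sum + cnt / 2) / cnt);
        }
      }
      memcpy(mask, tmp, (size_t)(w * h));
    }

    /* Pipeline mirroring the reconinter.c change above: keep the raw
     * mask for signaling, smooth a copy for prediction. */
    static void derive_smooth_mask(const uint8_t *raw, uint8_t *smooth,
                                   int w, int h) {
      memcpy(smooth, raw, (size_t)(w * h));
      extend_mask_range(smooth, w, h);
      box_blur(smooth, w, h);
    }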
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f988e81..7f292bc 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -43,6 +43,9 @@
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/palette.h"
+#if CONFIG_ARBITRARY_WEDGE
+#include "av1/encoder/segment_patch.h"
+#endif // CONFIG_ARBITRARY_WEDGE
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
@@ -1235,9 +1238,28 @@
if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
- aom_write_symbol(w, mbmi->interinter_comp.wedge_index,
- ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES);
- aom_write_bit(w, mbmi->interinter_comp.wedge_sign);
+ // TODO(urvang): Do the same on the decoder side.
+#if CONFIG_ARBITRARY_WEDGE
+ if (av1_wedge_params_lookup[bsize].codebook == NULL) {
+ // Arbitrary wedge: run RLE and write the RLE output.
+ uint8_t rle_buf[3 * MAX_SB_SQUARE];
+ int rle_size = 0;
+ int rle_rate = 0;
+ av1_run_length_encode(mbmi->interinter_comp.seg_mask,
+ block_size_wide[bsize],
+ block_size_high[bsize],
+ block_size_wide[bsize], rle_buf, &rle_size,
+ &rle_rate);
+ // TODO(urvang): Write rle_buf.
+ } else {
+ // Transmit wedge index and sign only.
+#endif // CONFIG_ARBITRARY_WEDGE
+ aom_write_symbol(w, mbmi->interinter_comp.wedge_index,
+ ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES);
+ aom_write_bit(w, mbmi->interinter_comp.wedge_sign);
+#if CONFIG_ARBITRARY_WEDGE
+ }
+#endif // CONFIG_ARBITRARY_WEDGE
} else {
assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
aom_write_literal(w, mbmi->interinter_comp.mask_type,
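The actual emission of rle_buf is still a TODO above. One plausible shape for
it, assuming the (value, 2-byte run length) layout implied by the
'3 * MAX_SB_SQUARE' buffer sizing and little-endian packing by
write_run_length() (neither is confirmed by this patch), reusing
aom_write_literal():

    /* Hypothetical sketch only; not part of this patch. */
    static void write_rle_mask(aom_writer *w, const uint8_t *rle_buf,
                               int rle_size, int bits_per_run_len) {
      for (int i = 0; i + 2 < rle_size; i += 3) {
        const int value = rle_buf[i]; /* Binary segment id: 0 or 1. */
        const int run = rle_buf[i + 1] | (rle_buf[i + 2] << 8);
        aom_write_literal(w, value, 1);
        aom_write_literal(w, run, bits_per_run_len);
      }
    }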
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index 76eb4e1..470d377 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -306,11 +306,10 @@
#if CONFIG_ARBITRARY_WEDGE
// Create an arbitrary binary mask using spatial segmentation of this block.
// This is used for larger blocks, where we don't have pre-defined wedges.
-static int64_t pick_arbitrary_wedge(const AV1_COMP *const cpi,
- MACROBLOCK *const x, const BLOCK_SIZE bsize,
- const int16_t *const residual1,
- const int16_t *const diff10,
- uint8_t *seg_mask, uint64_t *best_sse) {
+static int64_t pick_arbitrary_wedge(
+ const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize,
+ const int16_t *const residual1, const int16_t *const diff10,
+ uint8_t *seg_mask, uint8_t *seg_mask_smoothed, uint64_t *best_sse) {
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
const int N = bw * bh;
@@ -327,38 +326,42 @@
params.k = 5000; // TODO(urvang): Temporary hack to get 2 components.
int num_components = -1;
av1_get_segments(x->plane[0].src.buf, bw, bh, x->plane[0].src.stride, &params,
- seg_mask, &num_components);
+ seg_mask_smoothed, &num_components);
if (num_components >= 2) {
// TODO(urvang): Convert more than 2 components to 2 components.
if (num_components == 2) {
+ // Save the raw mask to 'seg_mask', as that is the one to be used for
+ // signaling in the bitstream.
+ memcpy(seg_mask, seg_mask_smoothed, N * sizeof(*seg_mask));
+
+ // TODO(urvang): Refactor part below.
+
// Convert binary mask with values {0, 1} to one with values {0, 64}.
- av1_extend_binary_mask_range(seg_mask, bw, bh);
+ av1_extend_binary_mask_range(seg_mask_smoothed, bw, bh);
#if DUMP_SEGMENT_MASKS
- av1_dump_raw_y_plane(seg_mask, bw, bh, bw, "/tmp/2.binary_mask.yuv");
+ av1_dump_raw_y_plane(seg_mask_smoothed, bw, bh, bw, "/tmp/2.binary_mask.yuv");
#endif // DUMP_SEGMENT_MASKS
// Get a smooth mask from the binary mask.
- av1_apply_box_blur(seg_mask, bw, bh);
+ av1_apply_box_blur(seg_mask_smoothed, bw, bh);
#if DUMP_SEGMENT_MASKS
- av1_dump_raw_y_plane(seg_mask, bw, bh, bw, "/tmp/3.smooth_mask.yuv");
+ av1_dump_raw_y_plane(seg_mask_smoothed, bw, bh, bw, "/tmp/3.smooth_mask.yuv");
#endif // DUMP_SEGMENT_MASKS
// Get RDCost
*best_sse =
- av1_wedge_sse_from_residuals(residual1, diff10, seg_mask, N);
+ av1_wedge_sse_from_residuals(residual1, diff10, seg_mask_smoothed, N);
const MACROBLOCKD *const xd = &x->e_mbd;
const int hbd = is_cur_buf_hbd(xd);
const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
- *best_sse = ROUND_POWER_OF_TWO(*best_sse, bd_round);
+ *best_sse = ROUND_POWER_OF_TWO(*best_sse, bd_round);
int rate;
int64_t dist;
- model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, *best_sse, N,
- &rate, &dist);
- // TODO(urvang): Add cost of signaling wedge itself to 'rate'.
+ model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, *best_sse,
+ N, &rate, &dist);
const int64_t rd = RDCOST(x->rdmult, rate, dist);
- // TODO(urvang): Subtrack rate of signaling wedge (like pick_wedge)?
return rd;
}
return INT64_MAX;
@@ -385,10 +388,11 @@
#if CONFIG_ARBITRARY_WEDGE
if (av1_wedge_params_lookup[bsize].codebook == NULL) {
- // TODO(urvang): Reuse seg_mask or have a different wedge_mask array?
mbmi->interinter_comp.seg_mask = xd->seg_mask;
- rd = pick_arbitrary_wedge(cpi, x, bsize, residual1, diff10,
- mbmi->interinter_comp.seg_mask, best_sse);
+ mbmi->interinter_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+ rd = pick_arbitrary_wedge(
+ cpi, x, bsize, residual1, diff10, mbmi->interinter_comp.seg_mask,
+ mbmi->interinter_comp.seg_mask_smoothed, best_sse);
mbmi->interinter_comp.wedge_sign = 0;
mbmi->interinter_comp.wedge_index = -1;
return rd;
@@ -1135,15 +1139,15 @@
// 3*n, as storing each length takes 2 bytes.
uint8_t rle_buf[3 * MAX_SB_SQUARE];
int rle_size = 0;
+ int rle_rate = INT_MAX;
av1_run_length_encode(mbmi->interinter_comp.seg_mask, bw, bh, bw, rle_buf,
- &rle_size);
- return rle_size;
+ &rle_size, &rle_rate);
+ return rle_rate;
}
#endif // CONFIG_ARBITRARY_WEDGE
return av1_cost_literal(1) +
- mode_costs
- ->wedge_idx_cost[mbmi->sb_type]
- [mbmi->interinter_comp.wedge_index];
+ mode_costs->wedge_idx_cost[mbmi->sb_type]
+ [mbmi->interinter_comp.wedge_index];
} else {
assert(compound_type == COMPOUND_DIFFWTD);
return av1_cost_literal(1);
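Returning rle_rate instead of rle_size matters because both branches of this
cost function feed RDCOST(), which expects rate in 1/512-bit units; a byte
count would be under-weighted by a factor of 8 * 512. Roughly (simplified from
av1/encoder/rd.h, without the rounding term; treat this as a sketch, not the
exact macro):

    #include <stdint.h>

    #define PROB_COST_SHIFT 9 /* 1/512-bit rate units. */
    #define RDDIV_BITS 7

    /* Simplified RD cost: rate weighted by the Lagrange multiplier
     * 'rdmult', plus scaled distortion. */
    static int64_t rdcost_sketch(int rdmult, int rate, int64_t dist) {
      return (((int64_t)rate * rdmult) >> PROB_COST_SHIFT) +
             (dist * (1 << RDDIV_BITS));
    }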
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index dedc8b3..7d7f5e6 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -691,6 +691,9 @@
int_mv tmp_mv[2];
int tmp_rate_mv = 0;
mbmi->interinter_comp.seg_mask = xd->seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+ mbmi->interinter_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+#endif // CONFIG_ARBITRARY_WEDGE
const INTERINTER_COMPOUND_DATA *compound_data = &mbmi->interinter_comp;
if (this_mode == NEW_NEWMV) {
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c
index dbe7e4b..6d75d87 100644
--- a/av1/encoder/reconinter_enc.c
+++ b/av1/encoder/reconinter_enc.c
@@ -368,6 +368,9 @@
struct buf_2d *const dst_buf = &pd->dst;
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
mbmi->interinter_comp.seg_mask = xd->seg_mask;
+#if CONFIG_ARBITRARY_WEDGE
+ mbmi->interinter_comp.seg_mask_smoothed = xd->seg_mask_smoothed;
+#endif // CONFIG_ARBITRARY_WEDGE
const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp;
const int is_hbd = is_cur_buf_hbd(xd);
diff --git a/av1/encoder/segment_patch.cc b/av1/encoder/segment_patch.cc
index 7080d50..d71750d 100644
--- a/av1/encoder/segment_patch.cc
+++ b/av1/encoder/segment_patch.cc
@@ -1,8 +1,12 @@
#include <assert.h>
#include <unordered_map>
+#if DUMP_SEGMENT_MASKS
+#include <fstream>
+#endif // DUMP_SEGMENT_MASKS
#include "aom_mem/aom_mem.h"
#include "av1/common/enums.h"
+#include "av1/encoder/cost.h"
#include "av1/encoder/segment_patch.h"
#include "third_party/segment/segment-image.h"
@@ -164,19 +168,29 @@
}
void av1_run_length_encode(const uint8_t *const img, int width, int height,
- int stride, uint8_t *out, int *out_size) {
+ int stride, uint8_t *out, int *out_size,
+ int *out_rate) {
int out_idx = 0;
uint8_t prev_val = img[0];
int run_len = 1;
+ int num_bits = 0;
+ const int bits_per_symbol = 1; // Assuming 2 segments exactly.
+ // Assuming 64x64 or 128x128 block sizes only.
+ assert((width == 64 && height == 64) || (width == 128 && height == 128));
+ const int bits_per_run_len =
+ (width == 128 && height == 128) ? 14 : 12; // log2(width * height)
for (int r = 0; r < height; ++r) {
for (int c = (r == 0) ? 1 : 0; c < width; ++c) {
const uint8_t curr_val = img[r * stride + c];
+ assert(curr_val < (1 << bits_per_symbol));
if (curr_val == prev_val) {
++run_len;
} else {
out[out_idx++] = prev_val;
+ num_bits += bits_per_symbol;
write_run_length(run_len, out, &out_idx);
+ num_bits += bits_per_run_len;
run_len = 1;
prev_val = curr_val;
}
@@ -185,6 +199,9 @@
out[out_idx++] = prev_val;
write_run_length(run_len, out, &out_idx);
+ // Account for the final run's bits as well.
+ num_bits += bits_per_symbol + bits_per_run_len;
*out_size = out_idx;
+ *out_rate = av1_cost_literal(num_bits);
}
#if DUMP_SEGMENT_MASKS
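As a worked example of the two outputs (the run count is made up; the
constants are the ones above):

    /* Worked example, 64x64 block:
     *   runs     = 20             (19 value changes + the final run)
     *   out_size = 20 * 3         = 60 bytes (1 value + 2 length bytes/run)
     *   num_bits = 20 * (1 + 12)  = 260 bits (symbol + 12-bit run length)
     *   out_rate = 260 << 9       = 133120   (1/512-bit units)
     */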
diff --git a/av1/encoder/segment_patch.h b/av1/encoder/segment_patch.h
index 41fde1a..567ee4a 100644
--- a/av1/encoder/segment_patch.h
+++ b/av1/encoder/segment_patch.h
@@ -56,9 +56,12 @@
// - stride: image stride
// Outputs:
// - out: run-length encoded image. Assumed to be already allocated.
-// - out_size: length of 'out'
+// - out_size: length of 'out' in bytes.
+// - out_rate: cost of transmitting 'out', in bits scaled by 512.
+// TODO(urvang): Write bits directly and consolidate the last two outputs.
void av1_run_length_encode(const uint8_t *const img, int width, int height,
- int stride, uint8_t *out, int *out_size);
+ int stride, uint8_t *out, int *out_size,
+ int *out_rate);
#define DUMP_SEGMENT_MASKS 0
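For reference, a call site with the widened signature looks like the
bitstream.c and compound_type.c changes above (buffer sizing per the
3-bytes-per-run worst case):

    uint8_t rle_buf[3 * MAX_SB_SQUARE];
    int rle_size = 0; /* Bytes written into rle_buf. */
    int rle_rate = 0; /* Transmission cost in bits, scaled by 512. */
    av1_run_length_encode(seg_mask, bw, bh, /*stride=*/bw, rle_buf,
                          &rle_size, &rle_rate);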