CWG-D029: CfL improvements
This branch includes the source code of CWG-D029, which covers two aspects: simplification of the scaling factor, and replacing sample copying with a collocated filter for downsampling (DS) in CfL mode.
STATS_CHANGED
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index fb2b5af..8b43bc3 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -224,7 +224,13 @@
input[i + 1] + input[bot + AOMMAX(-1, -i)] +
2 * input[bot] + input[bot + 1];
} else if (filter_type == 2) {
+#if CONFIG_CFL_IMPROVEMENTS
+ const int top = i - input_stride;
+ output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 4 * input[i] +
+ input[i + 1] + input[top] + input[bot];
+#else
output_q3[i >> 1] = input[i] * 8;
+#endif // CONFIG_CFL_IMPROVEMENTS
} else {
output_q3[i >> 1] =
(input[i] + input[i + 1] + input[bot] + input[bot + 1] + 2) << 1;
@@ -278,7 +284,13 @@
output_q3[j >> 1] = input[-1] + 2 * input[0] + input[1] +
input[bot - 1] + 2 * input[bot] + input[bot + 1];
} else if (filter_type == 2) {
+#if CONFIG_CFL_IMPROVEMENTS
+ const int top = (j == 0) ? 0 : (0 - input_stride);
+ output_q3[j >> 1] =
+ input[-1] + 4 * input[0] + input[1] + input[top] + input[bot];
+#else
output_q3[j >> 1] = input[0] * 8;
+#endif // CONFIG_CFL_IMPROVEMENTS
} else {
output_q3[j >> 1] =
(input[0] + input[1] + input[bot] + input[bot + 1]) << 1;
@@ -601,7 +613,14 @@
int height) {
for (int j = 0; j < height; j += 2) {
for (int i = 0; i < width; i += 2) {
+#if CONFIG_CFL_IMPROVEMENTS
+ const int top = (j == 0) ? i : (i - input_stride);
+ const int bot = i + input_stride;
+ output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 4 * input[i] +
+ input[i + 1] + input[top] + input[bot];
+#else
output_q3[i >> 1] = input[i] * 8;
+#endif // CONFIG_CFL_IMPROVEMENTS
}
input += input_stride << 1;
output_q3 += CFL_BUF_LINE;
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index b0a057e..bd04055 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1466,7 +1466,17 @@
static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = {
AOM_CDF8(1418, 2123, 13340, 18405, 26972, 28343, 32294)
};
-
+#if CONFIG_CFL_IMPROVEMENTS
+static const aom_cdf_prob
+ default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
+ { AOM_CDF8(7650, 20740, 31430, 32520, 32700, 32730, 32740) },
+ { AOM_CDF8(14400, 23680, 28230, 31270, 32290, 32530, 32640) },
+ { AOM_CDF8(11560, 22430, 28510, 31430, 32430, 32610, 32680) },
+ { AOM_CDF8(27000, 31430, 32310, 32610, 32730, 32740, 32750) },
+ { AOM_CDF8(17320, 26210, 29100, 30820, 31550, 32150, 32430) },
+ { AOM_CDF8(14990, 22180, 26430, 28600, 29820, 31200, 31980) }
+ };
+#else
static const aom_cdf_prob
default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
{ AOM_CDF16(7637, 20719, 31401, 32481, 32657, 32688, 32692, 32696, 32700,
@@ -1482,7 +1492,7 @@
{ AOM_CDF16(14738, 21678, 25779, 27901, 29024, 30302, 30980, 31843, 32144,
32413, 32520, 32594, 32622, 32656, 32660) }
};
-
+#endif // CONFIG_CFL_IMPROVEMENTS
static const aom_cdf_prob
default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
SWITCHABLE_FILTERS)] = {
diff --git a/av1/common/enums.h b/av1/common/enums.h
index ea2a2e4..c5493ff 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -651,7 +651,11 @@
enum { PLANE_TYPE_Y, PLANE_TYPE_UV, PLANE_TYPES } UENUM1BYTE(PLANE_TYPE);
+#if CONFIG_CFL_IMPROVEMENTS
+#define CFL_ALPHABET_SIZE_LOG2 3
+#else
#define CFL_ALPHABET_SIZE_LOG2 4
+#endif // CONFIG_CFL_IMPROVEMENTS
#define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2)
#define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1)
#define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2)
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 276b474..bf932d3 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1680,9 +1680,13 @@
dst += CFL_BUF_LINE;
}
}
-
+#if CONFIG_CFL_IMPROVEMENTS
+static int64_t compute_sad(const uint16_t *src, uint16_t *src2, int width,
+ int height, int round_offset, int src2_stride) {
+#else
static int compute_sad(const uint16_t *src, uint16_t *src2, int width,
int height, int round_offset, int src2_stride) {
+#endif // CONFIG_CFL_IMPROVEMENTS
int sad = round_offset;
for (int j = 0; j < height; ++j) {
for (int i = 0; i < width; ++i) {
@@ -1691,7 +1695,11 @@
src += CFL_BUF_LINE;
src2 += src2_stride;
}
+#if CONFIG_CFL_IMPROVEMENTS
+ return sad;
+#else
return (sad / (height * width));
+#endif // CONFIG_CFL_IMPROVEMENTS
}
static void cfl_predict_hbd_pre_analysis(const int16_t *ac_buf_q3,
@@ -1761,14 +1769,23 @@
const int subsampling_x = cpi->unfiltered_source->subsampling_x;
const int subsampling_y = cpi->unfiltered_source->subsampling_y;
+#if CONFIG_CFL_IMPROVEMENTS
+ const int blk_w = 16;
+ const int blk_h = 16;
+#else
const int blk_w = 32;
const int blk_h = 32;
+#endif // CONFIG_CFL_IMPROVEMENTS
uint16_t recon_buf_q3[CFL_BUF_SQUARE];
uint16_t dc_buf_q3[CFL_BUF_SQUARE];
// Q3 AC contributions (reconstructed luma pixels - tx block avg)
int16_t ac_buf_q3[CFL_BUF_SQUARE];
+#if CONFIG_CFL_IMPROVEMENTS
+ int64_t cost[3] = { 0, 0, 0 };
+#else
int cost[3] = { 0, 0, 0 };
+#endif // CONFIG_CFL_IMPROVEMENTS
for (int filter_type = 0; filter_type < 3; ++filter_type) {
for (int comp = 0; comp < 2; comp++) {
for (int r = 2; r + blk_h <= height - 2; r += blk_h) {
@@ -1803,15 +1820,24 @@
chroma_stride, blk_w >> 1, blk_h >> 1);
cfl_predict_hbd_pre_analysis(ac_buf_q3, dc_buf_q3, CFL_BUF_LINE,
alpha, bd, blk_w >> 1, blk_h >> 1);
+#if CONFIG_CFL_IMPROVEMENTS
+ int64_t filter_cost =
+ compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1, 2,
+ chroma_stride);
+#else
int filter_cost = compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1,
blk_h >> 1, 2, chroma_stride);
+#endif // CONFIG_CFL_IMPROVEMENTS
cost[filter_type] = cost[filter_type] + filter_cost;
}
}
}
}
-
+#if CONFIG_CFL_IMPROVEMENTS
+ int64_t min_cost = INT64_MAX;
+#else
int min_cost = INT_MAX;
+#endif // CONFIG_CFL_IMPROVEMENTS
for (int i = 0; i < 3; ++i) {
if (cost[i] < min_cost) {
min_cost = cost[i];
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 588ed2c..9fd4d2c 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -221,6 +221,8 @@
set_aom_config_var(CONFIG_BVCOST_UPDATE 1 "Enables sb-level update for bv cost")
set_aom_config_var(CONFIG_CCSO_EXT 1
"AV2 experiment flag to enable extended CCSO.")
+set_aom_config_var(CONFIG_CFL_IMPROVEMENTS 1
+ "AV2 Cfl improvements from CWG-D029.")
set_aom_config_var(CONFIG_ADAPTIVE_MVD 1 "Enable adaptive MVD resolution")
set_aom_config_var(CONFIG_JOINT_MVD 1 "Enable joint MVD coding")
set_aom_config_var(CONFIG_IMPROVED_JMVD 1