Merge dering/clpf rdo and filtering
* Dering and clpf were merged into a single pass.
* 32x32 and 128x128 filter block sizes for clpf were removed.
* RDO for dering and clpf was merged and improved:
- "0" is no longer required to be among the selectable strengths
- The dering strength can now be signalled with 0, 1 or 2 bits per block
Metric:        LL     HL
PSNR:        -0.04  -0.01
PSNR HVS:    -0.27  -0.18
SSIM:        -0.15  +0.01
CIEDE 2000:  -0.11  -0.03
APSNR:       -0.03  -0.00
MS SSIM:     -0.18  -0.11
Change-Id: I9f002a16ad218eab6007f90f1f176232443495f0
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 7ff1e16..587134f 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -24,8 +24,8 @@
#endif // CONFIG_BITSTREAM_DEBUG
#if CONFIG_CDEF
+#include "av1/common/cdef.h"
#include "av1/common/clpf.h"
-#include "av1/common/dering.h"
#endif // CONFIG_CDEF
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
@@ -2735,95 +2735,29 @@
#if CONFIG_CDEF
#if CONFIG_EXT_PARTITION
if (cm->sb_size == BLOCK_128X128 && bsize == BLOCK_128X128 &&
- cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
+ !sb_all_skip(cm, mi_row, mi_col)) {
aom_write_literal(
w,
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain,
- DERING_REFINEMENT_BITS);
+ cm->dering_bits);
+ aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
+ ->mbmi.clpf_strength,
+ cm->clpf_bits);
} else if (cm->sb_size == BLOCK_64X64 && bsize == BLOCK_64X64 &&
#else
if (bsize == BLOCK_64X64 &&
#endif // CONFIG_EXT_PARTITION
- cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
- aom_write_literal(
- w,
- cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain,
- DERING_REFINEMENT_BITS);
+ !sb_all_skip(cm, mi_row, mi_col)) {
+ if (cm->dering_bits)
+ aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
+ ->mbmi.dering_gain,
+ cm->dering_bits);
+ if (cm->clpf_bits)
+ aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
+ ->mbmi.clpf_strength,
+ cm->clpf_bits);
}
#endif
-
-#if CONFIG_CDEF
-#if CONFIG_EXT_PARTITION
- if (cm->sb_size == BLOCK_128X128 && bsize == BLOCK_128X128 &&
- cm->clpf_blocks && cm->clpf_strength_y && cm->clpf_size != CLPF_NOSIZE) {
- const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride +
- mi_col * MI_SIZE / MIN_FB_SIZE;
- if (cm->clpf_size == CLPF_128X128 && cm->clpf_blocks[tl] != CLPF_NOFLAG) {
- aom_write_literal(w, cm->clpf_blocks[tl], 1);
- } else if (cm->clpf_size == CLPF_64X64) {
- const int tr = tl + 2;
- const int bl = tl + 2 * cm->clpf_stride;
- const int br = tr + 2 * cm->clpf_stride;
-
- // Up to four bits per SB.
- if (cm->clpf_blocks[tl] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[tl], 1);
-
- if (mi_col + MI_SIZE < cm->mi_cols && cm->clpf_blocks[tr] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[tr], 1);
-
- if (mi_row + MI_SIZE < cm->mi_rows && cm->clpf_blocks[bl] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[bl], 1);
-
- if (mi_row + MI_SIZE < cm->mi_rows && mi_col + MI_SIZE < cm->mi_cols &&
- cm->clpf_blocks[br] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[br], 1);
- } else if (cm->clpf_size == CLPF_32X32) {
- int i, j;
- const int size = 32 / MI_SIZE;
- // Up to sixteen bits per SB.
- for (i = 0; i < 4; ++i)
- for (j = 0; j < 4; ++j) {
- const int index = tl + i * cm->clpf_stride + j;
- if (mi_row + i * size < cm->mi_rows &&
- mi_col + j * size < cm->mi_cols &&
- cm->clpf_blocks[index] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[index], 1);
- }
- }
- } else if (cm->sb_size == BLOCK_64X64 && bsize == BLOCK_64X64 &&
-#else
- if (bsize == BLOCK_64X64 &&
-#endif // CONFIG_EXT_PARTITION
- cm->clpf_blocks && cm->clpf_strength_y &&
- cm->clpf_size != CLPF_NOSIZE) {
- const int tl = mi_row * MI_SIZE / MIN_FB_SIZE * cm->clpf_stride +
- mi_col * MI_SIZE / MIN_FB_SIZE;
- const int tr = tl + 1;
- const int bl = tl + cm->clpf_stride;
- const int br = tr + cm->clpf_stride;
-
- // Up to four bits per SB.
- // When clpf_size indicates a size larger than the SB size
- // (CLPF_128X128), one bit for every fourth SB will be transmitted
- // regardless of skip blocks.
- if (cm->clpf_blocks[tl] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[tl], 1);
-
- if (mi_col + MI_SIZE / 2 < cm->mi_cols &&
- cm->clpf_blocks[tr] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[tr], 1);
-
- if (mi_row + MI_SIZE / 2 < cm->mi_rows &&
- cm->clpf_blocks[bl] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[bl], 1);
-
- if (mi_row + MI_SIZE / 2 < cm->mi_rows &&
- mi_col + MI_SIZE / 2 < cm->mi_cols &&
- cm->clpf_blocks[br] != CLPF_NOFLAG)
- aom_write_literal(w, cm->clpf_blocks[br], 1);
- }
-#endif // CONFIG_CDEF
}
static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
@@ -3522,22 +3456,13 @@
}
#if CONFIG_CDEF
-static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
- aom_wb_write_literal(wb, cm->clpf_strength_y, 2);
+static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
+ aom_wb_write_literal(wb, cm->dering_level, DERING_LEVEL_BITS);
aom_wb_write_literal(wb, cm->clpf_strength_u, 2);
aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
- if (cm->clpf_strength_y) {
- aom_wb_write_literal(wb, cm->clpf_size, 2);
- }
}
#endif
-#if CONFIG_CDEF
-static void encode_dering(int level, struct aom_write_bit_buffer *wb) {
- aom_wb_write_literal(wb, level, DERING_LEVEL_BITS);
-}
-#endif // CONFIG_CDEF
-
static void write_delta_q(struct aom_write_bit_buffer *wb, int delta_q) {
if (delta_q != 0) {
aom_wb_write_bit(wb, 1);
@@ -4481,8 +4406,7 @@
encode_loopfilter(cm, wb);
#if CONFIG_CDEF
- encode_dering(cm->dering_level, wb);
- encode_clpf(cm, wb);
+ encode_cdef(cm, wb);
#endif
#if CONFIG_LOOP_RESTORATION
encode_restoration_mode(cm, wb);
diff --git a/av1/encoder/clpf_rdo.c b/av1/encoder/clpf_rdo.c
index 3ef67cc..0173681 100644
--- a/av1/encoder/clpf_rdo.c
+++ b/av1/encoder/clpf_rdo.c
@@ -142,68 +142,17 @@
}
#endif
-int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
- const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
- int block_size, int w, int h, unsigned int strength,
- unsigned int fb_size_log2, int8_t *res, int plane) {
- int m, n, sum0 = 0, sum1 = 0;
- int damping =
- cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
-
- for (m = 0; m < h; m++) {
- for (n = 0; n < w; n++) {
- int xpos = (l << fb_size_log2) + n * block_size;
- int ypos = (k << fb_size_log2) + m * block_size;
- if (fb_size_log2 == MAX_FB_SIZE_LOG2 ||
- !cm->mi_grid_visible[ypos / MI_SIZE * cm->mi_stride + xpos / MI_SIZE]
- ->mbmi.skip) {
-#if CONFIG_AOM_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- aom_clpf_detect_hbd(CONVERT_TO_SHORTPTR(rec->y_buffer),
- CONVERT_TO_SHORTPTR(org->y_buffer), rec->y_stride,
- org->y_stride, xpos, ypos, rec->y_crop_width,
- rec->y_crop_height, &sum0, &sum1, strength,
- block_size, cm->bit_depth, damping);
- } else {
- aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
- org->y_stride, xpos, ypos, rec->y_crop_width,
- rec->y_crop_height, &sum0, &sum1, strength,
- block_size, damping);
- }
-#else
- aom_clpf_detect(rec->y_buffer, org->y_buffer, rec->y_stride,
- org->y_stride, xpos, ypos, rec->y_crop_width,
- rec->y_crop_height, &sum0, &sum1, strength, block_size,
- damping);
-#endif
- }
- }
- }
- *res = sum1 < sum0;
- return *res;
-}
-
// Calculate the square error of all filter settings. Result:
// res[0][0] : unfiltered
// res[0][1-3] : strength=1,2,4, no signals
-// (Only for luma:)
-// res[1][0] : (bit count, fb size = 128)
-// res[1][1-3] : strength=1,2,4, fb size = 128
-// res[1][4] : unfiltered, including skip
-// res[1][5-7] : strength=1,2,4, including skip, fb_size = 128
-// res[2][0] : (bit count, fb size = 64)
-// res[2][1-3] : strength=1,2,4, fb size = 64
-// res[3][0] : (bit count, fb size = 32)
-// res[3][1-3] : strength=1,2,4, fb size = 32
-static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
- const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
- unsigned int block_size, unsigned int fb_size_log2, int w,
- int h, int64_t res[4][8], int plane) {
- int c, m, n, filtered = 0;
- int sum[8];
+static void clpf_rdo(const YV12_BUFFER_CONFIG *rec,
+ const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
+ unsigned int block_size, int w, int h, uint64_t res[4],
+ int plane) {
+ int m, n;
+ int sum[4];
const int subx = plane != AOM_PLANE_Y && rec->subsampling_x;
const int suby = plane != AOM_PLANE_Y && rec->subsampling_y;
- int bslog = get_msb(block_size);
uint8_t *rec_buffer =
plane != AOM_PLANE_Y
? (plane == AOM_PLANE_U ? rec->u_buffer : rec->v_buffer)
@@ -220,166 +169,64 @@
int damping =
cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);
- sum[0] = sum[1] = sum[2] = sum[3] = sum[4] = sum[5] = sum[6] = sum[7] = 0;
- if (plane == AOM_PLANE_Y &&
- fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
- int w1, h1, w2, h2, i, sum1, sum2, sum3, oldfiltered;
-
- filtered = fb_size_log2-- == MAX_FB_SIZE_LOG2;
- w1 = AOMMIN(1 << (fb_size_log2 - bslog), w);
- h1 = AOMMIN(1 << (fb_size_log2 - bslog), h);
- w2 = AOMMIN(w - (1 << (fb_size_log2 - bslog)), w >> 1);
- h2 = AOMMIN(h - (1 << (fb_size_log2 - bslog)), h >> 1);
- i = get_msb(MAX_FB_SIZE) - fb_size_log2;
- sum1 = (int)res[i][1];
- sum2 = (int)res[i][2];
- sum3 = (int)res[i][3];
- oldfiltered = (int)res[i][0];
- res[i][0] = 0;
-
- filtered |= clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1,
- res, plane);
- if (1 << (fb_size_log2 - bslog) < w)
- filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size,
- fb_size_log2, w2, h1, res, plane);
- if (1 << (fb_size_log2 - bslog) < h) {
- filtered |= clpf_rdo(y + (1 << fb_size_log2), x, rec, org, cm, block_size,
- fb_size_log2, w1, h2, res, plane);
- filtered |=
- clpf_rdo(y + (1 << fb_size_log2), x + (1 << fb_size_log2), rec, org,
- cm, block_size, fb_size_log2, w2, h2, res, plane);
- }
-
- // Correct sums for unfiltered blocks
- res[i][1] = AOMMIN(sum1 + res[i][0], res[i][1]);
- res[i][2] = AOMMIN(sum2 + res[i][0], res[i][2]);
- res[i][3] = AOMMIN(sum3 + res[i][0], res[i][3]);
- if (i == 1) {
- res[i][5] = AOMMIN(sum1 + res[i][4], res[i][5]);
- res[i][6] = AOMMIN(sum2 + res[i][4], res[i][6]);
- res[i][7] = AOMMIN(sum3 + res[i][4], res[i][7]);
- }
-
- res[i][0] = oldfiltered + filtered; // Number of signal bits
-
- return filtered;
- }
+ sum[0] = sum[1] = sum[2] = sum[3] = 0;
for (m = 0; m < h; m++) {
for (n = 0; n < w; n++) {
- int xpos = x + n * block_size;
- int ypos = y + m * block_size;
- int skip = // Filtered skip blocks stored only for fb_size == 128
- 4 *
- !!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
- (xpos << subx) / MI_SIZE]
- ->mbmi.skip;
+ int xpos = n * block_size;
+ int ypos = m * block_size;
+ if (!cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
+ (xpos << subx) / MI_SIZE]
+ ->mbmi.skip) {
#if CONFIG_AOM_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- aom_clpf_detect_multi_hbd(
- CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
- rec_stride, org_stride, xpos, ypos, rec_width, rec_height,
- sum + skip, block_size, cm->bit_depth, damping);
- } else {
- aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
- xpos, ypos, rec_width, rec_height, sum + skip,
- block_size, damping);
- }
+ if (cm->use_highbitdepth) {
+ aom_clpf_detect_multi_hbd(
+ CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
+ rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum,
+ block_size, cm->bit_depth, damping);
+ } else {
+ aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
+ xpos, ypos, rec_width, rec_height, sum,
+ block_size, damping);
+ }
#else
- aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
- xpos, ypos, rec_width, rec_height, sum + skip,
- block_size, damping);
+ aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
+ xpos, ypos, rec_width, rec_height, sum,
+ block_size, damping);
#endif
- filtered |= !skip;
+ }
}
}
- for (c = 0; c < (plane == AOM_PLANE_Y ? 4 : 1); c++) {
- res[c][0] += sum[0];
- res[c][1] += sum[1];
- res[c][2] += sum[2];
- res[c][3] += sum[3];
- if (c != 1) continue;
- // Only needed when fb_size == 128
- res[c][4] += sum[4];
- res[c][5] += sum[5];
- res[c][6] += sum[6];
- res[c][7] += sum[7];
- }
- return filtered;
+ res[0] += sum[0];
+ res[1] += sum[1];
+ res[2] += sum[2];
+ res[3] += sum[3];
}
-void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
+void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
- int *best_strength, int *best_bs, int plane) {
- int c, j, k, l;
- int64_t best, sums[4][8];
+ int *best_strength, int plane) {
+ int i;
+ uint64_t best, sums[4];
int width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width;
int height = plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
const int bs = MI_SIZE;
const int bslog = get_msb(bs);
- int fb_size_log2 = get_msb(MAX_FB_SIZE);
- int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
- int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;
memset(sums, 0, sizeof(sums));
- if (plane != AOM_PLANE_Y)
- // Use a block size of MI_SIZE regardless of the subsampling. This
- // This is accurate enough to determine the best strength and
- // we don't need to add SIMD optimisations for 4x4 blocks.
- clpf_rdo(0, 0, rec, org, cm, bs, fb_size_log2, width >> bslog,
- height >> bslog, sums, plane);
- else
- for (k = 0; k < num_fb_ver; k++) {
- for (l = 0; l < num_fb_hor; l++) {
- // Calculate the block size after frame border clipping
- int h =
- AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
- int w =
- AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
- h += !h << fb_size_log2;
- w += !w << fb_size_log2;
- clpf_rdo(k << fb_size_log2, l << fb_size_log2, rec, org, cm, MI_SIZE,
- fb_size_log2, w >> bslog, h >> bslog, sums, plane);
- }
- }
+ clpf_rdo(rec, org, cm, bs, width >> bslog, height >> bslog, sums, plane);
- // For fb_size == 128 skip blocks are included in the result.
- if (plane == AOM_PLANE_Y) {
- sums[1][1] += sums[1][5] - sums[1][4];
- sums[1][2] += sums[1][6] - sums[1][4];
- sums[1][3] += sums[1][7] - sums[1][4];
- } else { // Slightly favour unfiltered chroma
- sums[0][0] -= sums[0][0] >> 7;
- }
+ // Add a favourable bias for conservative strengths
+ for (i = 0; i < 4; i++) sums[i] -= sums[i] >> (7 + i);
- for (j = 0; j < 4; j++) {
- static const double lambda_square[] = {
- // exp(x / 8.5)
- 1.0000, 1.1248, 1.2653, 1.4232, 1.6009, 1.8008, 2.0256, 2.2785,
- 2.5630, 2.8830, 3.2429, 3.6478, 4.1032, 4.6155, 5.1917, 5.8399,
- 6.5689, 7.3891, 8.3116, 9.3492, 10.516, 11.829, 13.306, 14.967,
- 16.836, 18.938, 21.302, 23.962, 26.953, 30.318, 34.103, 38.361,
- 43.151, 48.538, 54.598, 61.414, 69.082, 77.706, 87.408, 98.320,
- 110.59, 124.40, 139.93, 157.40, 177.05, 199.16, 224.02, 251.99,
- 283.45, 318.84, 358.65, 403.42, 453.79, 510.45, 574.17, 645.86,
- 726.49, 817.19, 919.22, 1033.9, 1163.0, 1308.2, 1471.6, 1655.3
- };
+ // Tag the strength to the error
+ for (i = 0; i < 4; i++) sums[i] = (sums[i] << 2) + i;
- // Estimate the bit costs and adjust the square errors
- double lambda =
- lambda_square[av1_get_qindex(&cm->seg, 0, cm->base_qindex) >> 2];
- int i, cost = (int)((lambda * (sums[j][0] + 6 + 2 * (j > 0)) + 0.5));
- for (i = 0; i < 4; i++)
- sums[j][i] = ((sums[j][i] + (i && j) * cost) << 4) + j * 4 + i;
- }
-
- best = (int64_t)1 << 62;
- for (c = 0; c < (plane == AOM_PLANE_Y ? 4 : 1); c++)
- for (j = 0; j < 4; j++)
- if ((!c || j) && sums[c][j] < best) best = sums[c][j];
- best &= 15;
- if (best_bs) *best_bs = (best > 3) * (5 + (best < 12) + (best < 8));
- *best_strength = best ? 1 << ((best - 1) & 3) : 0;
+ // Identify the strength with the smallest error
+ best = (uint64_t)1 << 63;
+ for (i = 0; i < 4; i++)
+ if (sums[i] < best) best = sums[i];
+ *best_strength = best & 3 ? 1 << ((best - 1) & 3) : 0;
}
diff --git a/av1/encoder/clpf_rdo.h b/av1/encoder/clpf_rdo.h
index f92f7d2..e137378 100644
--- a/av1/encoder/clpf_rdo.h
+++ b/av1/encoder/clpf_rdo.h
@@ -14,13 +14,8 @@
#include "av1/common/reconinter.h"
-int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
- const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
- int block_size, int w, int h, unsigned int strength,
- unsigned int fb_size_log2, int8_t *res, int plane);
-
-void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
+void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
- int *best_strength, int *best_bs, int plane);
+ int *best_strength, int plane);
#endif
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index aca6e0b..7bc9710 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -17,10 +17,9 @@
#include "av1/common/alloccommon.h"
#if CONFIG_CDEF
-#include "aom/aom_image.h"
+#include "av1/common/cdef.h"
#include "av1/common/clpf.h"
#include "av1/encoder/clpf_rdo.h"
-#include "av1/common/dering.h"
#endif // CONFIG_CDEF
#include "av1/common/filter.h"
#include "av1/common/idct.h"
@@ -3526,57 +3525,18 @@
}
#if CONFIG_CDEF
if (is_lossless_requested(&cpi->oxcf)) {
- cm->dering_level = 0;
+ cm->dering_level = cm->clpf_strength_u = cm->clpf_strength_v = 0;
} else {
- cm->dering_level =
- av1_dering_search(cm->frame_to_show, cpi->Source, cm, xd);
- av1_dering_frame(cm->frame_to_show, cm, xd, cm->dering_level);
- }
- cm->clpf_strength_y = cm->clpf_strength_u = cm->clpf_strength_v = 0;
- cm->clpf_size = CLPF_64X64;
+ // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
+ av1_cdef_search(cm->frame_to_show, cpi->Source, cm, xd);
- // Allocate buffer to hold the status of all filter blocks:
- // 1 = On, 0 = off, -1 = implicitly off
- {
- int size;
- cm->clpf_stride = ((cm->frame_to_show->y_crop_width + MIN_FB_SIZE - 1) &
- ~(MIN_FB_SIZE - 1)) >>
- MIN_FB_SIZE_LOG2;
- size = cm->clpf_stride *
- ((cm->frame_to_show->y_crop_height + MIN_FB_SIZE - 1) &
- ~(MIN_FB_SIZE - 1)) >>
- MIN_FB_SIZE_LOG2;
- CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(size));
- memset(cm->clpf_blocks, CLPF_NOFLAG, size);
- }
+ // Apply the filter
+ av1_cdef_frame(cm->frame_to_show, cm, xd, cm->dering_level,
+ cm->clpf_strength_u, cm->clpf_strength_v);
- if (!is_lossless_requested(&cpi->oxcf)) {
- const YV12_BUFFER_CONFIG *const frame = cm->frame_to_show;
-
- // Find the best strength and block size for the entire frame
- int fb_size_log2, strength_y, strength_u, strength_v;
- av1_clpf_test_frame(frame, cpi->Source, cm, &strength_y, &fb_size_log2,
- AOM_PLANE_Y);
- av1_clpf_test_frame(frame, cpi->Source, cm, &strength_u, 0, AOM_PLANE_U);
- av1_clpf_test_frame(frame, cpi->Source, cm, &strength_v, 0, AOM_PLANE_V);
-
- if (strength_y) {
- // Apply the filter using the chosen strength
- cm->clpf_strength_y = strength_y - (strength_y == 4);
- cm->clpf_size =
- fb_size_log2 ? fb_size_log2 - MAX_FB_SIZE_LOG2 + 3 : CLPF_NOSIZE;
- av1_clpf_frame(frame, cpi->Source, cm, cm->clpf_size != CLPF_NOSIZE,
- strength_y, 4 + cm->clpf_size, AOM_PLANE_Y,
- av1_clpf_decision);
- }
- if (strength_u) {
- cm->clpf_strength_u = strength_u - (strength_u == 4);
- av1_clpf_frame(frame, NULL, cm, 0, strength_u, 4, AOM_PLANE_U, NULL);
- }
- if (strength_v) {
- cm->clpf_strength_v = strength_v - (strength_v == 4);
- av1_clpf_frame(frame, NULL, cm, 0, strength_v, 4, AOM_PLANE_V, NULL);
- }
+ // Pack the clpf chroma strengths into two bits each
+ cm->clpf_strength_u -= cm->clpf_strength_u == 4;
+ cm->clpf_strength_v -= cm->clpf_strength_v == 4;
}
#endif
#if CONFIG_LOOP_RESTORATION
@@ -4980,11 +4940,6 @@
if (cm->show_frame) dump_filtered_recon_frames(cpi);
#endif // DUMP_RECON_FRAMES
-#if CONFIG_CDEF
- aom_free(cm->clpf_blocks);
- cm->clpf_blocks = 0;
-#endif
-
if (cm->seg.update_map) update_reference_segmentation_map(cpi);
if (frame_is_intra_only(cm) == 0) {
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
new file mode 100644
index 0000000..4ff308e
--- /dev/null
+++ b/av1/encoder/pickcdef.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <math.h>
+#include <string.h>
+
+#include "./aom_scale_rtcd.h"
+#include "aom/aom_integer.h"
+#include "av1/common/cdef.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/reconinter.h"
+#include "av1/encoder/clpf_rdo.h"
+#include "av1/encoder/encoder.h"
+
+static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride,  // Sum of squared differences between buffers x and y (row strides xstride, ystride)
+ int nhb, int nvb, int coeff_shift) {  // Area compared: (nvb << 3) rows by (nhb << 3) columns
+ int i, j;
+ double sum;
+ sum = 0;
+ for (i = 0; i < nvb << 3; i++) {
+ for (j = 0; j < nhb << 3; j++) {
+ double tmp;
+ tmp = x[i * xstride + j] - y[i * ystride + j];  // Per-sample difference, held as a double
+ sum += tmp * tmp;
+ }
+ }
+ return sum / (double)(1 << 2 * coeff_shift);  // Returns the total divided by 2^(2 * coeff_shift)
+}
+
+void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,  // Encoder-side search: fills cm->dering_level, cm->dering_bits, cm->clpf_bits, cm->clpf_strength_u/v and per-block mbmi.dering_gain / mbmi.clpf_strength
+ AV1_COMMON *cm, MACROBLOCKD *xd) {  // frame is the plane source set up on xd; ref is the buffer distortion is measured against
+ int r, c;
+ int sbr, sbc;
+ uint16_t *src;
+ uint16_t *ref_coeff;
+ dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
+ int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+ int stride;
+ int bsize[3];
+ int dec[3];
+ int pli;
+ int level;
+ int dering_count;
+ int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
+ uint64_t best_tot_mse = 0;
+ int sb_count;
+ int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;  // Rows of MAX_MIB_SIZE-sized blocks, rounded up
+ int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;  // Columns of MAX_MIB_SIZE-sized blocks, rounded up
+ int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));  // NOTE(review): result is used without a NULL check
+ uint64_t(*mse)[DERING_STRENGTHS][CLPF_STRENGTHS] =
+ aom_malloc(sizeof(*mse) * nvsb * nhsb);  // One distortion table per block, indexed [dering index][clpf index]; NOTE(review): no NULL check
+ int clpf_damping = 3 + (cm->base_qindex >> 6);
+ int i;
+ int lev[DERING_REFINEMENT_LEVELS];
+ int best_lev[DERING_REFINEMENT_LEVELS];
+ int str[CLPF_REFINEMENT_LEVELS];
+ int best_str[CLPF_REFINEMENT_LEVELS];
+ double lambda = exp(cm->base_qindex / 36.0);  // Weight applied to the bit-cost term below
+ static int log2[] = { 0, 1, 2, 2 };  // Cost factor indexed by the count of adjacent differing table entries; NOTE(review): same name as log2() from math.h, which this file includes
+
+ src = aom_memalign(32, sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);  // NOTE(review): no NULL check
+ ref_coeff =
+ aom_memalign(32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);  // NOTE(review): no NULL check
+ av1_setup_dst_planes(xd->plane, frame, 0, 0);
+ for (pli = 0; pli < 3; pli++) {
+ dec[pli] = xd->plane[pli].subsampling_x;
+ bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli];
+ }
+ stride = cm->mi_cols << bsize[0];
+ for (r = 0; r < cm->mi_rows << bsize[0]; ++r) {  // Copy plane 0 of the frame and of ref into the 16-bit work buffers
+ for (c = 0; c < cm->mi_cols << bsize[0]; ++c) {
+#if CONFIG_AOM_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ src[r * stride + c] = CONVERT_TO_SHORTPTR(
+ xd->plane[0].dst.buf)[r * xd->plane[0].dst.stride + c];
+ ref_coeff[r * stride + c] =
+ CONVERT_TO_SHORTPTR(ref->y_buffer)[r * ref->y_stride + c];
+ } else {
+#endif
+ src[r * stride + c] =
+ xd->plane[0].dst.buf[r * xd->plane[0].dst.stride + c];
+ ref_coeff[r * stride + c] = ref->y_buffer[r * ref->y_stride + c];
+#if CONFIG_AOM_HIGHBITDEPTH
+ }
+#endif
+ }
+ }
+ sb_count = 0;
+ for (sbr = 0; sbr < nvsb; sbr++) {
+ for (sbc = 0; sbc < nhsb; sbc++) {
+ int nvb, nhb;
+ int gi;
+ DECLARE_ALIGNED(32, uint16_t, dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
+ DECLARE_ALIGNED(32, uint16_t,
+ tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
+ nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);  // Clip the block width at the frame edge
+ nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);  // Clip the block height at the frame edge
+ dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
+ sbc * MAX_MIB_SIZE, dlist);
+ if (dering_count == 0) continue;  // Block is skipped without advancing sb_count, so mse[] and sb_index[] stay densely packed
+ for (gi = 0; gi < DERING_STRENGTHS; gi++) {  // Try every dering table index
+ int threshold;
+ DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
+ uint16_t *in;
+ int j;
+ level = dering_level_table[gi];
+ threshold = level << coeff_shift;
+ for (r = 0; r < nvb << bsize[0]; r++) {  // Reload dst with the unfiltered samples of this block
+ for (c = 0; c < nhb << bsize[0]; c++) {
+ dst[(r * MAX_MIB_SIZE << bsize[0]) + c] =
+ src[((sbr * MAX_MIB_SIZE << bsize[0]) + r) * stride +
+ (sbc * MAX_MIB_SIZE << bsize[0]) + c];
+ }
+ }
+ in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
+ /* We avoid filtering the pixels for which some of the pixels to average
+ are outside the frame. We could change the filter instead, but it
+ would
+ add special cases for any future vectorization. */
+ for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
+ inbuf[i] = OD_DERING_VERY_LARGE;
+ for (i = -OD_FILT_VBORDER * (sbr != 0);  // Border rows are copied only where a neighbouring block row exists
+ i < (nvb << bsize[0]) + OD_FILT_VBORDER * (sbr != nvsb - 1); i++) {
+ for (j = -OD_FILT_HBORDER * (sbc != 0);  // Border columns are copied only where a neighbouring block column exists
+ j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
+ j++) {
+ uint16_t *x;
+ x = &src[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
+ (sbc * MAX_MIB_SIZE << bsize[0])];
+ in[i * OD_FILT_BSTRIDE + j] = x[i * stride + j];
+ }
+ }
+ for (i = 0; i < CLPF_STRENGTHS; i++) {  // Try every clpf strength index together with this dering index
+ od_dering(tmp_dst, in, 0, dir, 0, dlist, dering_count, threshold,
+ i + (i == 3), clpf_damping, coeff_shift, 0);  // Index 3 is passed as 4; other indices are passed unchanged
+ copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst,
+ dlist, dering_count, bsize[0]);
+ mse[sb_count][gi][i] = (int)compute_dist(  // NOTE(review): the double result is truncated to int before being stored as uint64_t
+ dst, MAX_MIB_SIZE << bsize[0],
+ &ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
+ (sbc * MAX_MIB_SIZE << bsize[0])],
+ stride, nhb, nvb, coeff_shift);
+ }
+ sb_index[sb_count] =  // Same value is rewritten on every gi iteration
+ MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
+ }
+ sb_count++;
+ }
+ }
+ best_tot_mse = (uint64_t)1 << 63;  // Start value that the first candidate is expected to beat
+
+ int l0;
+ for (l0 = 0; l0 < DERING_STRENGTHS; l0++) {  // Enumerate non-decreasing tuples l0 <= l1 <= l2 <= l3 of dering indices
+ int l1;
+ lev[0] = l0;
+ for (l1 = l0; l1 < DERING_STRENGTHS; l1++) {
+ int l2;
+ lev[1] = l1;
+ for (l2 = l1; l2 < DERING_STRENGTHS; l2++) {
+ int l3;
+ lev[2] = l2;
+ for (l3 = l2; l3 < DERING_STRENGTHS; l3++) {
+ int cs0;
+ lev[3] = l3;  // NOTE(review): writes lev[0..3]; presumably DERING_REFINEMENT_LEVELS is 4 - confirm
+ for (cs0 = 0; cs0 < CLPF_STRENGTHS; cs0++) {  // Enumerate non-decreasing pairs cs0 <= cs1 of clpf indices
+ int cs1;
+ str[0] = cs0;
+ for (cs1 = cs0; cs1 < CLPF_STRENGTHS; cs1++) {
+ uint64_t tot_mse = 0;
+ str[1] = cs1;  // NOTE(review): writes str[0..1]; presumably CLPF_REFINEMENT_LEVELS is 2 - confirm
+ for (i = 0; i < sb_count; i++) {  // Each block contributes its smallest distortion among the candidate table entries
+ int gi;
+ int cs;
+ uint64_t best_mse = (uint64_t)1 << 63;
+ for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
+ for (cs = 0; cs < CLPF_REFINEMENT_LEVELS; cs++) {
+ if (mse[i][lev[gi]][str[cs]] < best_mse) {
+ best_mse = mse[i][lev[gi]][str[cs]];
+ }
+ }
+ }
+ tot_mse += best_mse;
+ }
+
+ // Add the bit cost: sb_count * lambda * (table cost for dering + table cost for clpf)
+ int dering_diffs = 0, clpf_diffs = 0;
+ for (i = 1; i < DERING_REFINEMENT_LEVELS; i++)
+ dering_diffs += lev[i] != lev[i - 1];  // Count adjacent entries that differ
+ for (i = 1; i < CLPF_REFINEMENT_LEVELS; i++)
+ clpf_diffs += str[i] != str[i - 1];
+ tot_mse += (uint64_t)(sb_count * lambda *
+ (log2[dering_diffs] + log2[clpf_diffs]));
+
+ if (tot_mse < best_tot_mse) {  // Strict comparison: the earliest candidate wins ties
+ for (i = 0; i < DERING_REFINEMENT_LEVELS; i++)
+ best_lev[i] = lev[i];
+ for (i = 0; i < CLPF_REFINEMENT_LEVELS; i++)
+ best_str[i] = str[i];
+ best_tot_mse = tot_mse;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (i = 0; i < DERING_REFINEMENT_LEVELS; i++) lev[i] = best_lev[i];
+ for (i = 0; i < CLPF_REFINEMENT_LEVELS; i++) str[i] = best_str[i];
+
+ id_to_levels(lev, str, levels_to_id(lev, str)); // Pack tables
+ cdef_get_bits(lev, str, &cm->dering_bits, &cm->clpf_bits);  // Sets the per-block bit counts used by the loops below
+
+ for (i = 0; i < sb_count; i++) {  // Pick, per recorded block, the table indices with the smallest stored distortion
+ int gi, cs;
+ int best_gi, best_clpf;
+ uint64_t best_mse = (uint64_t)1 << 63;
+ best_gi = best_clpf = 0;
+ for (gi = 0; gi < (1 << cm->dering_bits); gi++) {
+ for (cs = 0; cs < (1 << cm->clpf_bits); cs++) {
+ if (mse[i][lev[gi]][str[cs]] < best_mse) {
+ best_gi = gi;
+ best_clpf = cs;
+ best_mse = mse[i][lev[gi]][str[cs]];
+ }
+ }
+ }
+ cm->mi_grid_visible[sb_index[i]]->mbmi.dering_gain = best_gi;
+ cm->mi_grid_visible[sb_index[i]]->mbmi.clpf_strength = best_clpf;
+ }
+
+ aom_free(src);
+ aom_free(ref_coeff);
+ aom_free(mse);
+ aom_free(sb_index);
+
+ av1_clpf_test_plane(cm->frame_to_show, ref, cm, &cm->clpf_strength_u,  // NOTE(review): reads cm->frame_to_show rather than the frame argument - confirm they are the same buffer
+ AOM_PLANE_U);
+ av1_clpf_test_plane(cm->frame_to_show, ref, cm, &cm->clpf_strength_v,
+ AOM_PLANE_V);
+ cm->dering_level = levels_to_id(best_lev, best_str);  // Frame-level id built from the winning dering and clpf tables
+}
diff --git a/av1/encoder/pickdering.c b/av1/encoder/pickdering.c
deleted file mode 100644
index dce7686..0000000
--- a/av1/encoder/pickdering.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-#include <math.h>
-
-#include "./aom_scale_rtcd.h"
-#include "av1/common/dering.h"
-#include "av1/common/onyxc_int.h"
-#include "av1/common/reconinter.h"
-#include "av1/encoder/encoder.h"
-#include "aom/aom_integer.h"
-
-static double compute_dist(int16_t *x, int xstride, int16_t *y, int ystride,
- int nhb, int nvb, int coeff_shift) {
- int i, j;
- double sum;
- sum = 0;
- for (i = 0; i < nvb << 3; i++) {
- for (j = 0; j < nhb << 3; j++) {
- double tmp;
- tmp = x[i * xstride + j] - y[i * ystride + j];
- sum += tmp * tmp;
- }
- }
- return sum / (double)(1 << 2 * coeff_shift);
-}
-
-int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
- AV1_COMMON *cm, MACROBLOCKD *xd) {
- int r, c;
- int sbr, sbc;
- int nhsb, nvsb;
- int16_t *src;
- int16_t *ref_coeff;
- dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
- int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
- int stride;
- int bsize[3];
- int dec[3];
- int pli;
- int level;
- int best_level;
- int dering_count;
- int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
- src = aom_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
- ref_coeff = aom_malloc(sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);
- av1_setup_dst_planes(xd->plane, frame, 0, 0);
- for (pli = 0; pli < 3; pli++) {
- dec[pli] = xd->plane[pli].subsampling_x;
- bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli];
- }
- stride = cm->mi_cols << bsize[0];
- for (r = 0; r < cm->mi_rows << bsize[0]; ++r) {
- for (c = 0; c < cm->mi_cols << bsize[0]; ++c) {
-#if CONFIG_AOM_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- src[r * stride + c] = CONVERT_TO_SHORTPTR(
- xd->plane[0].dst.buf)[r * xd->plane[0].dst.stride + c];
- ref_coeff[r * stride + c] =
- CONVERT_TO_SHORTPTR(ref->y_buffer)[r * ref->y_stride + c];
- } else {
-#endif
- src[r * stride + c] =
- xd->plane[0].dst.buf[r * xd->plane[0].dst.stride + c];
- ref_coeff[r * stride + c] = ref->y_buffer[r * ref->y_stride + c];
-#if CONFIG_AOM_HIGHBITDEPTH
- }
-#endif
- }
- }
- nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
- nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
- /* Pick a base threshold based on the quantizer. The threshold will then be
- adjusted on a 64x64 basis. We use a threshold of the form T = a*Q^b,
- where a and b are derived empirically trying to optimize rate-distortion
- at different quantizer settings. */
- best_level = AOMMIN(
- MAX_DERING_LEVEL - 1,
- (int)floor(.5 +
- .45 * pow(av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >>
- (cm->bit_depth - 8),
- 0.6)));
- for (sbr = 0; sbr < nvsb; sbr++) {
- for (sbc = 0; sbc < nhsb; sbc++) {
- int nvb, nhb;
- int gi;
- int best_gi;
- int32_t best_mse = INT32_MAX;
- int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
- int16_t tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
- nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
- nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
- dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
- sbc * MAX_MIB_SIZE, dlist);
- if (dering_count == 0) continue;
- best_gi = 0;
- for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
- int cur_mse;
- int threshold;
- int16_t inbuf[OD_DERING_INBUF_SIZE];
- int16_t *in;
- int i, j;
- level = compute_level_from_index(best_level, gi);
- threshold = level << coeff_shift;
- for (r = 0; r < nvb << bsize[0]; r++) {
- for (c = 0; c < nhb << bsize[0]; c++) {
- dst[(r * MAX_MIB_SIZE << bsize[0]) + c] =
- src[((sbr * MAX_MIB_SIZE << bsize[0]) + r) * stride +
- (sbc * MAX_MIB_SIZE << bsize[0]) + c];
- }
- }
- in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
- /* We avoid filtering the pixels for which some of the pixels to average
- are outside the frame. We could change the filter instead, but it
- would
- add special cases for any future vectorization. */
- for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
- inbuf[i] = OD_DERING_VERY_LARGE;
- for (i = -OD_FILT_VBORDER * (sbr != 0);
- i < (nvb << bsize[0]) + OD_FILT_VBORDER * (sbr != nvsb - 1); i++) {
- for (j = -OD_FILT_HBORDER * (sbc != 0);
- j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
- j++) {
- int16_t *x;
- x = &src[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
- (sbc * MAX_MIB_SIZE << bsize[0])];
- in[i * OD_FILT_BSTRIDE + j] = x[i * stride + j];
- }
- }
- od_dering(tmp_dst, in, 0, dir, 0, dlist, dering_count, threshold,
- coeff_shift);
- copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst,
- dlist, dering_count, bsize[0]);
- cur_mse = (int)compute_dist(
- dst, MAX_MIB_SIZE << bsize[0],
- &ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
- (sbc * MAX_MIB_SIZE << bsize[0])],
- stride, nhb, nvb, coeff_shift);
- if (cur_mse < best_mse) {
- best_gi = gi;
- best_mse = cur_mse;
- }
- }
- cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
- MAX_MIB_SIZE * sbc]
- ->mbmi.dering_gain = best_gi;
- }
- }
- aom_free(src);
- aom_free(ref_coeff);
- return best_level;
-}