[CFL] Subsample to Q3
Result from luma subsampling is left-shifted by 3. This avoids having to
do it during averaging, in alpha search and when building the
prediction. This change does not alter the bitstream.
Results on Subset1
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000
https://arewecompressedyet.com/?job=cfl-baseline%402017-09-06T17%3A41%3A38.041Z&job=cfl-SubsampleQ3%402017-09-06T17%3A42%3A01.252Z
Change-Id: I6e89eac6496f7c36e46364c9223fbcbca6759032
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index b0192d3..e7cb1f8 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -699,10 +699,8 @@
// TODO(ltrudeau) Convert to uint16 for HBD support
uint8_t y_pix[MAX_SB_SQUARE];
- // Pixel buffer containing the downsampled luma pixels used as prediction for
- // chroma
- // TODO(ltrudeau) Convert to uint16 for HBD support
- uint8_t y_down_pix[MAX_SB_SQUARE];
+ // Downsampled luma pixels (in Q3) used for chroma prediction
+ int y_down_pix_q3[MAX_SB_SQUARE];
// Height and width of the luma prediction block currently in the pixel buffer
int y_height, y_width;
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index c51648b..9b33079 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -30,29 +30,30 @@
}
static INLINE void cfl_luma_subsampling_420(const uint8_t *y_pix,
- uint8_t *output, int width,
+ int *output_q3, int width,
int height) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
int top = i << 1;
int bot = top + MAX_SB_SIZE;
int sum = y_pix[top] + y_pix[top + 1] + y_pix[bot] + y_pix[bot + 1];
- output[i] = (sum + 2) >> 2;
+ // TODO(ltrudeau) replace "+ 2 >> 2 << 3" with << 1
+ output_q3[i] = ((sum + 2) >> 2) << 3;
}
y_pix += MAX_SB_SIZE << 1;
- output += MAX_SB_SIZE;
+ output_q3 += MAX_SB_SIZE;
}
}
static INLINE void cfl_luma_subsampling_444(const uint8_t *y_pix,
- uint8_t *output, int width,
+ int *output_q3, int width,
int height) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
- output[i] = y_pix[i];
+ output_q3[i] = y_pix[i] << 3;
}
y_pix += MAX_SB_SIZE;
- output += MAX_SB_SIZE;
+ output_q3 += MAX_SB_SIZE;
}
}
@@ -62,20 +63,19 @@
const int sub_y = cfl->subsampling_y;
const int off_log2 = tx_size_wide_log2[0];
- // TODO(ltrudeau) convert to uint16 to add HBD support
- uint8_t *output = cfl->y_down_pix;
+ int *output_q3 = cfl->y_down_pix_q3;
// TODO(ltrudeau) should be faster to downsample when we store the values
// TODO(ltrudeau) add support for 4:2:2
if (sub_y == 0 && sub_x == 0) {
// TODO(ltrudeau) convert to uint16 to add HBD support
const uint8_t *y_pix = cfl->y_pix + ((row * MAX_SB_SIZE + col) << off_log2);
- cfl_luma_subsampling_444(y_pix, output, width, height);
+ cfl_luma_subsampling_444(y_pix, output_q3, width, height);
} else if (sub_y == 1 && sub_x == 1) {
// TODO(ltrudeau) convert to uint16 to add HBD support
const uint8_t *y_pix =
cfl->y_pix + ((row * MAX_SB_SIZE + col) << (off_log2 + sub_y));
- cfl_luma_subsampling_420(y_pix, output, width, height);
+ cfl_luma_subsampling_420(y_pix, output_q3, width, height);
} else {
assert(0); // Unsupported chroma subsampling
}
@@ -100,7 +100,7 @@
for (int j = 0; j < height; j++) {
last_pixel = output_row_offset - 1;
for (int i = 0; i < diff_width; i++) {
- output[output_row_offset + i] = output[last_pixel];
+ output_q3[output_row_offset + i] = output_q3[last_pixel];
}
output_row_offset += MAX_SB_SIZE;
}
@@ -112,7 +112,7 @@
for (int j = 0; j < diff_height; j++) {
for (int i = 0; i < width; i++) {
- output[output_row_offset + i] = output[last_row_offset + i];
+ output_q3[output_row_offset + i] = output_q3[last_row_offset + i];
}
output_row_offset += MAX_SB_SIZE;
}
@@ -200,10 +200,8 @@
const int num_pel_log2 =
(tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size]);
- // TODO(ltrudeau) Convert to uint16 for HBD support
- const uint8_t *y_pix = cfl->y_down_pix;
- // TODO(ltrudeau) Convert to uint16 for HBD support
- const uint8_t *t_y_pix;
+ const int *y_pix_q3 = cfl->y_down_pix_q3;
+ const int *t_y_pix_q3;
int *averages_q3 = cfl->y_averages_q3;
cfl_load(cfl, 0, 0, width, height);
@@ -211,24 +209,23 @@
int a = 0;
for (int b_j = 0; b_j < height; b_j += tx_height) {
for (int b_i = 0; b_i < width; b_i += tx_width) {
- int sum = 0;
- t_y_pix = y_pix;
+ int sum_q3 = 0;
+ t_y_pix_q3 = y_pix_q3;
for (int t_j = 0; t_j < tx_height; t_j++) {
for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
- sum += t_y_pix[t_i];
+ sum_q3 += t_y_pix_q3[t_i];
}
- t_y_pix += MAX_SB_SIZE;
+ t_y_pix_q3 += MAX_SB_SIZE;
}
assert(a < MAX_NUM_TXB_SQUARE);
- averages_q3[a++] =
- ((sum << 3) + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
+ averages_q3[a++] = (sum_q3 + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
// Loss is never more than 1/2 (in Q3)
assert(fabs((double)averages_q3[a - 1] -
- (sum / ((double)(1 << num_pel_log2))) * (1 << 3)) <= 0.5);
+ (sum_q3 / ((double)(1 << num_pel_log2)))) <= 0.5);
}
assert(a % stride == 0);
- y_pix += block_row_stride;
+ y_pix_q3 += block_row_stride;
}
cfl->y_averages_stride = stride;
@@ -255,8 +252,7 @@
const int width = tx_size_wide[tx_size];
const int height = tx_size_high[tx_size];
- // TODO(ltrudeau) Convert to uint16 to support HBD
- const uint8_t *y_pix = cfl->y_down_pix;
+ const int *y_pix_q3 = cfl->y_down_pix_q3;
const int dc_pred = cfl->dc_pred[plane - 1];
const int alpha_q3 =
@@ -273,11 +269,11 @@
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
// TODO(ltrudeau) add support for HBD.
- dst[i] =
- clip_pixel(get_scaled_luma_q0(alpha_q3, y_pix[i], avg_q3) + dc_pred);
+ dst[i] = clip_pixel(get_scaled_luma_q0(alpha_q3, y_pix_q3[i], avg_q3) +
+ dc_pred);
}
dst += dst_stride;
- y_pix += MAX_SB_SIZE;
+ y_pix_q3 += MAX_SB_SIZE;
}
}
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 7a56a49..e6de1b1 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -14,8 +14,8 @@
#include "av1/common/blockd.h"
-static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix, int avg_q3) {
- int scaled_luma_q6 = alpha_q3 * ((y_pix << 3) - avg_q3);
+static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix_q3, int avg_q3) {
+ int scaled_luma_q6 = alpha_q3 * (y_pix_q3 - avg_q3);
return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}