Fix the dual loopfilter for cb4x4
In cb4x4, the dual loopfilter filters 2 * 4 = 8 pixels.
This patch does not affect the encoder/decoder, since the dual
loopfilters are not applied in the bit mask implementation.
Change-Id: Ifdeb8990127de39143971156db69a69ee3bd3136
diff --git a/aom_dsp/loopfilter.c b/aom_dsp/loopfilter.c
index 10bc72f..c214419 100644
--- a/aom_dsp/loopfilter.c
+++ b/aom_dsp/loopfilter.c
@@ -201,7 +201,11 @@
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1);
+#else
aom_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
+#endif
}
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
@@ -236,7 +240,11 @@
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
+#else
aom_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
+#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
@@ -333,7 +341,11 @@
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1);
+#else
aom_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
+#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
@@ -384,7 +396,11 @@
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
aom_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
+#else
aom_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
+#endif
}
#if PARALLEL_DEBLOCKING_13_TAP
@@ -674,7 +690,11 @@
void aom_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
+#if CONFIG_PARALLEL_DEBLOCKING
+ mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
+#else
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
+#endif
}
#if CONFIG_HIGHBITDEPTH
@@ -842,7 +862,11 @@
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_highbd_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1, bd);
+#else
aom_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd);
+#endif
}
void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
@@ -879,8 +903,13 @@
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_highbd_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
+ bd);
+#else
aom_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1,
bd);
+#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
@@ -981,7 +1010,11 @@
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_highbd_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1, bd);
+#else
aom_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd);
+#endif
}
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
@@ -1036,8 +1069,13 @@
const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1, int bd) {
aom_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_highbd_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
+ bd);
+#else
aom_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1,
bd);
+#endif
}
#if PARALLEL_DEBLOCKING_13_TAP
diff --git a/aom_dsp/x86/highbd_loopfilter_sse2.c b/aom_dsp/x86/highbd_loopfilter_sse2.c
index 7571cb0..d2ab76e 100644
--- a/aom_dsp/x86/highbd_loopfilter_sse2.c
+++ b/aom_dsp/x86/highbd_loopfilter_sse2.c
@@ -662,7 +662,11 @@
const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
const uint8_t *_thresh1, int bd) {
aom_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_highbd_lpf_horizontal_8_sse2(s + 4, p, _blimit1, _limit1, _thresh1, bd);
+#else
aom_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
+#endif
}
void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
@@ -859,7 +863,11 @@
const uint8_t *_thresh0, const uint8_t *_blimit1, const uint8_t *_limit1,
const uint8_t *_thresh1, int bd) {
aom_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
+#if CONFIG_PARALLEL_DEBLOCKING
+ aom_highbd_lpf_horizontal_4_sse2(s + 4, p, _blimit1, _limit1, _thresh1, bd);
+#else
aom_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
+#endif
}
void aom_highbd_lpf_vertical_4_sse2(uint16_t *s, int p, const uint8_t *blimit,