Support 64x64 intra prediction
Change-Id: I2536b5b55f28c2ee59445c3b70d3e073e69945cd
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 6d230d2..c15316b 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -314,6 +314,62 @@
add_proto qw/void aom_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_dc_128_predictor_32x32 msa neon sse2/;
+if ((aom_config("CONFIG_TX64X64") eq "yes")) {
+ add_proto qw/void aom_d207_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d207_predictor_64x64/;
+
+ add_proto qw/void aom_d207e_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d207e_predictor_64x64/;
+
+ add_proto qw/void aom_d45_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d45_predictor_64x64/;
+
+ add_proto qw/void aom_d45e_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d45e_predictor_64x64/;
+
+ add_proto qw/void aom_d63_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d63_predictor_64x64/;
+
+ add_proto qw/void aom_d63e_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d63e_predictor_64x64/;
+
+ add_proto qw/void aom_h_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_h_predictor_64x64/;
+
+ add_proto qw/void aom_d117_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d117_predictor_64x64/;
+
+ add_proto qw/void aom_d135_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d135_predictor_64x64/;
+
+ add_proto qw/void aom_d153_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_d153_predictor_64x64/;
+
+ add_proto qw/void aom_v_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_v_predictor_64x64/;
+
+ if ((aom_config("CONFIG_ALT_INTRA") eq "yes")) {
+ add_proto qw/void aom_paeth_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_paeth_predictor_64x64/;
+ } else {
+ add_proto qw/void aom_tm_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_tm_predictor_64x64/;
+ } # CONFIG_ALT_INTRA
+
+
+ add_proto qw/void aom_dc_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_dc_predictor_64x64/;
+
+ add_proto qw/void aom_dc_top_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_dc_top_predictor_64x64/;
+
+ add_proto qw/void aom_dc_left_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_dc_left_predictor_64x64/;
+
+ add_proto qw/void aom_dc_128_predictor_64x64/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+ specialize qw/aom_dc_128_predictor_64x64/;
+}
+
# High bitdepth functions
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
@@ -524,6 +580,60 @@
add_proto qw/void aom_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/aom_highbd_dc_128_predictor_32x32/;
+
+ if ((aom_config("CONFIG_TX64X64") eq "yes")) {
+ add_proto qw/void aom_highbd_d207_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d207_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d207e_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d207e_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d45_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d45_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d45e_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d45e_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d63_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d63_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d63e_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d63e_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_h_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_h_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d117_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d117_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d135_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d135_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_d153_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_d153_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_v_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_v_predictor_64x64/;
+
+ # Exactly one of the paeth / tm 64x64 high-bitdepth predictors is declared,
+ # selected by CONFIG_ALT_INTRA. Each prototype is paired with a specialize
+ # line, matching the low-bitdepth 64x64 declarations.
+ if ((aom_config("CONFIG_ALT_INTRA") eq "yes")) {
+ add_proto qw/void aom_highbd_paeth_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_paeth_predictor_64x64/;
+ } else {
+ add_proto qw/void aom_highbd_tm_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_tm_predictor_64x64/;
+ } # CONFIG_ALT_INTRA
+
+ add_proto qw/void aom_highbd_dc_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_dc_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_dc_top_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_dc_top_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_dc_left_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_dc_left_predictor_64x64/;
+
+ add_proto qw/void aom_highbd_dc_128_predictor_64x64/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/aom_highbd_dc_128_predictor_64x64/;
+ }
} # CONFIG_AOM_HIGHBITDEPTH
#
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index 29b5a74..1307415 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -149,6 +149,15 @@
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int i;
+#if CONFIG_TX64X64
+#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
+ // silence a spurious -Warray-bounds warning, possibly related to:
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
+ uint8_t border[133];
+#else
+ uint8_t border[64 + 64 - 1]; // outer border from bottom-left to top-right
+#endif
+#else
#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
// silence a spurious -Warray-bounds warning, possibly related to:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
@@ -156,6 +165,7 @@
#else
uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right
#endif
+#endif // CONFIG_TX64X64
// dst(bs, bs - 2)[0], i.e., border starting at bottom-left
for (i = 0; i < bs - 2; ++i) {
@@ -965,6 +975,31 @@
}
/* clang-format off */
+#if CONFIG_TX64X64
+#define intra_pred_allsizes(type) \
+ intra_pred_sized(type, 2) \
+ intra_pred_sized(type, 4) \
+ intra_pred_sized(type, 8) \
+ intra_pred_sized(type, 16) \
+ intra_pred_sized(type, 32) \
+ intra_pred_sized(type, 64) \
+ intra_pred_highbd_sized(type, 4) \
+ intra_pred_highbd_sized(type, 8) \
+ intra_pred_highbd_sized(type, 16) \
+ intra_pred_highbd_sized(type, 32) \
+ intra_pred_highbd_sized(type, 64)
+
+#define intra_pred_above_4x4(type) \
+ intra_pred_sized(type, 8) \
+ intra_pred_sized(type, 16) \
+ intra_pred_sized(type, 32) \
+ intra_pred_sized(type, 64) \
+ intra_pred_highbd_sized(type, 4) \
+ intra_pred_highbd_sized(type, 8) \
+ intra_pred_highbd_sized(type, 16) \
+ intra_pred_highbd_sized(type, 32) \
+ intra_pred_highbd_sized(type, 64)
+#else // CONFIG_TX64X64
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 2) \
intra_pred_sized(type, 4) \
@@ -984,8 +1019,25 @@
intra_pred_highbd_sized(type, 8) \
intra_pred_highbd_sized(type, 16) \
intra_pred_highbd_sized(type, 32)
+#endif // CONFIG_TX64X64
#else
+
+#if CONFIG_TX64X64
+#define intra_pred_allsizes(type) \
+ intra_pred_sized(type, 2) \
+ intra_pred_sized(type, 4) \
+ intra_pred_sized(type, 8) \
+ intra_pred_sized(type, 16) \
+ intra_pred_sized(type, 32) \
+ intra_pred_sized(type, 64)
+
+#define intra_pred_above_4x4(type) \
+ intra_pred_sized(type, 8) \
+ intra_pred_sized(type, 16) \
+ intra_pred_sized(type, 32) \
+ intra_pred_sized(type, 64)
+#else // CONFIG_TX64X64
#define intra_pred_allsizes(type) \
intra_pred_sized(type, 2) \
intra_pred_sized(type, 4) \
@@ -997,6 +1049,7 @@
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32)
+#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
intra_pred_above_4x4(d207)
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 693ad80..809e9b4 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -322,10 +322,18 @@
#endif // CONFIG_AOM_HIGHBITDEPTH
static void av1_init_intra_predictors_internal(void) {
+#if CONFIG_TX64X64
+#define INIT_NO_4X4(p, type) \
+ p[TX_8X8] = aom_##type##_predictor_8x8; \
+ p[TX_16X16] = aom_##type##_predictor_16x16; \
+ p[TX_32X32] = aom_##type##_predictor_32x32; \
+ p[TX_64X64] = aom_##type##_predictor_64x64
+#else
#define INIT_NO_4X4(p, type) \
p[TX_8X8] = aom_##type##_predictor_8x8; \
p[TX_16X16] = aom_##type##_predictor_16x16; \
p[TX_32X32] = aom_##type##_predictor_32x32
+#endif // CONFIG_TX64X64
#define INIT_ALL_SIZES(p, type) \
p[TX_4X4] = aom_##type##_predictor_4x4; \
@@ -910,17 +918,46 @@
{ 589, 646, -495, 255 },
{ 740, 884, -728, 77 },
},
+#if CONFIG_TX64X64
+ {
+ { 477, 737, -393, 150 },
+ { 881, 630, -546, 67 },
+ { 506, 984, -443, -20 },
+ { 114, 459, -270, 528 },
+ { 433, 528, 14, 3 },
+ { 837, 470, -301, -30 },
+ { 181, 777, 89, -107 },
+ { -29, 716, -232, 259 },
+ { 589, 646, -495, 255 },
+ { 740, 884, -728, 77 },
+ },
+#endif // CONFIG_TX64X64
};
+// Maps a square block edge length (in pixels) to the corresponding TX_SIZE.
+// 64 is only recognised when CONFIG_TX64X64 is enabled.
+// Any other length trips the assert in debug builds. When assert() is
+// compiled out the function returns TX_4X4 instead of falling off its end
+// without a value; TX_4X4 is the same fallback the inline size selection in
+// the filter-intra predictors used before this helper existed.
+static INLINE TX_SIZE get_txsize_from_blocklen(int bs) {
+ switch (bs) {
+ case 4: return TX_4X4;
+ case 8: return TX_8X8;
+ case 16: return TX_16X16;
+ case 32: return TX_32X32;
+#if CONFIG_TX64X64
+ case 64: return TX_64X64;
+#endif // CONFIG_TX64X64
+ default: assert(0); return TX_4X4;
+ }
+}
+
static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above,
const uint8_t *left, int mode) {
int k, r, c;
- int buffer[33][65];
int mean, ipred;
- const TX_SIZE tx_size =
- (bs == 32) ? TX_32X32
- : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
+#if CONFIG_TX64X64
+ int buffer[65][129];
+#else
+ int buffer[33][65];
+#endif // CONFIG_TX64X64
+ const TX_SIZE tx_size = get_txsize_from_blocklen(bs);
const int c0 = av1_filter_intra_taps_4[tx_size][mode][0];
const int c1 = av1_filter_intra_taps_4[tx_size][mode][1];
const int c2 = av1_filter_intra_taps_4[tx_size][mode][2];
@@ -1040,11 +1077,13 @@
const uint16_t *left, int mode,
int bd) {
int k, r, c;
- int preds[33][65];
int mean, ipred;
- const TX_SIZE tx_size =
- (bs == 32) ? TX_32X32
- : ((bs == 16) ? TX_16X16 : ((bs == 8) ? TX_8X8 : (TX_4X4)));
+#if CONFIG_TX64X64
+ int preds[65][129];
+#else
+ int preds[33][65];
+#endif // CONFIG_TX64X64
+ const TX_SIZE tx_size = get_txsize_from_blocklen(bs);
const int c0 = av1_filter_intra_taps_4[tx_size][mode][0];
const int c1 = av1_filter_intra_taps_4[tx_size][mode][1];
const int c2 = av1_filter_intra_taps_4[tx_size][mode][2];