Comprehensive support for symmetric DST
Adds seven hybrid transform types that pair the symmetric DST with DST,
DCT, ADST and FLIPADST, bringing the total number of supported transform types to 16.
derfl: +1.659% (up about 0.2%)
Change-Id: Idde1cecdb59527890bf05da740099c3f6a5b9764
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 70983ca..597cf12 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -239,6 +239,13 @@
DCT_ADST,
FLIPADST_DCT,
DCT_FLIPADST,
+ DST_DST,
+ DST_DCT,
+ DCT_DST,
+ DST_ADST,
+ ADST_DST,
+ DST_FLIPADST,
+ FLIPADST_DST,
};
#endif // CONFIG_EXT_TX
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index b9d5af9..c638423 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -317,19 +317,26 @@
#if CONFIG_EXT_TX
const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(EXT_TX_TYPES)] = {
-NORM, 2,
- 4, 10,
- 6, 8,
- -ALT1, -ALT2,
- -ALT3, -ALT4,
+ -ALT9, 4,  // entries 2-3; NOTE(review): presumably negative = leaf symbol, positive = position of a child pair - confirm
+ 6, 16,  // entries 4-5
+ 8, 10,  // entries 6-7
+ -ALT10, -ALT11,  // entries 8-9
12, 14,
- -ALT5, -ALT6,
- -ALT7, -ALT8,
+ -ALT1, -ALT2,  // entries 12-13
+ -ALT4, -ALT5,  // entries 14-15
+ 18, 24,  // entries 16-17
+ 20, 22,  // entries 18-19
+ -ALT12, -ALT13,  // entries 20-21
+ -ALT14, -ALT15,  // entries 22-23
+ 26, 28,  // entries 24-25
+ -ALT3, -ALT6,  // entries 26-27
+ -ALT7, -ALT8  // entries 28-29 (last pair of the 30-entry array)
};
static const vpx_prob default_ext_tx_prob[EXT_TX_SIZES][EXT_TX_TYPES - 1] = {
- { 240, 128, 128, 128, 128, 128, 128, 128 },
- { 208, 128, 128, 128, 128, 128, 128, 128 },
- { 176, 128, 128, 128, 128, 128, 128, 128 },
+ { 216, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 192, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 168, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
};
#endif // CONFIG_EXT_TX
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index a63b493..a4a5440 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -100,6 +100,13 @@
FLIPADST_FLIPADST = 6,
ADST_FLIPADST = 7,
FLIPADST_ADST = 8,
+ DST_DST = 9,
+ DST_DCT = 10,
+ DCT_DST = 11,
+ DST_ADST = 12,
+ ADST_DST = 13,
+ DST_FLIPADST = 14,
+ FLIPADST_DST = 15,
#endif // CONFIG_EXT_TX
TX_TYPES,
} TX_TYPE;
@@ -116,6 +123,13 @@
ALT6 = 6,
ALT7 = 7,
ALT8 = 8,
+ ALT9 = 9,
+ ALT10 = 10,
+ ALT11 = 11,
+ ALT12 = 12,
+ ALT13 = 13,
+ ALT14 = 14,
+ ALT15 = 15,
EXT_TX_TYPES
} EXT_TX_TYPE;
#endif // CONFIG_EXT_TX
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 6533690..9e1f870 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -17,45 +17,170 @@
#include "vpx_dsp/inv_txfm.h"
#include "vpx_ports/mem.h"
-void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- int tx_type) {
- const transform_2d IHT_4[] = {
- { idct4_c, idct4_c }, // DCT_DCT = 0
- { iadst4_c, idct4_c }, // ADST_DCT = 1
- { idct4_c, iadst4_c }, // DCT_ADST = 2
- { iadst4_c, iadst4_c } // ADST_ADST = 3
+#if CONFIG_EXT_TX
+void idst4_c(const tran_low_t *input, tran_low_t *output) {  // 4-point DST as a direct matrix-vector product
+ static const int N = 4;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/5) * 2^14 for k = 1..2
+ 9630, 15582
};
-
+ static const int mult = 14654; // sqrt(4/5) * 2^14, overall gain
int i, j;
- tran_low_t out[4 * 4];
- tran_low_t *outptr = out;
- tran_low_t temp_in[4], temp_out[4];
-
- // inverse transform row vectors
- for (i = 0; i < 4; ++i) {
- IHT_4[tx_type].rows(input, outptr);
- input += 4;
- outptr += 4;
- }
-
- // inverse transform column vectors
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j * 4 + i];
- IHT_4[tx_type].cols(temp_in, temp_out);
- for (j = 0; j < 4; ++j) {
- dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
- ROUND_POWER_OF_TWO(temp_out[j], 4));
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
}
+ sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ output[i] = WRAPLOW(sum, 8);  // WRAPLOW is defined elsewhere; 8 is presumably the bit depth - confirm
}
}
-static const transform_2d IHT_8[] = {
- { idct8_c, idct8_c }, // DCT_DCT = 0
- { iadst8_c, idct8_c }, // ADST_DCT = 1
- { idct8_c, iadst8_c }, // DCT_ADST = 2
- { iadst8_c, iadst8_c } // ADST_ADST = 3
-};
+void idst8_c(const tran_low_t *input, tran_low_t *output) {  // 8-point DST as a direct matrix-vector product
+ static const int N = 8;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/9) * 2^14 for k = 1..4
+ 5604, 10531, 14189, 16135
+ };
+ static const int mult = 15447; // 2*sqrt(2/9) * 2^14, overall gain
+ int i, j;
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi (e.g. idx == 9)
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ output[i] = WRAPLOW(sum, 8);  // WRAPLOW is defined elsewhere; 8 is presumably the bit depth - confirm
+ }
+}
+
+void idst16_c(const tran_low_t *input, tran_low_t *output) {  // 16-point DST as a direct matrix-vector product
+ static const int N = 16;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/17) * 2^14 for k = 1..8
+ 3011, 5919, 8625, 11038,
+ 13075, 14666, 15759, 16314
+ };
+ static const int mult = 15895; // 2*sqrt(4/17) * 2^14, overall gain
+ int i, j;
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ output[i] = WRAPLOW(sum, 8);  // WRAPLOW is defined elsewhere; 8 is presumably the bit depth - confirm
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {  // 4-point DST, variant that forwards bd to WRAPLOW
+ static const int N = 4;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/5) * 2^14 for k = 1..2
+ 9630, 15582
+ };
+ static const int mult = 14654; // sqrt(4/5) * 2^14, overall gain
+ int i, j;
+ (void) bd;  // NOTE(review): presumably silences an unused warning when WRAPLOW drops its 2nd argument - confirm
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ output[i] = WRAPLOW(sum, bd);  // WRAPLOW is defined elsewhere
+ }
+}
+
+void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {  // 8-point DST, variant that forwards bd to WRAPLOW
+ static const int N = 8;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/9) * 2^14 for k = 1..4
+ 5604, 10531, 14189, 16135
+ };
+ static const int mult = 15447; // 2*sqrt(2/9) * 2^14, overall gain
+ int i, j;
+ (void) bd;  // NOTE(review): presumably silences an unused warning when WRAPLOW drops its 2nd argument - confirm
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi (e.g. idx == 9)
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ output[i] = WRAPLOW(sum, bd);  // WRAPLOW is defined elsewhere
+ }
+}
+
+void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {  // 16-point DST, variant that forwards bd to WRAPLOW
+ static const int N = 16;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/17) * 2^14 for k = 1..8
+ 3011, 5919, 8625, 11038,
+ 13075, 14666, 15759, 16314
+ };
+ static const int mult = 15895; // 2*sqrt(4/17) * 2^14, overall gain
+ int i, j;
+ (void) bd;  // NOTE(review): presumably silences an unused warning when WRAPLOW drops its 2nd argument - confirm
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ output[i] = WRAPLOW(sum, bd);  // WRAPLOW is defined elsewhere
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
#if CONFIG_EXT_TX
void fliplr(uint8_t *dest, int stride, int l) {
@@ -125,8 +250,76 @@
}
#endif // CONFIG_EXT_TX
+void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,  // 4x4 inverse hybrid transform, result added into dest
+ int tx_type) {  // tx_type indexes IHT_4 directly, with no range check here
+ const transform_2d IHT_4[] = {  // one kernel pair per tx_type; field order follows transform_2d, declared elsewhere
+ { idct4_c, idct4_c }, // DCT_DCT = 0
+ { iadst4_c, idct4_c }, // ADST_DCT = 1
+ { idct4_c, iadst4_c }, // DCT_ADST = 2
+ { iadst4_c, iadst4_c }, // ADST_ADST = 3
+#if CONFIG_EXT_TX  // FLIPADST rows below reuse the plain iadst4_c kernel; no flipping happens in this function
+ { iadst4_c, idct4_c }, // FLIPADST_DCT = 4
+ { idct4_c, iadst4_c }, // DCT_FLIPADST = 5
+ { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6
+ { iadst4_c, iadst4_c }, // ADST_FLIPADST = 7
+ { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8
+ { idst4_c, idst4_c }, // DST_DST = 9
+ { idst4_c, idct4_c }, // DST_DCT = 10
+ { idct4_c, idst4_c }, // DCT_DST = 11
+ { idst4_c, iadst4_c }, // DST_ADST = 12
+ { iadst4_c, idst4_c }, // ADST_DST = 13
+ { idst4_c, iadst4_c }, // DST_FLIPADST = 14
+ { iadst4_c, idst4_c }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
+ int i, j;
+ tran_low_t out[4 * 4];  // row-pass output, stored row-major
+ tran_low_t *outptr = out;
+ tran_low_t temp_in[4], temp_out[4];  // scratch for one column
+
+ // inverse transform row vectors
+ for (i = 0; i < 4; ++i) {
+ IHT_4[tx_type].rows(input, outptr);
+ input += 4;  // advance to the next row of coefficients
+ outptr += 4;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j * 4 + i];  // gather column i of the row-pass output
+ IHT_4[tx_type].cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j) {
+ dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],  // add to the existing dest sample
+ ROUND_POWER_OF_TWO(temp_out[j], 4));  // final scale-down by 2^4
+ }
+ }
+}
+
void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
+ static const transform_2d IHT_8[] = {
+ { idct8_c, idct8_c }, // DCT_DCT = 0
+ { iadst8_c, idct8_c }, // ADST_DCT = 1
+ { idct8_c, iadst8_c }, // DCT_ADST = 2
+ { iadst8_c, iadst8_c }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { iadst8_c, idct8_c }, // FLIPADST_DCT = 4
+ { idct8_c, iadst8_c }, // DCT_FLIPADST = 5
+ { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6
+ { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7
+ { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8
+ { idst8_c, idst8_c }, // DST_DST = 9
+ { idst8_c, idct8_c }, // DST_DCT = 10
+ { idct8_c, idst8_c }, // DCT_DST = 11
+ { idst8_c, iadst8_c }, // DST_ADST = 12
+ { iadst8_c, idst8_c }, // ADST_DST = 13
+ { idst8_c, iadst8_c }, // DST_FLIPADST = 14
+ { iadst8_c, idst8_c }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
int i, j;
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
@@ -152,15 +345,29 @@
}
}
-static const transform_2d IHT_16[] = {
- { idct16_c, idct16_c }, // DCT_DCT = 0
- { iadst16_c, idct16_c }, // ADST_DCT = 1
- { idct16_c, iadst16_c }, // DCT_ADST = 2
- { iadst16_c, iadst16_c } // ADST_ADST = 3
-};
-
void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
+ static const transform_2d IHT_16[] = {
+ { idct16_c, idct16_c }, // DCT_DCT = 0
+ { iadst16_c, idct16_c }, // ADST_DCT = 1
+ { idct16_c, iadst16_c }, // DCT_ADST = 2
+ { iadst16_c, iadst16_c }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { iadst16_c, idct16_c }, // FLIPADST_DCT = 4
+ { idct16_c, iadst16_c }, // DCT_FLIPADST = 5
+ { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6
+ { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7
+ { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8
+ { idst16_c, idst16_c }, // DST_DST = 9
+ { idst16_c, idct16_c }, // DST_DCT = 10
+ { idct16_c, idst16_c }, // DCT_DST = 11
+ { idst16_c, iadst16_c }, // DST_ADST = 12
+ { iadst16_c, idst16_c }, // ADST_DST = 13
+ { idst16_c, iadst16_c }, // DST_FLIPADST = 14
+ { iadst16_c, idst16_c }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
int i, j;
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
@@ -286,6 +493,24 @@
vp10_iht4x4_16_add(input, dest, stride, ADST_ADST);
flipud(dest, stride, 4);
break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST only exists in C code
+ vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
+ break;
+ case FLIPADST_DST:
+ flipud(dest, stride, 4);
+ vp10_iht4x4_16_add_c(input, dest, stride, ADST_DST);
+ flipud(dest, stride, 4);
+ break;
+ case DST_FLIPADST:
+ fliplr(dest, stride, 4);
+ vp10_iht4x4_16_add_c(input, dest, stride, DST_ADST);
+ fliplr(dest, stride, 4);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -330,6 +555,24 @@
vp10_iht8x8_64_add(input, dest, stride, ADST_ADST);
flipud(dest, stride, 8);
break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST only exists in C code
+ vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
+ break;
+ case FLIPADST_DST:
+ flipud(dest, stride, 8);
+ vp10_iht8x8_64_add_c(input, dest, stride, ADST_DST);
+ flipud(dest, stride, 8);
+ break;
+ case DST_FLIPADST:
+ fliplr(dest, stride, 8);
+ vp10_iht8x8_64_add_c(input, dest, stride, DST_ADST);
+ fliplr(dest, stride, 8);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -374,6 +617,24 @@
vp10_iht16x16_256_add(input, dest, stride, ADST_ADST);
flipud(dest, stride, 16);
break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST only exists in C code
+ vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
+ break;
+ case FLIPADST_DST:
+ flipud(dest, stride, 16);
+ vp10_iht16x16_256_add_c(input, dest, stride, ADST_DST);
+ flipud(dest, stride, 16);
+ break;
+ case DST_FLIPADST:
+ fliplr(dest, stride, 16);
+ vp10_iht16x16_256_add_c(input, dest, stride, DST_ADST);
+ fliplr(dest, stride, 16);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -402,10 +663,24 @@
void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
const highbd_transform_2d IHT_4[] = {
- { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
- { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
- { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
+ { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8
+ { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 9
+ { highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 10
+ { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 11
+ { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 12
+ { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 13
+ { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 14
+ { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -433,15 +708,29 @@
}
}
-static const highbd_transform_2d HIGH_IHT_8[] = {
- { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
- { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
- { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
-};
-
void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_8[] = {
+ { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8
+ { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 9
+ { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 10
+ { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 11
+ { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 12
+ { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 13
+ { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 14
+ { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
int i, j;
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
@@ -468,15 +757,29 @@
}
}
-static const highbd_transform_2d HIGH_IHT_16[] = {
- { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
- { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
- { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
-};
-
void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_16[] = {
+ { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST = 7
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8
+ { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 9
+ { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 10
+ { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 11
+ { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 12
+ { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 13
+ { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 14
+ { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
+ };
+
int i, j;
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
@@ -606,6 +909,24 @@
vp10_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd);
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case FLIPADST_DST:
+ flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+ vp10_highbd_iht4x4_16_add_c(input, dest, stride, ADST_DST, bd);
+ flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+ break;
+ case DST_FLIPADST:
+ fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+ vp10_highbd_iht4x4_16_add_c(input, dest, stride, DST_ADST, bd);
+ fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -651,6 +972,24 @@
vp10_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd);
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case FLIPADST_DST:
+ flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+ vp10_highbd_iht8x8_64_add_c(input, dest, stride, ADST_DST, bd);
+ flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+ break;
+ case DST_FLIPADST:
+ fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+ vp10_highbd_iht8x8_64_add_c(input, dest, stride, DST_ADST, bd);
+ fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -696,6 +1035,24 @@
vp10_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd);
flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
break;
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST only exists in C code
+ vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
+ break;
+ case FLIPADST_DST:
+ flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+ vp10_highbd_iht16x16_256_add_c(input, dest, stride, ADST_DST, bd);
+ flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+ break;
+ case DST_FLIPADST:
+ fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+ vp10_highbd_iht16x16_256_add_c(input, dest, stride, DST_ADST, bd);
+ fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 4cb78e8..57095d9 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -696,7 +696,6 @@
};
#if CONFIG_EXT_TX
-
const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
{ // TX_4X4
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
@@ -708,6 +707,13 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
}, { // TX_8X8
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
@@ -718,6 +724,13 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
}, { // TX_16X16
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
{row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
@@ -728,6 +741,13 @@
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
@@ -738,6 +758,13 @@
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
}
};
@@ -752,6 +779,13 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
}, { // TX_8X8
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
@@ -762,6 +796,13 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
}, { // TX_16X16
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
@@ -772,6 +813,13 @@
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+ {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
@@ -782,6 +830,13 @@
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+ {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
}
};
@@ -810,5 +865,4 @@
{default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
}
};
-
#endif // CONFIG_EXT_TX
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 288d5d0..cb1ede2 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -20,6 +20,84 @@
#include "vpx_dsp/fwd_txfm.h"
#include "vpx_ports/mem.h"
+#if CONFIG_EXT_TX
+void fdst4(const tran_low_t *input, tran_low_t *output) {  // 4-point forward DST as a direct matrix-vector product
+ static const int N = 4;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/5) * 2^14 for k = 1..2
+ 9630, 15582
+ };
+ static const int mult = 14654; // sqrt(4/5) * 2^14, overall gain
+ int i, j;
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ output[i] = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ }
+}
+
+void fdst8(const tran_low_t *input, tran_low_t *output) {  // 8-point forward DST as a direct matrix-vector product
+ static const int N = 8;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/9) * 2^14 for k = 1..4
+ 5604, 10531, 14189, 16135
+ };
+ static const int mult = 15447; // 2*sqrt(2/9) * 2^14, overall gain
+ int i, j;
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi (e.g. idx == 9)
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ output[i] = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ }
+}
+
+void fdst16(const tran_low_t *input, tran_low_t *output) {  // 16-point forward DST as a direct matrix-vector product
+ static const int N = 16;
+ static const int sinvalue_lookup_table[] = {  // sin(k*pi/17) * 2^14 for k = 1..8
+ 3011, 5919, 8625, 11038,
+ 13075, 14666, 15759, 16314
+ };
+ static const int mult = 15895; // 2*sqrt(4/17) * 2^14, overall gain
+ int i, j;
+ for (i = 0; i < N; i++) {  // one output coefficient per pass
+ int64_t sum = 0;
+ for (j = 0; j < N; j++) {
+ int idx = (i + 1) * (j + 1);  // basis value is sin(idx * pi / (N + 1))
+ int sign = 0;
+ if (idx > N + 1) {  // reduce the angle to at most one half-period
+ sign = (idx / (N + 1)) & 1;  // odd count of half-periods => negative sine
+ idx %= (N + 1);
+ }
+ idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror: sin(pi - x) == sin(x)
+ if (idx == 0) continue;  // sine is zero at multiples of pi
+ idx--;  // table is 0-based and starts at k = 1
+ sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+ }
+ output[i] = (sum * mult) >> (2 * DCT_CONST_BITS);  // no rounding term; assumes DCT_CONST_BITS matches the 2^14 scale of table and mult
+ }
+}
+#endif // CONFIG_EXT_TX
+
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t step[4];
tran_high_t temp1, temp2;
@@ -510,25 +588,67 @@
{ fdct4, fdct4 }, // DCT_DCT = 0
{ fadst4, fdct4 }, // ADST_DCT = 1
{ fdct4, fadst4 }, // DCT_ADST = 2
- { fadst4, fadst4 } // ADST_ADST = 3
+ { fadst4, fadst4 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { fadst4, fdct4 }, // FLIPADST_DCT = 4
+ { fdct4, fadst4 }, // DCT_FLIPADST = 5
+ { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6
+ { fadst4, fadst4 }, // ADST_FLIPADST = 7
+ { fadst4, fadst4 }, // FLIPADST_ADST = 8
+ { fdst4, fdst4 }, // DST_DST = 9
+ { fdst4, fdct4 }, // DST_DCT = 10
+ { fdct4, fdst4 }, // DCT_DST = 11
+ { fdst4, fadst4 }, // DST_ADST = 12
+ { fadst4, fdst4 }, // ADST_DST = 13
+ { fdst4, fadst4 }, // DST_FLIPADST = 14
+ { fadst4, fdst4 }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_8[] = {
{ fdct8, fdct8 }, // DCT_DCT = 0
{ fadst8, fdct8 }, // ADST_DCT = 1
{ fdct8, fadst8 }, // DCT_ADST = 2
- { fadst8, fadst8 } // ADST_ADST = 3
+ { fadst8, fadst8 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { fadst8, fdct8 }, // FLIPADST_DCT = 4
+ { fdct8, fadst8 }, // DCT_FLIPADST = 5
+ { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6
+ { fadst8, fadst8 }, // ADST_FLIPADST = 7
+ { fadst8, fadst8 }, // FLIPADST_ADST = 8
+ { fdst8, fdst8 }, // DST_DST = 9
+ { fdst8, fdct8 }, // DST_DCT = 10
+ { fdct8, fdst8 }, // DCT_DST = 11
+ { fdst8, fadst8 }, // DST_ADST = 12
+ { fadst8, fdst8 }, // ADST_DST = 13
+ { fdst8, fadst8 }, // DST_FLIPADST = 14
+ { fadst8, fdst8 }, // FLIPADST_DST = 15
+#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_16[] = {
{ fdct16, fdct16 }, // DCT_DCT = 0
{ fadst16, fdct16 }, // ADST_DCT = 1
{ fdct16, fadst16 }, // DCT_ADST = 2
- { fadst16, fadst16 } // ADST_ADST = 3
+ { fadst16, fadst16 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX  // FLIPADST rows below reuse the plain ADST kernels; this table has no flipped 1-D kernel
+ { fadst16, fdct16 }, // FLIPADST_DCT = 4 (same pair as ADST_DCT)
+ { fdct16, fadst16 }, // DCT_FLIPADST = 5 (same pair as DCT_ADST)
+ { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6 (same pair as ADST_ADST)
+ { fadst16, fadst16 }, // ADST_FLIPADST = 7 (same pair as ADST_ADST)
+ { fadst16, fadst16 }, // FLIPADST_ADST = 8 (same pair as ADST_ADST)
+ { fdst16, fdst16 }, // DST_DST = 9
+ { fdst16, fdct16 }, // DST_DCT = 10
+ { fdct16, fdst16 }, // DCT_DST = 11
+ { fdst16, fadst16 }, // DST_ADST = 12
+ { fadst16, fdst16 }, // ADST_DST = 13
+ { fdst16, fadst16 }, // DST_FLIPADST = 14 (same pair as DST_ADST)
+ { fadst16, fdst16 }, // FLIPADST_DST = 15 (same pair as ADST_DST)
+#endif // CONFIG_EXT_TX
};
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct4x4_c(input, output, stride);
} else {
@@ -560,15 +680,15 @@
}
void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
- tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
int eob = -1;
int i, j;
@@ -672,7 +792,7 @@
}
void vp10_fht8x8_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct8x8_c(input, output, stride);
} else {
@@ -758,7 +878,7 @@
}
void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
+ int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vpx_fdct16x16_c(input, output, stride);
} else {
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 561835f..418a629 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -424,6 +424,22 @@
copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
vp10_fht4x4(src_diff2, coeff, 4, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+ vp10_fht4x4_c(src_diff2, coeff, 4, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+ vp10_fht4x4_c(src_diff2, coeff, 4, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -464,6 +480,22 @@
copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -504,6 +536,22 @@
copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -544,6 +592,22 @@
copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -584,6 +648,22 @@
copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -665,6 +745,22 @@
copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
vp10_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+ vp10_highbd_fht4x4_c(src_diff2, coeff, 4, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+ vp10_highbd_fht4x4_c(src_diff2, coeff, 4, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -707,6 +803,22 @@
copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_highbd_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_highbd_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -749,6 +861,22 @@
copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_highbd_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+ vp10_highbd_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -791,6 +919,22 @@
copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_highbd_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_highbd_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -833,6 +977,22 @@
copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
break;
+ case DST_DST:
+ case DCT_DST:
+ case DST_DCT:
+ case DST_ADST:
+ case ADST_DST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case DST_FLIPADST:
+ copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_highbd_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+ break;
+ case FLIPADST_DST:
+ copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+ vp10_highbd_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+ break;
#endif // CONFIG_EXT_TX
default:
assert(0);