Speed up the DST transforms and the ext_tx search
Adds an early termination to the ext_tx search, and also
implements the DST transforms more efficiently.
This is about 4 times faster with the ext-tx experiment.
Performance on derflr drops by about 0.09%, from 1.735% to
1.648%, but that is worth it for the speedup achieved.
Change-Id: I2ede9d69c557f25e0a76cd5d701cc0e36e825c7c
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index af39d96..8a3e6cc 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -242,13 +242,13 @@
FLIPADST_FLIPADST,
ADST_FLIPADST,
FLIPADST_ADST,
- DST_DST,
DST_DCT,
DCT_DST,
DST_ADST,
ADST_DST,
DST_FLIPADST,
FLIPADST_DST,
+ DST_DST,
};
#endif // CONFIG_EXT_TX
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 110af78..1fb28af 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -317,17 +317,17 @@
#if CONFIG_EXT_TX
const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(EXT_TX_TYPES)] = {
-NORM, 2,
- -ALT9, 4,
+ -ALT15, 4,
6, 16,
8, 10,
- -ALT10, -ALT11,
+ -ALT9, -ALT10,
12, 14,
-ALT1, -ALT2,
-ALT4, -ALT5,
18, 24,
20, 22,
- -ALT12, -ALT13,
- -ALT14, -ALT15,
+ -ALT11, -ALT12,
+ -ALT13, -ALT14,
26, 28,
-ALT3, -ALT6,
-ALT7, -ALT8
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index a4a5440..d63ed4e 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -100,13 +100,13 @@
FLIPADST_FLIPADST = 6,
ADST_FLIPADST = 7,
FLIPADST_ADST = 8,
- DST_DST = 9,
- DST_DCT = 10,
- DCT_DST = 11,
- DST_ADST = 12,
- ADST_DST = 13,
- DST_FLIPADST = 14,
- FLIPADST_DST = 15,
+ DST_DCT = 9,
+ DCT_DST = 10,
+ DST_ADST = 11,
+ ADST_DST = 12,
+ DST_FLIPADST = 13,
+ FLIPADST_DST = 14,
+ DST_DST = 15,
#endif // CONFIG_EXT_TX
TX_TYPES,
} TX_TYPE;
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 62505ea..5607c6f 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -19,159 +19,332 @@
#if CONFIG_EXT_TX
void idst4_c(const tran_low_t *input, tran_low_t *output) {
- static const int N = 4;
// {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
static const int32_t sinvalue_lookup[] = {
141124871, 228344838,
};
- int i, j;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
- }
- sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- output[i] = WRAPLOW(sum, 8);
- }
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
}
void idst8_c(const tran_low_t *input, tran_low_t *output) {
- static const int N = 8;
// {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
static const int32_t sinvalue_lookup[] = {
86559612, 162678858, 219176632, 249238470
};
- int i, j;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
- }
- sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- output[i] = WRAPLOW(sum, 8);
- }
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = (s07 + s16 - s34)* sinvalue_lookup[2];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = (d07 - d16 + d34)* sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
}
void idst16_c(const tran_low_t *input, tran_low_t *output) {
- static const int N = 16;
// {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
static const int32_t sinvalue_lookup[] = {
47852167, 94074787, 137093803, 175444254,
207820161, 233119001, 250479254, 259309736
};
- int i, j;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
- }
- sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- output[i] = WRAPLOW(sum, 8);
- }
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
}
#if CONFIG_VP9_HIGHBITDEPTH
void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
- static const int N = 4;
// {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
static const int32_t sinvalue_lookup[] = {
141124871, 228344838,
};
- int i, j;
- (void) bd;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
- }
- sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- output[i] = WRAPLOW(sum, bd);
- }
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
}
void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
- static const int N = 8;
// {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
static const int32_t sinvalue_lookup[] = {
86559612, 162678858, 219176632, 249238470
};
- int i, j;
- (void) bd;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
- }
- sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- output[i] = WRAPLOW(sum, bd);
- }
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = (s07 + s16 - s34)* sinvalue_lookup[2];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = (d07 - d16 + d34)* sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
}
void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
- static const int N = 16;
// {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
static const int32_t sinvalue_lookup[] = {
47852167, 94074787, 137093803, 175444254,
207820161, 233119001, 250479254, 259309736
};
- int i, j;
- (void) bd;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1);
- }
- sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- output[i] = WRAPLOW(sum, bd);
- }
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_EXT_TX
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index f1fb195..ba0b50a 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -36,79 +36,166 @@
#if CONFIG_EXT_TX
void fdst4(const tran_low_t *input, tran_low_t *output) {
- static const int N = 4;
// {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2)
static const int32_t sinvalue_lookup[] = {
141124871, 228344838,
};
- int i, j;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- idx--;
- sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1);
- }
- output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- }
+ int64_t sum;
+ int64_t s03 = (input[0] + input[3]);
+ int64_t d03 = (input[0] - input[3]);
+ int64_t s12 = (input[1] + input[2]);
+ int64_t d12 = (input[1] - input[2]);
+ sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
}
void fdst8(const tran_low_t *input, tran_low_t *output) {
- static const int N = 8;
// {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2
static const int sinvalue_lookup[] = {
86559612, 162678858, 219176632, 249238470
};
- int i, j;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- idx--;
- sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1);
- }
- output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- }
+ int64_t sum;
+ int64_t s07 = (input[0] + input[7]);
+ int64_t d07 = (input[0] - input[7]);
+ int64_t s16 = (input[1] + input[6]);
+ int64_t d16 = (input[1] - input[6]);
+ int64_t s25 = (input[2] + input[5]);
+ int64_t d25 = (input[2] - input[5]);
+ int64_t s34 = (input[3] + input[4]);
+ int64_t d34 = (input[3] - input[4]);
+ sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+ d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = (s07 + s16 - s34)* sinvalue_lookup[2];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+ s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+ output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = (d07 - d16 + d34)* sinvalue_lookup[2];
+ output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+ s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+ output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+ d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+ output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
}
void fdst16(const tran_low_t *input, tran_low_t *output) {
- static const int N = 16;
// {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2)
static const int sinvalue_lookup[] = {
47852167, 94074787, 137093803, 175444254,
207820161, 233119001, 250479254, 259309736
};
- int i, j;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = idx > N + 1 - idx ? N + 1 - idx : idx;
- if (idx == 0) continue;
- idx--;
- sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1);
- }
- output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- }
+ int64_t sum;
+ int64_t s015 = (input[0] + input[15]);
+ int64_t d015 = (input[0] - input[15]);
+ int64_t s114 = (input[1] + input[14]);
+ int64_t d114 = (input[1] - input[14]);
+ int64_t s213 = (input[2] + input[13]);
+ int64_t d213 = (input[2] - input[13]);
+ int64_t s312 = (input[3] + input[12]);
+ int64_t d312 = (input[3] - input[12]);
+ int64_t s411 = (input[4] + input[11]);
+ int64_t d411 = (input[4] - input[11]);
+ int64_t s510 = (input[5] + input[10]);
+ int64_t d510 = (input[5] - input[10]);
+ int64_t s69 = (input[6] + input[9]);
+ int64_t d69 = (input[6] - input[9]);
+ int64_t s78 = (input[7] + input[8]);
+ int64_t d78 = (input[7] - input[8]);
+ sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+ s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+ s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+ s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+ output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+ d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+ d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+ d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+ output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+ s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+ s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+ s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+ output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+ d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+ d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+ d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+ output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+ s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+ s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+ s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+ output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+ d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+ d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+ d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+ output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+ s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+ s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+ s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+ output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+ d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+ d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+ d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+ output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+ s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+ s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+ s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+ output[8] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+ d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+ d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+ d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+ output[9] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+ s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+ s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+ s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+ output[10] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+ d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+ d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+ d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+ output[11] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+ s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+ s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+ s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+ output[12] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+ d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+ d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+ d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+ output[13] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+ s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+ s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+ s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+ output[14] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+ sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+ d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+ d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+ d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+ output[15] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
}
#endif // CONFIG_EXT_TX
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 84f796e..9c4baa0 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -640,7 +640,6 @@
int start_tx_type, end_tx_type;
#endif // CONFIG_EXT_TX
-
const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
assert(skip_prob > 0);
s0 = vp10_cost_bit(skip_prob, 0);
@@ -675,8 +674,14 @@
int r_tx_size = 0;
#if CONFIG_EXT_TX
- if (mbmi->ext_txfrm >= GET_EXT_TX_TYPES(n))
- continue;
+ if (is_inter_block(mbmi)) {
+ if (mbmi->ext_txfrm >= GET_EXT_TX_TYPES(n)) {
+ continue;
+ } else if (mbmi->ext_txfrm >= ALT11 && best_tx_type == NORM) {
+ // Terminate if the best so far is still NORM
+ break;
+ }
+ }
#endif // CONFIG_EXT_TX
for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
@@ -725,8 +730,8 @@
last_rd = rd;
#if CONFIG_EXT_TX
- if (rd < (is_inter_block(mbmi) &&
- (best_tx_type == NORM) ? ext_tx_th : 1) * best_rd) {
+ if (rd < (is_inter_block(mbmi) && best_tx_type == NORM ? ext_tx_th : 1) *
+ best_rd) {
#else
if (rd < best_rd) {
#endif // CONFIG_EXT_TX
@@ -747,7 +752,7 @@
mbmi->tx_size = best_tx;
#if CONFIG_EXT_TX
- mbmi->ext_txfrm = best_tx_type;
+ mbmi->ext_txfrm = best_tx_type > -1 ? best_tx_type : NORM;
txfm_rd_in_plane(x, &r, &d, &s,
&sse, ref_best_rd, 0, bs, best_tx,
cpi->sf.use_fast_coef_costing);