Merge "Passing TXFM_TYPE instead of func pointer" into nextgenv2
diff --git a/test/vp10_fht16x16_test.cc b/test/vp10_fht16x16_test.cc
index d501e10..3967149 100644
--- a/test/vp10_fht16x16_test.cc
+++ b/test/vp10_fht16x16_test.cc
@@ -103,20 +103,6 @@
make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 7,
VPX_BITS_8, 256),
make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 8,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 9,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 10,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 11,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 12,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 13,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 14,
- VPX_BITS_8, 256),
- make_tuple(&vp10_fht16x16_sse2, &vp10_iht16x16_256_add_sse2, 15,
VPX_BITS_8, 256)));
#endif // !CONFIG_EXT_TX
#endif // HAVE_SSE2
diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
index d2598f9..bee1a0c 100644
--- a/test/vp10_fht4x4_test.cc
+++ b/test/vp10_fht4x4_test.cc
@@ -102,20 +102,6 @@
make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 7,
VPX_BITS_8, 16),
make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 8,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 9,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 10,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 11,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 12,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 13,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 14,
- VPX_BITS_8, 16),
- make_tuple(&vp10_fht4x4_sse2, &vp10_iht4x4_16_add_sse2, 15,
VPX_BITS_8, 16)));
#endif // !CONFIG_EXT_TX
#endif // HAVE_SSE2
diff --git a/test/vp10_fht8x8_test.cc b/test/vp10_fht8x8_test.cc
index 47feb3d6..96f5632 100644
--- a/test/vp10_fht8x8_test.cc
+++ b/test/vp10_fht8x8_test.cc
@@ -102,20 +102,6 @@
make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 7,
VPX_BITS_8, 64),
make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 8,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 9,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 10,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 11,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 12,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 13,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 14,
- VPX_BITS_8, 64),
- make_tuple(&vp10_fht8x8_sse2, &vp10_iht8x8_64_add_sse2, 15,
VPX_BITS_8, 64)));
#endif // !CONFIG_EXT_TX
#endif // HAVE_SSE2
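Note: with the DST-based hybrids removed, tx_type values 9 through 15 now name the identity and 1-D transforms, which have no SSE2 implementations and fall back to the C paths (see the switch statements in idct.c below); that is why the SSE2 instantiations in all three test files now stop at type 8. Resulting coverage, as a comment sketch:

    /* SSE2-instantiated hybrid transforms after the renumbering:
     *   0 DCT_DCT            1 ADST_DCT           2 DCT_ADST
     *   3 ADST_ADST          4 FLIPADST_DCT       5 DCT_FLIPADST
     *   6 FLIPADST_FLIPADST  7 ADST_FLIPADST      8 FLIPADST_ADST
     * Types 9..15 (IDTX, V_DCT, H_DCT, V_ADST, H_ADST, V_FLIPADST,
     * H_FLIPADST) are C-only, so they are dropped from the SSE2
     * fht/iht parameterizations. */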
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index ffa3c64..50b6981 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -383,10 +383,10 @@
#define USE_MSKTX_FOR_32X32 0
static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
- 1, 19, 12, 2
+ 1, 16, 12, 2
};
static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = {
- 1, 17, 10
+ 1, 12, 10
};
#if EXT_TX_SIZES == 4
@@ -437,17 +437,17 @@
// Transform types used in each intra set
static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, },
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0},
};
// Transform types used in each inter set
static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1},
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
+ {1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
};
static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
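Note: each row of ext_tx_used_intra/ext_tx_used_inter is one transform set and each column one TX_TYPE under the new 16-entry numbering, and the per-set counts above equal the number of 1s in the corresponding row. A minimal sketch of how a membership test presumably reads these tables (helper name is illustrative; the real gating also involves tx_size via get_ext_tx_types()):

    /* Sketch: is tx_type selectable within inter transform set set_idx? */
    static INLINE int ext_tx_inter_used_sketch(int set_idx, TX_TYPE tx_type) {
      return ext_tx_used_inter[set_idx][tx_type];
    }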
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index d48679e..407fe8c 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -186,7 +186,7 @@
};
static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = {
- 128, 128, 128,
+ 128, 160, 180, 128, 160
};
#if CONFIG_EXT_INTER
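Note: default_drl_prob grows from 3 to 5 entries to track the new DRL_MODE_CONTEXTS (see enums.h and the reworked vp10_drl_ctx() further down). A sketch of the assumed read side, one binary DRL decision per context (vpx_read is the standard libvpx bool-decoder call; the actual call sites are outside this patch):

    static int read_drl_bit_sketch(vpx_reader *r, const FRAME_CONTEXT *fc,
                                   uint8_t ctx) {
      return vpx_read(r, fc->drl_prob[ctx]);  /* single drl_prob[] now */
    }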
@@ -836,47 +836,27 @@
const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
[TREE_SIZE(TX_TYPES)] = {
    { // ToDo(yaowu): remove unused entry 0.
- -IDTX, 2,
- -V_DCT, 4,
- -H_DCT, 6,
- -DCT_DCT, 8,
- -DST_DST, 10,
- 12, 22,
- 14, 16,
- -DST_DCT, -DCT_DST,
- 18, 20,
- -ADST_DCT, -DCT_ADST,
- -FLIPADST_DCT, -DCT_FLIPADST,
- 24, 30,
- 26, 28,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 32, 34,
- -ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ 0
}, {
-IDTX, 2,
- -V_DCT, 4,
- -H_DCT, 6,
- -DCT_DCT, 8,
- -DST_DST, 10,
- 12, 22,
- 14, 16,
- -DST_DCT, -DCT_DST,
- 18, 20,
+ 4, 14,
+ 6, 8,
+ -V_DCT, -H_DCT,
+ 10, 12,
+ -V_ADST, -H_ADST,
+ -V_FLIPADST, -H_FLIPADST,
+ -DCT_DCT, 16,
+ 18, 24,
+ 20, 22,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 24, 30,
26, 28,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 32, 34,
-ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
}, {
-IDTX, 2,
- -V_DCT, 4,
- -H_DCT, 6,
+ 4, 6,
+ -V_DCT, -H_DCT,
-DCT_DCT, 8,
10, 16,
12, 14,
@@ -893,39 +873,19 @@
const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
[TREE_SIZE(TX_TYPES)] = {
{ // ToDo(yaowu): remove unused entry 0.
- -IDTX, 2,
- -DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
- 14, 16,
- -ADST_DCT, -DCT_ADST,
- -FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
- -ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ 0
}, {
-IDTX, 2,
-DCT_DCT, 4,
- -DST_DST, 6,
- 8, 18,
- 10, 12,
- -DST_DCT, -DCT_DST,
- 14, 16,
+ 6, 8,
+ -V_DCT, -H_DCT,
+ 10, 16,
+ 12, 14,
-ADST_DCT, -DCT_ADST,
-FLIPADST_DCT, -DCT_FLIPADST,
- 20, 26,
- 22, 24,
- -DST_ADST, -ADST_DST,
- -DST_FLIPADST, -FLIPADST_DST,
- 28, 30,
+ 18, 20,
-ADST_ADST, -FLIPADST_FLIPADST,
- -ADST_FLIPADST, -FLIPADST_ADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
}, {
-IDTX, 2,
-DCT_DCT, 4,
@@ -942,33 +902,25 @@
static const vpx_prob
default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
{ // ToDo(yaowu): remove unused entry 0.
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 0 },
+ { 0 },
+ { 0 },
#if EXT_TX_SIZES == 4
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 0 },
#endif
}, {
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 16, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
#if EXT_TX_SIZES == 4
- { 12, 15, 15, 160, 16, 144, 160, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128, 128},
#endif
}, {
- { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
- { 12, 15, 15, 112, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
#if EXT_TX_SIZES == 4
- { 12, 15, 15, 160, 128, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
#endif
}, {
{ 12, },
@@ -985,266 +937,110 @@
[INTRA_MODES][TX_TYPES - 1] = {
{ // ToDo(yaowu): remove unused entry 0.
{
- { 8, 11, 24, 112, 87, 137, 127, 134,
- 128, 86, 128, 124, 125, 133, 176, 123, },
- { 10, 9, 39, 106, 73, 155, 163, 228,
- 35, 62, 129, 127, 133, 114, 213, 234, },
- { 10, 9, 14, 88, 91, 127, 151, 51,
- 210, 89, 126, 58, 52, 116, 217, 24, },
- { 9, 6, 29, 113, 98, 131, 149, 210,
- 119, 60, 124, 93, 90, 143, 170, 197, },
- { 8, 8, 38, 101, 111, 166, 167, 141,
- 130, 105, 128, 75, 75, 118, 197, 117, },
- { 7, 8, 39, 91, 101, 153, 166, 200,
- 99, 77, 123, 90, 83, 144, 224, 192, },
- { 7, 10, 26, 86, 119, 154, 130, 101,
- 152, 91, 129, 75, 79, 137, 219, 77, },
- { 10, 13, 20, 86, 102, 162, 112, 76,
- 171, 86, 134, 122, 106, 124, 196, 44, },
- { 8, 9, 33, 108, 100, 144, 148, 215,
- 77, 60, 125, 125, 128, 126, 198, 220, },
- { 3, 10, 29, 111, 69, 141, 204, 141,
- 139, 93, 120, 75, 77, 163, 242, 124, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
}, {
- { 2, 53, 18, 147, 96, 98, 136, 133,
- 131, 120, 153, 163, 169, 137, 173, 124, },
- { 4, 18, 34, 133, 54, 130, 179, 228,
- 28, 72, 153, 164, 168, 118, 227, 239, },
- { 4, 18, 13, 125, 72, 110, 176, 36,
- 221, 104, 148, 75, 72, 117, 225, 19, },
- { 8, 33, 24, 162, 113, 99, 147, 226,
- 103, 85, 153, 143, 153, 124, 155, 210, },
- { 2, 15, 35, 107, 127, 158, 192, 128,
- 126, 116, 151, 95, 88, 182, 241, 119, },
- { 3, 15, 36, 112, 100, 146, 194, 189,
- 90, 98, 152, 99, 100, 165, 235, 175, },
- { 3, 16, 29, 109, 103, 140, 182, 76,
- 173, 104, 147, 82, 85, 159, 235, 70, },
- { 9, 24, 14, 120, 86, 156, 161, 34,
- 177, 121, 142, 128, 128, 126, 185, 37, },
- { 5, 24, 29, 152, 98, 99, 174, 228,
- 82, 76, 147, 149, 128, 132, 191, 225, },
- { 2, 15, 29, 111, 77, 126, 200, 135,
- 117, 93, 152, 96, 84, 191, 245, 135, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
#if EXT_TX_SIZES == 4
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 },
#endif
},
}, {
{
- { 8, 11, 24, 112, 87, 137, 127, 134,
- 128, 86, 128, 124, 125, 133, 176, 123, },
- { 10, 9, 39, 106, 73, 155, 163, 228,
- 35, 62, 129, 127, 133, 114, 213, 234, },
- { 10, 9, 14, 88, 91, 127, 151, 51,
- 210, 89, 126, 58, 52, 116, 217, 24, },
- { 9, 6, 29, 113, 98, 131, 149, 210,
- 119, 60, 124, 93, 90, 143, 170, 197, },
- { 8, 8, 38, 101, 111, 166, 167, 141,
- 130, 105, 128, 75, 75, 118, 197, 117, },
- { 7, 8, 39, 91, 101, 153, 166, 200,
- 99, 77, 123, 90, 83, 144, 224, 192, },
- { 7, 10, 26, 86, 119, 154, 130, 101,
- 152, 91, 129, 75, 79, 137, 219, 77, },
- { 10, 13, 20, 86, 102, 162, 112, 76,
- 171, 86, 134, 122, 106, 124, 196, 44, },
- { 8, 9, 33, 108, 100, 144, 148, 215,
- 77, 60, 125, 125, 128, 126, 198, 220, },
- { 3, 10, 29, 111, 69, 141, 204, 141,
- 139, 93, 120, 75, 77, 163, 242, 124, },
+ { 8, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 10, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, },
+ { 10, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, },
+ { 9, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 7, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 7, 20, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 10, 23, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 8, 29, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 20, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 53, 18, 147, 96, 98, 136, 133,
- 131, 120, 153, 163, 169, 137, 173, 124, },
- { 4, 18, 34, 133, 54, 130, 179, 228,
- 28, 72, 153, 164, 168, 118, 227, 239, },
- { 4, 18, 13, 125, 72, 110, 176, 36,
- 221, 104, 148, 75, 72, 117, 225, 19, },
- { 8, 33, 24, 162, 113, 99, 147, 226,
- 103, 85, 153, 143, 153, 124, 155, 210, },
- { 2, 15, 35, 107, 127, 158, 192, 128,
- 126, 116, 151, 95, 88, 182, 241, 119, },
- { 3, 15, 36, 112, 100, 146, 194, 189,
- 90, 98, 152, 99, 100, 165, 235, 175, },
- { 3, 16, 29, 109, 103, 140, 182, 76,
- 173, 104, 147, 82, 85, 159, 235, 70, },
- { 9, 24, 14, 120, 86, 156, 161, 34,
- 177, 121, 142, 128, 128, 126, 185, 37, },
- { 5, 24, 29, 152, 98, 99, 174, 228,
- 82, 76, 147, 149, 128, 132, 191, 225, },
- { 2, 15, 29, 111, 77, 126, 200, 135,
- 117, 93, 152, 96, 84, 191, 245, 135, },
+ { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, },
+ { 4, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, },
+ { 8, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 3, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 26, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 9, 24, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 5, 24, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 2, 25, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 28, 32, 128, 176, 192, 208, 128, 128, 128, 128, },
+ { 1, 28, 32, 128, 176, 192, 48, 128, 128, 128, 128, },
+ { 4, 160, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 28, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 29, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 25, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
#if EXT_TX_SIZES == 4
}, {
- { 2, 69, 13, 173, 111, 69, 137, 159,
- 159, 146, 151, 193, 203, 131, 180, 123, },
- { 1, 12, 33, 164, 32, 98, 204, 242,
- 23, 99, 149, 215, 232, 110, 239, 245, },
- { 1, 17, 9, 136, 82, 83, 171, 28,
- 231, 128, 135, 76, 64, 118, 235, 17, },
- { 4, 41, 17, 195, 131, 58, 161, 237,
- 141, 97, 153, 189, 191, 117, 182, 202, },
- { 2, 17, 36, 104, 149, 137, 217, 139,
- 191, 119, 125, 107, 115, 223, 249, 110, },
- { 2, 14, 24, 127, 91, 135, 219, 198,
- 113, 91, 164, 125, 173, 211, 250, 116, },
- { 3, 19, 24, 120, 102, 130, 209, 81,
- 187, 95, 143, 102, 50, 190, 244, 56, },
- { 4, 27, 10, 128, 91, 157, 181, 33,
- 181, 150, 141, 141, 166, 114, 215, 25, },
- { 2, 34, 27, 187, 102, 77, 210, 245,
- 113, 107, 136, 184, 188, 121, 210, 234, },
- { 1, 15, 22, 141, 59, 94, 208, 133,
- 154, 95, 152, 112, 105, 191, 242, 111, },
+ { 2, 176, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 12, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 17, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 41, 32, 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 17, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 14, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 19, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 32, 128, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 32, 128, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 15, 32, 128, 96, 128, 128, 128, 160, 192, 128, },
#endif
},
}, {
{
- { 8, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 10, 28, 176, 192, 208, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 10, 28, 176, 192, 48, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 9, 160, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 8, 28, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 7, 28, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 7, 20, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 10, 23, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 8, 29, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 20, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 10, 28, 176, 192, 208, 128, 128, 128, 128, },
+ { 10, 28, 176, 192, 48, 128, 128, 128, 128, },
+ { 9, 160, 128, 128, 128, 128, 128, 128, 128, },
+ { 8, 28, 96, 128, 128, 128, 160, 192, 128, },
+ { 7, 28, 160, 176, 192, 128, 128, 128, 128, },
+ { 7, 20, 160, 176, 64, 128, 128, 128, 128, },
+ { 10, 23, 160, 176, 64, 128, 128, 128, 128, },
+ { 8, 29, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 20, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 28, 176, 192, 208, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 28, 176, 192, 48, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 8, 160, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 28, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 28, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 26, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 9, 24, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 5, 24, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 25, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 4, 28, 176, 192, 208, 128, 128, 128, 128, },
+ { 4, 28, 176, 192, 48, 128, 128, 128, 128, },
+ { 8, 160, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 96, 128, 128, 128, 160, 192, 128, },
+ { 3, 28, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 26, 160, 176, 64, 128, 128, 128, 128, },
+ { 9, 24, 160, 176, 64, 128, 128, 128, 128, },
+ { 5, 24, 160, 176, 192, 128, 128, 128, 128, },
+ { 2, 25, 96, 128, 128, 128, 160, 192, 128, },
}, {
- { 2, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 28, 176, 192, 208, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 28, 176, 192, 48, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 160, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 28, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 28, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 29, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 27, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 34, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 25, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 28, 176, 192, 208, 128, 128, 128, 128, },
+ { 1, 28, 176, 192, 48, 128, 128, 128, 128, },
+ { 4, 160, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 28, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 28, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 29, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 25, 96, 128, 128, 128, 160, 192, 128, },
#if EXT_TX_SIZES == 4
}, {
- { 2, 176, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 12, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 17, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 41, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 17, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 14, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 3, 19, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 4, 27, 160, 176, 64, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 2, 34, 160, 176, 192, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 128, 128, },
- { 1, 15, 96, 128, 128, 128, 160, 192,
- 128, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 176, 128, 128, 128, 128, 128, 128, 128, },
+ { 1, 12, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 17, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 41, 128, 128, 128, 128, 128, 128, 128, },
+ { 2, 17, 96, 128, 128, 128, 160, 192, 128, },
+ { 2, 14, 160, 176, 192, 128, 128, 128, 128, },
+ { 3, 19, 160, 176, 64, 128, 128, 128, 128, },
+ { 4, 27, 160, 176, 64, 128, 128, 128, 128, },
+ { 2, 34, 160, 176, 192, 128, 128, 128, 128, },
+ { 1, 15, 96, 128, 128, 128, 160, 192, 128, },
#endif
},
},
@@ -1323,8 +1119,7 @@
vp10_copy(fc->newmv_prob, default_newmv_prob);
vp10_copy(fc->zeromv_prob, default_zeromv_prob);
vp10_copy(fc->refmv_prob, default_refmv_prob);
- vp10_copy(fc->drl_prob0, default_drl_prob);
- vp10_copy(fc->drl_prob1, default_drl_prob);
+ vp10_copy(fc->drl_prob, default_drl_prob);
#if CONFIG_EXT_INTER
fc->new2mv_prob = default_new2mv_prob;
#endif // CONFIG_EXT_INTER
@@ -1408,12 +1203,8 @@
counts->refmv_mode[i]);
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- fc->drl_prob0[i] = mode_mv_merge_probs(pre_fc->drl_prob0[i],
- counts->drl_mode0[i]);
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- fc->drl_prob1[i] = mode_mv_merge_probs(pre_fc->drl_prob1[i],
- counts->drl_mode1[i]);
-
+ fc->drl_prob[i] = mode_mv_merge_probs(pre_fc->drl_prob[i],
+ counts->drl_mode[i]);
#if CONFIG_EXT_INTER
fc->new2mv_prob = mode_mv_merge_probs(pre_fc->new2mv_prob,
counts->new2mv_mode);
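Note: with TX_TYPES down to 16, a full ext-tx tree needs at most TREE_SIZE(TX_TYPES) = 2 * 16 - 2 = 30 entries and each probability row TX_TYPES - 1 = 15 entries; the smaller sets use correspondingly shorter trees (12 leaves for inter set 2, 12 and 10 for the intra sets). A sketch of the assumed decode-side use of one inter tree (vpx_read_tree is the standard libvpx tree reader; the inter_ext_tx_prob field name is inferred from the default table above):

    static TX_TYPE read_inter_ext_tx_sketch(vpx_reader *r,
                                            const FRAME_CONTEXT *fc,
                                            int set, TX_SIZE tx_size) {
      return (TX_TYPE)vpx_read_tree(r, vp10_ext_tx_inter_tree[set],
                                    fc->inter_ext_tx_prob[set][tx_size]);
    }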
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 2443d60..ba93ea7 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -55,8 +55,7 @@
vpx_prob newmv_prob[NEWMV_MODE_CONTEXTS];
vpx_prob zeromv_prob[ZEROMV_MODE_CONTEXTS];
vpx_prob refmv_prob[REFMV_MODE_CONTEXTS];
- vpx_prob drl_prob0[DRL_MODE_CONTEXTS];
- vpx_prob drl_prob1[DRL_MODE_CONTEXTS];
+ vpx_prob drl_prob[DRL_MODE_CONTEXTS];
#if CONFIG_EXT_INTER
vpx_prob new2mv_prob;
@@ -121,8 +120,7 @@
unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2];
unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
- unsigned int drl_mode0[DRL_MODE_CONTEXTS][2];
- unsigned int drl_mode1[DRL_MODE_CONTEXTS][2];
+ unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
#if CONFIG_EXT_INTER
unsigned int new2mv_mode[2];
#endif // CONFIG_EXT_INTER
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 87bcc8a..9160c5f 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -111,21 +111,17 @@
FLIPADST_FLIPADST = 6,
ADST_FLIPADST = 7,
FLIPADST_ADST = 8,
- DST_DCT = 9,
- DCT_DST = 10,
- DST_ADST = 11,
- ADST_DST = 12,
- DST_FLIPADST = 13,
- FLIPADST_DST = 14,
- DST_DST = 15,
- IDTX = 16,
- V_DCT = 17,
- H_DCT = 18,
+ IDTX = 9,
+ V_DCT = 10,
+ H_DCT = 11,
+ V_ADST = 12,
+ H_ADST = 13,
+ V_FLIPADST = 14,
+ H_FLIPADST = 15,
#endif // CONFIG_EXT_TX
TX_TYPES,
} TX_TYPE;
-
#if CONFIG_EXT_TX
#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
@@ -247,7 +243,7 @@
#define NEWMV_MODE_CONTEXTS 7
#define ZEROMV_MODE_CONTEXTS 2
#define REFMV_MODE_CONTEXTS 9
-#define DRL_MODE_CONTEXTS 3
+#define DRL_MODE_CONTEXTS 5
#define ZEROMV_OFFSET 3
#define REFMV_OFFSET 4
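Note: net effect of the renumbering is that the seven DST_* hybrids disappear, IDTX moves from 16 down to 9, the four new 1-D types occupy 12..15, and TX_TYPES drops from 19 to 16 under CONFIG_EXT_TX (it remains 4 without the experiment). A hypothetical compile-time guard against the enum and the usage tables drifting apart:

    /* Negative-array-size trick; purely illustrative, not in the patch. */
    #if CONFIG_EXT_TX
    typedef char assert_tx_types_is_16[(TX_TYPES == 16) ? 1 : -1];
    #else
    typedef char assert_tx_types_is_4[(TX_TYPES == 4) ? 1 : -1];
    #endif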
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 863f0db..0e211ad 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -19,247 +19,6 @@
#include "vpx_ports/mem.h"
#if CONFIG_EXT_TX
-void idst4_c(const tran_low_t *input, tran_low_t *output) {
- tran_low_t step[4];
- tran_high_t temp1, temp2;
- // stage 1
- temp1 = (input[3] + input[1]) * cospi_16_64;
- temp2 = (input[3] - input[1]) * cospi_16_64;
- step[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = input[2] * cospi_24_64 - input[0] * cospi_8_64;
- temp2 = input[2] * cospi_8_64 + input[0] * cospi_24_64;
- step[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- // stage 2
- output[0] = WRAPLOW(step[0] + step[3], 8);
- output[1] = WRAPLOW(-step[1] - step[2], 8);
- output[2] = WRAPLOW(step[1] - step[2], 8);
- output[3] = WRAPLOW(step[3] - step[0], 8);
-}
-
-void idst8_c(const tran_low_t *input, tran_low_t *output) {
- // vp9_igentx8(input, output, Tx8);
- tran_low_t step1[8], step2[8];
- tran_high_t temp1, temp2;
- // stage 1
- step1[0] = input[7];
- step1[2] = input[3];
- step1[1] = input[5];
- step1[3] = input[1];
- temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
- temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
- step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
- temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- // stage 2
- temp1 = (step1[0] + step1[2]) * cospi_16_64;
- temp2 = (step1[0] - step1[2]) * cospi_16_64;
- step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
- temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[4] = WRAPLOW(step1[4] + step1[5], 8);
- step2[5] = WRAPLOW(step1[4] - step1[5], 8);
- step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
- step2[7] = WRAPLOW(step1[6] + step1[7], 8);
-
- // stage 3
- step1[0] = WRAPLOW(step2[0] + step2[3], 8);
- step1[1] = WRAPLOW(step2[1] + step2[2], 8);
- step1[2] = WRAPLOW(step2[1] - step2[2], 8);
- step1[3] = WRAPLOW(step2[0] - step2[3], 8);
- step1[4] = step2[4];
- temp1 = (step2[6] - step2[5]) * cospi_16_64;
- temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step1[7] = step2[7];
-
- // stage 4
- output[0] = WRAPLOW(step1[0] + step1[7], 8);
- output[1] = WRAPLOW(-step1[1] - step1[6], 8);
- output[2] = WRAPLOW(step1[2] + step1[5], 8);
- output[3] = WRAPLOW(-step1[3] - step1[4], 8);
- output[4] = WRAPLOW(step1[3] - step1[4], 8);
- output[5] = WRAPLOW(-step1[2] + step1[5], 8);
- output[6] = WRAPLOW(step1[1] - step1[6], 8);
- output[7] = WRAPLOW(-step1[0] + step1[7], 8);
-}
-
-void idst16_c(const tran_low_t *input, tran_low_t *output) {
- tran_low_t step1[16], step2[16];
- tran_high_t temp1, temp2;
-
- // stage 1
- step1[0] = input[15];
- step1[1] = input[7];
- step1[2] = input[11];
- step1[3] = input[3];
- step1[4] = input[13];
- step1[5] = input[5];
- step1[6] = input[9];
- step1[7] = input[1];
- step1[8] = input[14];
- step1[9] = input[6];
- step1[10] = input[10];
- step1[11] = input[2];
- step1[12] = input[12];
- step1[13] = input[4];
- step1[14] = input[8];
- step1[15] = input[0];
-
- // stage 2
- step2[0] = step1[0];
- step2[1] = step1[1];
- step2[2] = step1[2];
- step2[3] = step1[3];
- step2[4] = step1[4];
- step2[5] = step1[5];
- step2[6] = step1[6];
- step2[7] = step1[7];
-
- temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
- temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
- temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
- temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
- temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- // stage 3
- step1[0] = step2[0];
- step1[1] = step2[1];
- step1[2] = step2[2];
- step1[3] = step2[3];
-
- temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
- temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
- temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
-
- step1[8] = WRAPLOW(step2[8] + step2[9], 8);
- step1[9] = WRAPLOW(step2[8] - step2[9], 8);
- step1[10] = WRAPLOW(-step2[10] + step2[11], 8);
- step1[11] = WRAPLOW(step2[10] + step2[11], 8);
- step1[12] = WRAPLOW(step2[12] + step2[13], 8);
- step1[13] = WRAPLOW(step2[12] - step2[13], 8);
- step1[14] = WRAPLOW(-step2[14] + step2[15], 8);
- step1[15] = WRAPLOW(step2[14] + step2[15], 8);
-
- // stage 4
- temp1 = (step1[0] + step1[1]) * cospi_16_64;
- temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
- temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[4] = WRAPLOW(step1[4] + step1[5], 8);
- step2[5] = WRAPLOW(step1[4] - step1[5], 8);
- step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
- step2[7] = WRAPLOW(step1[6] + step1[7], 8);
-
- step2[8] = step1[8];
- step2[15] = step1[15];
- temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
- temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
- temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[11] = step1[11];
- step2[12] = step1[12];
-
- // stage 5
- step1[0] = WRAPLOW(step2[0] + step2[3], 8);
- step1[1] = WRAPLOW(step2[1] + step2[2], 8);
- step1[2] = WRAPLOW(step2[1] - step2[2], 8);
- step1[3] = WRAPLOW(step2[0] - step2[3], 8);
- step1[4] = step2[4];
- temp1 = (step2[6] - step2[5]) * cospi_16_64;
- temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step1[7] = step2[7];
-
- step1[8] = WRAPLOW(step2[8] + step2[11], 8);
- step1[9] = WRAPLOW(step2[9] + step2[10], 8);
- step1[10] = WRAPLOW(step2[9] - step2[10], 8);
- step1[11] = WRAPLOW(step2[8] - step2[11], 8);
- step1[12] = WRAPLOW(-step2[12] + step2[15], 8);
- step1[13] = WRAPLOW(-step2[13] + step2[14], 8);
- step1[14] = WRAPLOW(step2[13] + step2[14], 8);
- step1[15] = WRAPLOW(step2[12] + step2[15], 8);
-
- // stage 6
- step2[0] = WRAPLOW(step1[0] + step1[7], 8);
- step2[1] = WRAPLOW(step1[1] + step1[6], 8);
- step2[2] = WRAPLOW(step1[2] + step1[5], 8);
- step2[3] = WRAPLOW(step1[3] + step1[4], 8);
- step2[4] = WRAPLOW(step1[3] - step1[4], 8);
- step2[5] = WRAPLOW(step1[2] - step1[5], 8);
- step2[6] = WRAPLOW(step1[1] - step1[6], 8);
- step2[7] = WRAPLOW(step1[0] - step1[7], 8);
- step2[8] = step1[8];
- step2[9] = step1[9];
- temp1 = (-step1[10] + step1[13]) * cospi_16_64;
- temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
- temp1 = (-step1[11] + step1[12]) * cospi_16_64;
- temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
- step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
- step2[14] = step1[14];
- step2[15] = step1[15];
-
- // stage 7
- output[0] = WRAPLOW(step2[0] + step2[15], 8);
- output[1] = WRAPLOW(-step2[1] - step2[14], 8);
- output[2] = WRAPLOW(step2[2] + step2[13], 8);
- output[3] = WRAPLOW(-step2[3] - step2[12], 8);
- output[4] = WRAPLOW(step2[4] + step2[11], 8);
- output[5] = WRAPLOW(-step2[5] - step2[10], 8);
- output[6] = WRAPLOW(step2[6] + step2[9], 8);
- output[7] = WRAPLOW(-step2[7] - step2[8], 8);
- output[8] = WRAPLOW(step2[7] - step2[8], 8);
- output[9] = WRAPLOW(-step2[6] + step2[9], 8);
- output[10] = WRAPLOW(step2[5] - step2[10], 8);
- output[11] = WRAPLOW(-step2[4] + step2[11], 8);
- output[12] = WRAPLOW(step2[3] - step2[12], 8);
- output[13] = WRAPLOW(-step2[2] + step2[13], 8);
- output[14] = WRAPLOW(step2[1] - step2[14], 8);
- output[15] = WRAPLOW(-step2[0] + step2[15], 8);
-}
-
-#if CONFIG_EXT_TX
static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 4; ++i)
@@ -285,21 +44,6 @@
}
// For use in lieu of ADST
-static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[16];
- for (i = 0; i < 8; ++i) {
- output[i] = input[16 + i] * 4;
- output[24 + i] = input[24 + i] * 4;
- }
- // Multiply input by sqrt(2)
- for (i = 0; i < 16; ++i) {
- inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
- }
- idct16_c(inputhalf, output + 8);
- // Note overall scaling factor is 4 times orthogonal
-}
-
static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
int i;
tran_low_t inputhalf[16];
@@ -379,7 +123,6 @@
// Note overall scaling factor is 4 times orthogonal
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // CONFIG_EXT_TX
// Inverse identity transform and add.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
@@ -412,24 +155,21 @@
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
case IDTX:
case V_DCT:
case H_DCT:
+ case V_ADST:
+ case H_ADST:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
- case FLIPADST_DST:
+ case V_FLIPADST:
// flip UD
FLIPUD_PTR(*dst, *dstride, size);
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
- case DST_FLIPADST:
+ case H_FLIPADST:
// flip LR
FLIPUD_PTR(*src, *sstride, size);
break;
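Note: the FLIPADST variants keep reusing the plain iadst kernels; the flip is handled entirely by the pointer swizzling above, and "flip LR" reduces to the same up-down trick because the buffer flipped here holds the data in transposed order. For reference, FLIPUD_PTR retargets the pointer at the last row and negates the stride (its definition lives earlier in idct.c; reproduced from the vp10 source as a sketch):

    #define FLIPUD_PTR(dest, stride, size) do {       \
        (dest) = (dest) + ((size) - 1) * (stride);    \
        (stride) = -(stride);                         \
      } while (0)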
@@ -716,24 +456,21 @@
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
case IDTX:
case V_DCT:
case H_DCT:
+ case V_ADST:
+ case H_ADST:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
- case FLIPADST_DST:
+ case V_FLIPADST:
// flip UD
FLIPUD_PTR(*dst, *dstride, size);
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
- case DST_FLIPADST:
+ case H_FLIPADST:
// flip LR
FLIPUD_PTR(*src, *sstride, size);
break;
@@ -754,26 +491,23 @@
void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_4[] = {
- { idct4_c, idct4_c }, // DCT_DCT = 0,
- { iadst4_c, idct4_c }, // ADST_DCT = 1,
- { idct4_c, iadst4_c }, // DCT_ADST = 2,
- { iadst4_c, iadst4_c }, // ADST_ADST = 3,
+ { idct4_c, idct4_c }, // DCT_DCT
+ { iadst4_c, idct4_c }, // ADST_DCT
+ { idct4_c, iadst4_c }, // DCT_ADST
+ { iadst4_c, iadst4_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { iadst4_c, idct4_c }, // FLIPADST_DCT = 4,
- { idct4_c, iadst4_c }, // DCT_FLIPADST = 5,
- { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST = 6,
- { iadst4_c, iadst4_c }, // ADST_FLIPADST = 7,
- { iadst4_c, iadst4_c }, // FLIPADST_ADST = 8,
- { idst4_c, idct4_c }, // DST_DCT = 9,
- { idct4_c, idst4_c }, // DCT_DST = 10,
- { idst4_c, iadst4_c }, // DST_ADST = 11,
- { iadst4_c, idst4_c }, // ADST_DST = 12,
- { idst4_c, iadst4_c }, // DST_FLIPADST = 13,
- { iadst4_c, idst4_c }, // FLIPADST_DST = 14,
- { idst4_c, idst4_c }, // DST_DST = 15
- { iidtx4_c, iidtx4_c }, // IDTX = 16
- { idct4_c, iidtx4_c }, // V_DCT = 17
- { iidtx4_c, idct4_c }, // H_DCT = 18
+ { iadst4_c, idct4_c }, // FLIPADST_DCT
+ { idct4_c, iadst4_c }, // DCT_FLIPADST
+ { iadst4_c, iadst4_c }, // FLIPADST_FLIPADST
+ { iadst4_c, iadst4_c }, // ADST_FLIPADST
+ { iadst4_c, iadst4_c }, // FLIPADST_ADST
+ { iidtx4_c, iidtx4_c }, // IDTX
+ { idct4_c, iidtx4_c }, // V_DCT
+ { iidtx4_c, idct4_c }, // H_DCT
+ { iadst4_c, iidtx4_c }, // V_ADST
+ { iidtx4_c, iadst4_c }, // H_ADST
+ { iadst4_c, iidtx4_c }, // V_FLIPADST
+ { iidtx4_c, iadst4_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -820,26 +554,23 @@
void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_8[] = {
- { idct8_c, idct8_c }, // DCT_DCT = 0,
- { iadst8_c, idct8_c }, // ADST_DCT = 1,
- { idct8_c, iadst8_c }, // DCT_ADST = 2,
- { iadst8_c, iadst8_c }, // ADST_ADST = 3,
+ { idct8_c, idct8_c }, // DCT_DCT
+ { iadst8_c, idct8_c }, // ADST_DCT
+ { idct8_c, iadst8_c }, // DCT_ADST
+ { iadst8_c, iadst8_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { iadst8_c, idct8_c }, // FLIPADST_DCT = 4,
- { idct8_c, iadst8_c }, // DCT_FLIPADST = 5,
- { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST = 6,
- { iadst8_c, iadst8_c }, // ADST_FLIPADST = 7,
- { iadst8_c, iadst8_c }, // FLIPADST_ADST = 8,
- { idst8_c, idct8_c }, // DST_DCT = 9,
- { idct8_c, idst8_c }, // DCT_DST = 10,
- { idst8_c, iadst8_c }, // DST_ADST = 11,
- { iadst8_c, idst8_c }, // ADST_DST = 12,
- { idst8_c, iadst8_c }, // DST_FLIPADST = 13,
- { iadst8_c, idst8_c }, // FLIPADST_DST = 14,
- { idst8_c, idst8_c }, // DST_DST = 15
- { iidtx8_c, iidtx8_c }, // IDTX = 16
- { idct8_c, iidtx8_c }, // V_DCT = 17
- { iidtx8_c, idct8_c }, // H_DCT = 18
+ { iadst8_c, idct8_c }, // FLIPADST_DCT
+ { idct8_c, iadst8_c }, // DCT_FLIPADST
+ { iadst8_c, iadst8_c }, // FLIPADST_FLIPADST
+ { iadst8_c, iadst8_c }, // ADST_FLIPADST
+ { iadst8_c, iadst8_c }, // FLIPADST_ADST
+ { iidtx8_c, iidtx8_c }, // IDTX
+ { idct8_c, iidtx8_c }, // V_DCT
+ { iidtx8_c, idct8_c }, // H_DCT
+ { iadst8_c, iidtx8_c }, // V_ADST
+ { iidtx8_c, iadst8_c }, // H_ADST
+ { iadst8_c, iidtx8_c }, // V_FLIPADST
+ { iidtx8_c, iadst8_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -886,26 +617,23 @@
void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
static const transform_2d IHT_16[] = {
- { idct16_c, idct16_c }, // DCT_DCT = 0,
- { iadst16_c, idct16_c }, // ADST_DCT = 1,
- { idct16_c, iadst16_c }, // DCT_ADST = 2,
- { iadst16_c, iadst16_c }, // ADST_ADST = 3,
+ { idct16_c, idct16_c }, // DCT_DCT
+ { iadst16_c, idct16_c }, // ADST_DCT
+ { idct16_c, iadst16_c }, // DCT_ADST
+ { iadst16_c, iadst16_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { iadst16_c, idct16_c }, // FLIPADST_DCT = 4,
- { idct16_c, iadst16_c }, // DCT_FLIPADST = 5,
- { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST = 6,
- { iadst16_c, iadst16_c }, // ADST_FLIPADST = 7,
- { iadst16_c, iadst16_c }, // FLIPADST_ADST = 8,
- { idst16_c, idct16_c }, // DST_DCT = 9,
- { idct16_c, idst16_c }, // DCT_DST = 10,
- { idst16_c, iadst16_c }, // DST_ADST = 11,
- { iadst16_c, idst16_c }, // ADST_DST = 12,
- { idst16_c, iadst16_c }, // DST_FLIPADST = 13,
- { iadst16_c, idst16_c }, // FLIPADST_DST = 14,
- { idst16_c, idst16_c }, // DST_DST = 15
- { iidtx16_c, iidtx16_c }, // IDTX = 16
- { idct16_c, iidtx16_c }, // V_DCT = 17
- { iidtx16_c, idct16_c }, // H_DCT = 18
+ { iadst16_c, idct16_c }, // FLIPADST_DCT
+ { idct16_c, iadst16_c }, // DCT_FLIPADST
+ { iadst16_c, iadst16_c }, // FLIPADST_FLIPADST
+ { iadst16_c, iadst16_c }, // ADST_FLIPADST
+ { iadst16_c, iadst16_c }, // FLIPADST_ADST
+ { iidtx16_c, iidtx16_c }, // IDTX
+ { idct16_c, iidtx16_c }, // V_DCT
+ { iidtx16_c, idct16_c }, // H_DCT
+ { iadst16_c, iidtx16_c }, // V_ADST
+ { iidtx16_c, iadst16_c }, // H_ADST
+ { iadst16_c, iidtx16_c }, // V_FLIPADST
+ { iidtx16_c, iadst16_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -953,25 +681,22 @@
void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
int stride, int tx_type) {
static const transform_2d IHT_32[] = {
- { idct32_c, idct32_c }, // DCT_DCT = 0,
- { ihalfright32_c, idct32_c }, // ADST_DCT = 1,
- { idct32_c, ihalfright32_c }, // DCT_ADST = 2,
- { ihalfright32_c, ihalfright32_c }, // ADST_ADST = 3,
- { ihalfright32_c, idct32_c }, // FLIPADST_DCT = 4,
- { idct32_c, ihalfright32_c }, // DCT_FLIPADST = 5,
- { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST = 6,
- { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST = 7,
- { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST = 8,
- { ihalfcenter32_c, idct32_c }, // DST_DCT = 9,
- { idct32_c, ihalfcenter32_c }, // DCT_DST = 10,
- { ihalfcenter32_c, ihalfright32_c }, // DST_ADST = 11,
- { ihalfright32_c, ihalfcenter32_c }, // ADST_DST = 12,
- { ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13,
- { ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14,
- { ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15
- { iidtx32_c, iidtx32_c }, // IDTX = 16
- { idct32_c, iidtx32_c }, // V_DCT = 17
- { iidtx32_c, idct32_c }, // H_DCT = 18
+ { idct32_c, idct32_c }, // DCT_DCT
+ { ihalfright32_c, idct32_c }, // ADST_DCT
+ { idct32_c, ihalfright32_c }, // DCT_ADST
+ { ihalfright32_c, ihalfright32_c }, // ADST_ADST
+ { ihalfright32_c, idct32_c }, // FLIPADST_DCT
+ { idct32_c, ihalfright32_c }, // DCT_FLIPADST
+ { ihalfright32_c, ihalfright32_c }, // FLIPADST_FLIPADST
+ { ihalfright32_c, ihalfright32_c }, // ADST_FLIPADST
+ { ihalfright32_c, ihalfright32_c }, // FLIPADST_ADST
+ { iidtx32_c, iidtx32_c }, // IDTX
+ { idct32_c, iidtx32_c }, // V_DCT
+ { iidtx32_c, idct32_c }, // H_DCT
+      { ihalfright32_c, iidtx32_c },       // V_ADST
+      { iidtx32_c, ihalfright32_c },       // H_ADST
+      { ihalfright32_c, iidtx32_c },       // V_FLIPADST
+      { iidtx32_c, ihalfright32_c },       // H_FLIPADST
};
int i, j;
@@ -1098,15 +823,12 @@
case FLIPADST_ADST:
vp10_iht4x4_16_add(input, dest, stride, tx_type);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use C version since these transforms only exist in C code
vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
break;
@@ -1139,15 +861,12 @@
case FLIPADST_ADST:
vp10_iht8x8_64_add(input, dest, stride, tx_type);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use C version since these transforms only exist in C code
vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
break;
@@ -1180,15 +899,12 @@
case FLIPADST_ADST:
vp10_iht16x16_256_add(input, dest, stride, tx_type);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use C version since these transforms only exist in C code
vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
break;
@@ -1217,15 +933,12 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
break;
case IDTX:
@@ -1242,26 +955,23 @@
void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_4[] = {
- { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0,
- { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1,
- { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST = 3,
+ { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT = 4,
- { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST = 5,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST = 6,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST = 7,
- { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST = 8,
- { highbd_idst4_c, vpx_highbd_idct4_c }, // DST_DCT = 9,
- { vpx_highbd_idct4_c, highbd_idst4_c }, // DCT_DST = 10,
- { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_ADST = 11,
- { vpx_highbd_iadst4_c, highbd_idst4_c }, // ADST_DST = 12,
- { highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13,
- { vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14,
- { highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15
- { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX = 16
- { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT = 17
- { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT = 18
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST
+ { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX
+ { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT
+ { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT
+ { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_ADST
+ { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_ADST
+ { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_FLIPADST
+ { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -1311,26 +1021,23 @@
void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_8[] = {
- { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0,
- { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1,
- { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST = 3,
+ { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT = 4,
- { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST = 5,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST = 6,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST = 7,
- { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST = 8,
- { highbd_idst8_c, vpx_highbd_idct8_c }, // DST_DCT = 9,
- { vpx_highbd_idct8_c, highbd_idst8_c }, // DCT_DST = 10,
- { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_ADST = 11,
- { vpx_highbd_iadst8_c, highbd_idst8_c }, // ADST_DST = 12,
- { highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13,
- { vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14,
- { highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15
- { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX = 16
- { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT = 17
- { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT = 18
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST
+ { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX
+ { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT
+ { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT
+ { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_ADST
+ { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_ADST
+ { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_FLIPADST
+ { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -1380,26 +1087,23 @@
void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_16[] = {
- { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0,
- { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1,
- { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST = 3,
+ { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST
#if CONFIG_EXT_TX
- { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT = 4,
- { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST = 5,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST = 6,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST = 7,
- { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST = 8,
- { highbd_idst16_c, vpx_highbd_idct16_c }, // DST_DCT = 9,
- { vpx_highbd_idct16_c, highbd_idst16_c }, // DCT_DST = 10,
- { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_ADST = 11,
- { vpx_highbd_iadst16_c, highbd_idst16_c }, // ADST_DST = 12,
- { highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13,
- { vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14,
- { highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15
- { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX = 16
- { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT = 17
- { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT = 18
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST
+ { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX
+ { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT
+ { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT
+ { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_ADST
+ { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_ADST
+ { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_FLIPADST
+ { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
@@ -1459,16 +1163,13 @@
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
{ highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
- { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
- { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
+ { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
{ vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT
- { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
+ { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
+ { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_ADST
+ { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_ADST
+ { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_FLIPADST
+ { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_FLIPADST
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1602,15 +1303,12 @@
case FLIPADST_ADST:
vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use C version since these transforms only exist in C code
vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
break;
@@ -1644,15 +1342,12 @@
case FLIPADST_ADST:
vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use C version since these transforms only exist in C code
vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
break;
@@ -1686,15 +1381,12 @@
case FLIPADST_ADST:
vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
break;
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use C version since these transforms only exist in C code
vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
break;
@@ -1724,15 +1416,12 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DST_DCT:
- case DCT_DST:
- case DST_ADST:
- case ADST_DST:
- case FLIPADST_DST:
- case DST_FLIPADST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
break;
case IDTX:
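Note: in all of the IHT_* tables the first entry of each pair is the column (vertical) transform and the second the row (horizontal) one, which is why V_DCT pairs idct with the identity and H_DCT is the mirror image. A minimal sketch of the shared 2-D inverse pattern these tables feed, assuming rows are transformed first and results are clipped into the destination (clip_pixel_add and ROUND_POWER_OF_TWO are standard vpx_dsp helpers; the function name and shift parameter are illustrative):

    static void iht_nxn_add_sketch(const tran_low_t *input, uint8_t *dest,
                                   int stride, const transform_2d *ht,
                                   int n, int shift) {
      tran_low_t out[16 * 16], temp_in[16], temp_out[16];
      int i, j;
      /* inverse transform row vectors */
      for (i = 0; i < n; ++i) {
        ht->rows(input, out + i * n);
        input += n;
      }
      /* inverse transform column vectors, then add into the prediction */
      for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) temp_in[j] = out[j * n + i];
        ht->cols(temp_in, temp_out);
        for (j = 0; j < n; ++j)
          dest[j * stride + i] = clip_pixel_add(
              dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], shift));
      }
    }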
diff --git a/vp10/common/mvref_common.c b/vp10/common/mvref_common.c
index c67beed..8d40bf2 100644
--- a/vp10/common/mvref_common.c
+++ b/vp10/common/mvref_common.c
@@ -749,6 +749,10 @@
void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *ref_mv_count,
+#endif
#if CONFIG_EXT_INTER
int_mv *mv_list,
#endif // CONFIG_EXT_INTER
@@ -760,11 +764,11 @@
b_mode_info *bmi = mi->bmi;
int n;
#if CONFIG_REF_MV
- CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE];
CANDIDATE_MV tmp_mv;
- uint8_t ref_mv_count = 0, idx;
+ uint8_t idx;
uint8_t above_count = 0, left_count = 0;
MV_REFERENCE_FRAME rf[2] = { mi->mbmi.ref_frame[ref], NONE };
+ *ref_mv_count = 0;
#endif
assert(MAX_MV_REF_CANDIDATES == 2);
@@ -774,12 +778,12 @@
#if CONFIG_REF_MV
scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf,
- -1, 0, ref_mv_stack, &ref_mv_count);
- above_count = ref_mv_count;
+ -1, 0, ref_mv_stack, ref_mv_count);
+ above_count = *ref_mv_count;
scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf,
- 0, -1, ref_mv_stack, &ref_mv_count);
- left_count = ref_mv_count - above_count;
+ 0, -1, ref_mv_stack, ref_mv_count);
+ left_count = *ref_mv_count - above_count;
if (above_count > 1 && left_count > 0) {
tmp_mv = ref_mv_stack[1];
@@ -787,7 +791,7 @@
ref_mv_stack[above_count] = tmp_mv;
}
- for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, ref_mv_count); ++idx) {
+ for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx) {
mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
clamp_mv_ref(&mv_list[idx].as_mv,
xd->n8_w << 3, xd->n8_h << 3, xd);
diff --git a/vp10/common/mvref_common.h b/vp10/common/mvref_common.h
index bc6d824..76530e9 100644
--- a/vp10/common/mvref_common.h
+++ b/vp10/common/mvref_common.h
@@ -289,16 +289,24 @@
static INLINE uint8_t vp10_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
int ref_idx) {
if (ref_mv_stack[ref_idx].weight > REF_CAT_LEVEL &&
- ref_mv_stack[ref_idx + 1].weight > REF_CAT_LEVEL)
- return 0;
+ ref_mv_stack[ref_idx + 1].weight > REF_CAT_LEVEL) {
+ if (ref_mv_stack[ref_idx].weight == ref_mv_stack[ref_idx + 1].weight)
+ return 0;
+ else
+ return 1;
+ }
if (ref_mv_stack[ref_idx].weight > REF_CAT_LEVEL &&
ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 1;
+ return 2;
if (ref_mv_stack[ref_idx].weight < REF_CAT_LEVEL &&
- ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL)
- return 2;
+ ref_mv_stack[ref_idx + 1].weight < REF_CAT_LEVEL) {
+ if (ref_mv_stack[ref_idx].weight == ref_mv_stack[ref_idx + 1].weight)
+ return 3;
+ else
+ return 4;
+ }
assert(0);
return 0;
@@ -327,6 +335,10 @@
void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd,
int block, int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *ref_mv_count,
+#endif
#if CONFIG_EXT_INTER
int_mv *mv_list,
#endif // CONFIG_EXT_INTER
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 6dc5604..2644ecf 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -2882,13 +2882,10 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
+ {row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
+ {col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
{row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors},
{col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors},
}, { // TX_8X8
@@ -2902,13 +2899,10 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
+ {row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
+ {col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
{row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors},
{col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors},
}, { // TX_16X16
@@ -2930,22 +2924,12 @@
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
- {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
+ {row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors},
+ {col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
@@ -2965,26 +2949,14 @@
qtr_scan_32x32_neighbors},
{qtr_scan_32x32, vp10_qtr_iscan_32x32,
qtr_scan_32x32_neighbors},
- {h2_scan_32x32, vp10_h2_iscan_32x32,
- h2_scan_32x32_neighbors},
- {v2_scan_32x32, vp10_v2_iscan_32x32,
- v2_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
- {h2_scan_32x32, vp10_h2_iscan_32x32,
- h2_scan_32x32_neighbors},
- {v2_scan_32x32, vp10_v2_iscan_32x32,
- v2_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
}
};
@@ -3000,13 +2972,10 @@
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
{default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
- {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+ {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors},
+ {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors},
+ {mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors},
+ {mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors},
{mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors},
{mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors},
}, { // TX_8X8
@@ -3020,13 +2989,10 @@
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
{default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
- {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+ {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors},
+ {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors},
+ {mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors},
+ {mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors},
{mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors},
{mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors},
}, { // TX_16X16
@@ -3050,22 +3016,12 @@
default_scan_16x16_neighbors},
{default_scan_16x16, vp10_default_iscan_16x16,
default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {default_scan_16x16, vp10_default_iscan_16x16,
- default_scan_16x16_neighbors},
- {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
- {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
+ {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
+ {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
+ {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
+ {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
+ {mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors},
+ {mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors},
}, { // TX_32X32
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
@@ -3085,24 +3041,14 @@
qtr_scan_32x32_neighbors},
{qtr_scan_32x32, vp10_qtr_iscan_32x32,
qtr_scan_32x32_neighbors},
- {h2_scan_32x32, vp10_h2_iscan_32x32,
- h2_scan_32x32_neighbors},
- {v2_scan_32x32, vp10_v2_iscan_32x32,
- v2_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
- {qtr_scan_32x32, vp10_qtr_iscan_32x32,
- qtr_scan_32x32_neighbors},
{default_scan_32x32, vp10_default_iscan_32x32,
default_scan_32x32_neighbors},
{mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
{mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
+ {mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors},
+ {mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors},
}
};
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index f8bfc89..66e788b 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -387,11 +387,7 @@
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
for (j = 0; j < 2; ++j)
- cm->counts.drl_mode0[i][j] += counts->drl_mode0[i][j];
-
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- for (j = 0; j < 2; ++j)
- cm->counts.drl_mode1[i][j] += counts->drl_mode1[i][j];
+ cm->counts.drl_mode[i][j] += counts->drl_mode[i][j];
#if CONFIG_EXT_INTER
for (j = 0; j < 2; ++j)
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 84b01e0..53ae632 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -117,9 +117,7 @@
for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
vp10_diff_update_prob(r, &fc->refmv_prob[i]);
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_diff_update_prob(r, &fc->drl_prob0[i]);
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_diff_update_prob(r, &fc->drl_prob1[i]);
+ vp10_diff_update_prob(r, &fc->drl_prob[i]);
#if CONFIG_EXT_INTER
vp10_diff_update_prob(r, &fc->new2mv_prob);
#endif // CONFIG_EXT_INTER
@@ -2322,7 +2320,7 @@
for (col = 0; col < max_blocks_wide; col += step)
eobtotal += reconstruct_inter_block(xd,
#if CONFIG_ANS
- pbi->token_tab, tok,
+ cm->token_tab, tok,
#else
r,
#endif
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index f52fae4..5b2fa1f 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -155,55 +155,45 @@
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
mbmi->ref_mv_idx = 0;
- if (xd->ref_mv_count[ref_frame_type] > 1 && mbmi->mode == NEWMV) {
- uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 0);
- vpx_prob drl_prob = cm->fc->drl_prob0[drl_ctx];
-
- if (!vpx_read(r, drl_prob)) {
- mbmi->ref_mv_idx = 0;
- return;
- }
- mbmi->ref_mv_idx = 1;
-
- if (xd->ref_mv_count[ref_frame_type] > 2) {
- drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 1);
- drl_prob = cm->fc->drl_prob0[drl_ctx];
- if (!vpx_read(r, drl_prob)) {
- mbmi->ref_mv_idx = 1;
- return;
- }
- mbmi->ref_mv_idx = 2;
- }
- return;
- }
-
- if (xd->ref_mv_count[ref_frame_type] > 2 && mbmi->mode == NEARMV) {
- uint8_t drl0_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 1);
- vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
- if (vpx_read(r, drl0_prob)) {
- mbmi->ref_mv_idx = 1;
- if (xd->counts)
- ++xd->counts->drl_mode0[drl0_ctx][1];
- if (xd->ref_mv_count[ref_frame_type] > 3) {
- uint8_t drl1_ctx =
- vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], 2);
- vpx_prob drl1_prob = cm->fc->drl_prob1[drl1_ctx];
- if (vpx_read(r, drl1_prob)) {
- mbmi->ref_mv_idx = 2;
+ if (mbmi->mode == NEWMV) {
+ int idx;
+ for (idx = 0; idx < 2; ++idx) {
+ if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+ if (!vpx_read(r, drl_prob)) {
+ mbmi->ref_mv_idx = idx;
if (xd->counts)
- ++xd->counts->drl_mode1[drl1_ctx][1];
-
+ ++xd->counts->drl_mode[drl_ctx][0];
return;
}
-
+ mbmi->ref_mv_idx = idx + 1;
if (xd->counts)
- ++xd->counts->drl_mode1[drl1_ctx][0];
+ ++xd->counts->drl_mode[drl_ctx][1];
}
- return;
}
+ }
- if (xd->counts)
- ++xd->counts->drl_mode0[drl0_ctx][0];
+ if (mbmi->mode == NEARMV) {
+ int idx;
+    // Start from 1 to skip over the NEARESTMV mode.
+ // TODO(jingning): Unify the two syntax decoding loops after the NEARESTMV
+ // mode is factored in.
+ for (idx = 1; idx < 3; ++idx) {
+ if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+ if (!vpx_read(r, drl_prob)) {
+ mbmi->ref_mv_idx = idx - 1;
+ if (xd->counts)
+ ++xd->counts->drl_mode[drl_ctx][0];
+ return;
+ }
+ mbmi->ref_mv_idx = idx;
+ if (xd->counts)
+ ++xd->counts->drl_mode[drl_ctx][1];
+ }
+ }
}
}
#endif
@@ -1401,6 +1391,10 @@
#else
if (b_mode == NEARESTMV || b_mode == NEARMV) {
#endif // CONFIG_EXT_INTER
+#if CONFIG_REF_MV
+ CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
+ uint8_t ref_mv_count[2];
+#endif
for (ref = 0; ref < 1 + is_compound; ++ref)
#if CONFIG_EXT_INTER
{
@@ -1409,6 +1403,10 @@
mv_ref_list, j, mi_row, mi_col, NULL);
#endif // CONFIG_EXT_INTER
vp10_append_sub8x8_mvs_for_idx(cm, xd, j, ref, mi_row, mi_col,
+#if CONFIG_REF_MV
+ ref_mv_stack[ref],
+ &ref_mv_count[ref],
+#endif
#if CONFIG_EXT_INTER
mv_ref_list,
#endif // CONFIG_EXT_INTER
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 386a17e..56832e2 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -28,6 +28,7 @@
#include "vp10/common/seg_common.h"
#include "vp10/common/tile_common.h"
+#include "vp10/encoder/buf_ans.h"
#include "vp10/encoder/cost.h"
#include "vp10/encoder/bitstream.h"
#include "vp10/encoder/encodemv.h"
@@ -196,39 +197,37 @@
assert(mbmi->ref_mv_idx < 3);
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 1 && mbmi->mode == NEWMV) {
- uint8_t drl_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 0);
- vpx_prob drl_prob = cm->fc->drl_prob0[drl_ctx];
+ if (mbmi->mode == NEWMV) {
+ int idx;
+ for (idx = 0; idx < 2; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
- vpx_write(w, mbmi->ref_mv_idx != 0, drl_prob);
- if (mbmi->ref_mv_idx == 0)
- return;
-
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
- drl_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- drl_prob = cm->fc->drl_prob0[drl_ctx];
- vpx_write(w, mbmi->ref_mv_idx != 1, drl_prob);
+ vpx_write(w, mbmi->ref_mv_idx != idx, drl_prob);
+ if (mbmi->ref_mv_idx == idx)
+ return;
+ }
}
- if (mbmi->ref_mv_idx == 1)
- return;
-
- assert(mbmi->ref_mv_idx == 2);
return;
}
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 2 && mbmi->mode == NEARMV) {
- uint8_t drl0_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- vpx_prob drl0_prob = cm->fc->drl_prob0[drl0_ctx];
- vpx_write(w, mbmi->ref_mv_idx != 0, drl0_prob);
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 3 &&
- mbmi->ref_mv_idx > 0) {
- uint8_t drl1_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
- vpx_prob drl1_prob = cm->fc->drl_prob1[drl1_ctx];
- vpx_write(w, mbmi->ref_mv_idx != 1, drl1_prob);
+ if (mbmi->mode == NEARMV) {
+ int idx;
+    // TODO(jingning): Temporary solution to compensate for the NEARESTMV offset.
+ for (idx = 1; idx < 3; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+
+ vpx_write(w, mbmi->ref_mv_idx != (idx - 1), drl_prob);
+ if (mbmi->ref_mv_idx == (idx - 1))
+ return;
+ }
}
+ return;
}
}
#endif
@@ -368,11 +367,8 @@
vp10_cond_prob_diff_update(w, &cm->fc->refmv_prob[i],
counts->refmv_mode[i]);
for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_cond_prob_diff_update(w, &cm->fc->drl_prob0[i],
- counts->drl_mode0[i]);
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
- vp10_cond_prob_diff_update(w, &cm->fc->drl_prob1[i],
- counts->drl_mode1[i]);
+ vp10_cond_prob_diff_update(w, &cm->fc->drl_prob[i],
+ counts->drl_mode[i]);
#if CONFIG_EXT_INTER
vp10_cond_prob_diff_update(w, &cm->fc->new2mv_prob, counts->new2mv_mode);
#endif // CONFIG_EXT_INTER
@@ -659,65 +655,85 @@
*tp = p;
}
#else
-// This function serializes the tokens backwards both in token order and
-// bit order in each token.
-static void pack_mb_tokens_ans(struct AnsCoder *const ans,
- rans_dec_lut token_tab[COEFF_PROB_MODELS],
- const TOKENEXTRA *const start,
+// This function serializes the tokens in forward order using a buffered ans
+// coder.
+static void pack_mb_tokens_ans(struct BufAnsCoder *ans,
+ const rans_dec_lut token_tab[COEFF_PROB_MODELS],
+ TOKENEXTRA **tp,
const TOKENEXTRA *const stop,
- vpx_bit_depth_t bit_depth) {
- const TOKENEXTRA *p;
- TX_SIZE tx_size = TX_SIZES;
+ vpx_bit_depth_t bit_depth,
+ const TX_SIZE tx) {
+ TOKENEXTRA *p = *tp;
+#if CONFIG_VAR_TX
+ int count = 0;
+ const int seg_eob = 16 << (tx << 1);
+#endif // CONFIG_VAR_TX
- for (p = stop - 1; p >= start; --p) {
+ while (p < stop && p->token != EOSB_TOKEN) {
const int t = p->token;
- if (t == EOSB_TOKEN) {
- tx_size = (TX_SIZE)p->extra;
- } else {
#if CONFIG_VP9_HIGHBITDEPTH
- const vp10_extra_bit *const b =
- (bit_depth == VPX_BITS_12) ? &vp10_extra_bits_high12[t] :
- (bit_depth == VPX_BITS_10) ? &vp10_extra_bits_high10[t] :
- &vp10_extra_bits[t];
+ const vp10_extra_bit *b;
+ if (bit_depth == VPX_BITS_12)
+ b = &vp10_extra_bits_high12[t];
+ else if (bit_depth == VPX_BITS_10)
+ b = &vp10_extra_bits_high10[t];
+ else
+ b = &vp10_extra_bits[t];
#else
const vp10_extra_bit *const b = &vp10_extra_bits[t];
- (void) bit_depth;
+ (void)bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (t != EOB_TOKEN && t != ZERO_TOKEN) {
- // Write extra bits first
- const int e = p->extra;
- const int l = b->len;
- const int skip_bits = (t == CATEGORY6_TOKEN) ? TX_SIZES - 1 - tx_size : 0;
- assert(tx_size < TX_SIZES);
- uabs_write(ans, e & 1, 128);
- if (l) {
- const int v = e >> 1;
- int n;
- for (n = 0; n < l - skip_bits; ++n) {
- const int bb = (v >> n) & 1;
- uabs_write(ans, bb, b->prob[l - 1 - n]);
- }
- for (; n < l; ++n) {
- assert(((v >> n) & 1) == 0);
- }
- }
+ /* skip one or two nodes */
+ if (!p->skip_eob_node)
+ buf_uabs_write(ans, t != EOB_TOKEN, p->context_tree[0]);
- {
+ if (t != EOB_TOKEN) {
+ buf_uabs_write(ans, t != ZERO_TOKEN, p->context_tree[1]);
+
+ if (t != ZERO_TOKEN) {
struct rans_sym s;
const rans_dec_lut *token_cdf =
&token_tab[p->context_tree[PIVOT_NODE] - 1];
s.cum_prob = (*token_cdf)[t - ONE_TOKEN];
s.prob = (*token_cdf)[t - ONE_TOKEN + 1] - s.cum_prob;
- rans_write(ans, &s);
+ buf_rans_write(ans, &s);
}
}
- if (t != EOB_TOKEN)
- uabs_write(ans, t != ZERO_TOKEN, p->context_tree[1]);
- if (!p->skip_eob_node)
- uabs_write(ans, t != EOB_TOKEN, p->context_tree[0]);
+
+ if (b->base_val) {
+ const int e = p->extra, l = b->len;
+ int skip_bits = (b->base_val == CAT6_MIN_VAL) ? TX_SIZES - 1 - tx : 0;
+
+ if (l) {
+ const unsigned char *pb = b->prob;
+ int v = e >> 1;
+ int n = l; /* number of bits in v, assumed nonzero */
+ int i = 0;
+
+ do {
+ const int bb = (v >> --n) & 1;
+ if (skip_bits) {
+ skip_bits--;
+ assert(!bb);
+ } else {
+ buf_uabs_write(ans, bb, pb[i >> 1]);
+ }
+ i = b->tree[i + bb];
+ } while (n);
+ }
+
+ buf_uabs_write(ans, e & 1, 128);
+ }
+ ++p;
+
+#if CONFIG_VAR_TX
+ ++count;
+ if (t == EOB_TOKEN || count == seg_eob) break;
+#endif // CONFIG_VAR_TX
}
- }
+
+ *tp = p;
}
#endif // !CONFIG_ANS
@@ -1436,8 +1452,11 @@
}
static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
- vpx_writer *w, TOKENEXTRA **tok,
- const TOKENEXTRA *const tok_end,
+ vpx_writer *w,
+#if CONFIG_ANS
+ struct BufAnsCoder *ans,
+#endif // CONFIG_ANS
+ TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
#if CONFIG_SUPERTX
int supertx_enabled,
#endif
@@ -1492,7 +1511,6 @@
if (supertx_enabled) return;
#endif // CONFIG_SUPERTX
-#if !CONFIG_ANS
if (!m->mbmi.skip) {
assert(*tok < tok_end);
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
@@ -1528,18 +1546,26 @@
for (row = 0; row < num_4x4_h; row += bw)
for (col = 0; col < num_4x4_w; col += bw)
+#if CONFIG_ANS
+ pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth,
+ tx);
+#else
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif // CONFIG_ANS
}
#else
TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
: m->mbmi.tx_size;
+#if CONFIG_ANS
+ pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth, tx);
+#else
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif // CONFIG_ANS
#endif // CONFIG_VAR_TX
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
}
}
-#endif
}
static void write_partition(const VP10_COMMON *const cm,
@@ -1564,8 +1590,11 @@
}
}
-static void write_modes_sb(VP10_COMP *cpi,
- const TileInfo *const tile, vpx_writer *w,
+static void write_modes_sb(VP10_COMP *cpi, const TileInfo *const tile,
+ vpx_writer *w,
+#if CONFIG_ANS
+ struct BufAnsCoder *ans,
+#endif // CONFIG_ANS
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
#if CONFIG_SUPERTX
int supertx_enabled,
@@ -1634,7 +1663,11 @@
}
#endif // CONFIG_SUPERTX
if (subsize < BLOCK_8X8) {
- write_modes_b(cpi, tile, w, tok, tok_end,
+ write_modes_b(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
@@ -1642,55 +1675,91 @@
} else {
switch (partition) {
case PARTITION_NONE:
- write_modes_b(cpi, tile, w, tok, tok_end,
+ write_modes_b(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row, mi_col);
break;
case PARTITION_HORZ:
- write_modes_b(cpi, tile, w, tok, tok_end,
+ write_modes_b(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row, mi_col);
if (mi_row + bs < cm->mi_rows)
- write_modes_b(cpi, tile, w, tok, tok_end,
+ write_modes_b(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row + bs, mi_col);
break;
case PARTITION_VERT:
- write_modes_b(cpi, tile, w, tok, tok_end,
+ write_modes_b(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row, mi_col);
if (mi_col + bs < cm->mi_cols)
- write_modes_b(cpi, tile, w, tok, tok_end,
+ write_modes_b(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row, mi_col + bs);
break;
case PARTITION_SPLIT:
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ write_modes_sb(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ write_modes_sb(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row, mi_col + bs, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ write_modes_sb(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
mi_row + bs, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ write_modes_sb(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
supertx_enabled,
#endif // CONFIG_SUPERTX
@@ -1716,7 +1785,12 @@
for (row = 0; row < num_4x4_h; row += bw)
for (col = 0; col < num_4x4_w; col += bw)
+#if CONFIG_ANS
+ pack_mb_tokens_ans(ans, cm->token_tab, tok, tok_end, cm->bit_depth,
+ tx);
+#else
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
}
@@ -1729,8 +1803,11 @@
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
-static void write_modes(VP10_COMP *cpi,
- const TileInfo *const tile, vpx_writer *w,
+static void write_modes(VP10_COMP *cpi, const TileInfo *const tile,
+ vpx_writer *w,
+#if CONFIG_ANS
+ struct BufAnsCoder *ans,
+#endif // CONFIG_ANS
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int mi_row, mi_col;
@@ -1740,7 +1817,11 @@
vp10_zero_left_context(xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
- write_modes_sb(cpi, tile, w, tok, tok_end,
+ write_modes_sb(cpi, tile, w,
+#if CONFIG_ANS
+ ans,
+#endif // CONFIG_ANS
+ tok, tok_end,
#if CONFIG_SUPERTX
0,
#endif
@@ -2206,13 +2287,18 @@
vpx_writer mode_bc;
#if CONFIG_ANS
struct AnsCoder token_ans;
-#endif
+ struct BufAnsCoder buffered_ans;
+#endif // CONFIG_ANS
int tile_row, tile_col;
TOKENEXTRA *tok_end;
size_t total_size = 0;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
unsigned int max_tile = 0;
+ const int ans_window_size = get_token_alloc(cm->mb_rows, cm->mb_cols) * 3;
+ struct buffered_ans_symbol *uco_ans_buf =
+ malloc(ans_window_size * sizeof(*uco_ans_buf));
+ assert(uco_ans_buf);
vp10_zero_above_context(cm, 0, mi_cols_aligned_to_sb(cm->mi_cols));
@@ -2232,8 +2318,8 @@
#if !CONFIG_ANS
(void) token_section_size;
- write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
- &mode_bc, &tok, tok_end);
+ write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc, &tok,
+ tok_end);
assert(tok == tok_end);
vpx_stop_encode(&mode_bc);
if (put_tile_size) {
@@ -2248,12 +2334,13 @@
}
total_size += mode_bc.pos;
#else
+ buf_ans_write_init(&buffered_ans, uco_ans_buf, ans_window_size);
write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &mode_bc,
- NULL, NULL);
+ &buffered_ans, &tok, tok_end);
+ assert(tok == tok_end);
vpx_stop_encode(&mode_bc);
ans_write_init(&token_ans, mode_data_start + mode_bc.pos);
- pack_mb_tokens_ans(&token_ans, cm->token_tab, tok, tok_end,
- cm->bit_depth);
+ buf_ans_flush(&buffered_ans, &token_ans);
token_section_size = ans_write_end(&token_ans);
if (put_tile_size) {
// size of this tile
@@ -2267,6 +2354,9 @@
}
*max_tile_sz = max_tile;
+#if CONFIG_ANS
+ free(uco_ans_buf);
+#endif // CONFIG_ANS
return total_size;
}
diff --git a/vp10/encoder/buf_ans.h b/vp10/encoder/buf_ans.h
new file mode 100644
index 0000000..ae76873
--- /dev/null
+++ b/vp10/encoder/buf_ans.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_BUF_ANS_H_
+#define VP10_ENCODER_BUF_ANS_H_
+// Buffered forward ANS writer.
+// Symbols are written to the writer in forward (decode) order and serialized
+// backwards due to ANS's stack-like behavior.
+
+#include <assert.h>
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem_ops.h"
+#include "vp10/common/ans.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define ANS_METHOD_UABS 0
+#define ANS_METHOD_RANS 1
+
+struct buffered_ans_symbol {
+ uint8_t method; // one of ANS_METHOD_UABS or ANS_METHOD_RANS
+  // TODO(aconverse): Should be possible to write this in terms of start for ABS
+  AnsP8 val_start;  // Boolean value for ABS, start in symbol cycle for rANS
+ AnsP8 prob; // Probability of this symbol
+};
+
+struct BufAnsCoder {
+ struct buffered_ans_symbol *buf;
+ int size;
+ int offset;
+};
+
+static INLINE void buf_ans_write_init(struct BufAnsCoder *const c,
+ struct buffered_ans_symbol *sym_arr,
+ int size) {
+ c->buf = sym_arr;
+ c->size = size;
+ c->offset = 0;
+}
+
+static INLINE void buf_uabs_write(struct BufAnsCoder *const c,
+ uint8_t val, AnsP8 prob) {
+ assert(c->offset < c->size);
+ c->buf[c->offset].method = ANS_METHOD_UABS;
+ c->buf[c->offset].val_start = val;
+ c->buf[c->offset].prob = prob;
+ ++c->offset;
+}
+
+static INLINE void buf_rans_write(struct BufAnsCoder *const c,
+ const struct rans_sym *const sym) {
+ assert(c->offset < c->size);
+ c->buf[c->offset].method = ANS_METHOD_RANS;
+ c->buf[c->offset].val_start = sym->cum_prob;
+ c->buf[c->offset].prob = sym->prob;
+ ++c->offset;
+}
+
+static INLINE void buf_ans_flush(const struct BufAnsCoder *const c,
+ struct AnsCoder *ans) {
+ int offset;
+ for (offset = c->offset - 1; offset >= 0; --offset) {
+ if (c->buf[offset].method == ANS_METHOD_RANS) {
+ struct rans_sym sym;
+ sym.prob = c->buf[offset].prob;
+ sym.cum_prob = c->buf[offset].val_start;
+ rans_write(ans, &sym);
+ } else {
+ uabs_write(ans, c->buf[offset].val_start, c->buf[offset].prob);
+ }
+ }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // VP10_ENCODER_BUF_ANS_H_
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 8a1ee20..11d4a8e 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -36,219 +36,6 @@
#endif
}
-#if CONFIG_EXT_TX
-void fdst4(const tran_low_t *input, tran_low_t *output) {
- tran_high_t step[4];
- tran_high_t temp1, temp2;
-
- step[0] = input[0] - input[3];
- step[1] = -input[1] + input[2];
- step[2] = -input[1] - input[2];
- step[3] = input[0] + input[3];
-
- temp1 = (step[0] + step[1]) * cospi_16_64;
- temp2 = (step[0] - step[1]) * cospi_16_64;
- output[3] = fdct_round_shift(temp1);
- output[1] = fdct_round_shift(temp2);
- temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
- temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
- output[2] = fdct_round_shift(temp1);
- output[0] = fdct_round_shift(temp2);
-}
-
-void fdst8(const tran_low_t *input, tran_low_t *output) {
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- // stage 1
- s0 = input[0] - input[7];
- s1 = -input[1] + input[6];
- s2 = input[2] - input[5];
- s3 = -input[3] + input[4];
- s4 = -input[3] - input[4];
- s5 = input[2] + input[5];
- s6 = -input[1] - input[6];
- s7 = input[0] + input[7];
-
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
- t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[7] = fdct_round_shift(t0);
- output[5] = fdct_round_shift(t2);
- output[3] = fdct_round_shift(t1);
- output[1] = fdct_round_shift(t3);
-
- // Stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[6] = fdct_round_shift(t0);
- output[4] = fdct_round_shift(t2);
- output[2] = fdct_round_shift(t1);
- output[0] = fdct_round_shift(t3);
-}
-
-void fdst16(const tran_low_t *input, tran_low_t *output) {
- tran_high_t step1[8]; // canbe16
- tran_high_t step2[8]; // canbe16
- tran_high_t step3[8]; // canbe16
- tran_high_t in[8]; // canbe16
- tran_high_t temp1, temp2; // needs32
-
- // step 1
- in[0] = input[0] - input[15];
- in[1] = -input[1] + input[14];
- in[2] = input[2] - input[13];
- in[3] = -input[3] + input[12];
- in[4] = input[4] - input[11];
- in[5] = -input[5] + input[10];
- in[6] = input[6] - input[ 9];
- in[7] = -input[7] + input[ 8];
-
- step1[0] = -input[7] - input[ 8];
- step1[1] = input[6] + input[ 9];
- step1[2] = -input[5] - input[10];
- step1[3] = input[4] + input[11];
- step1[4] = -input[3] - input[12];
- step1[5] = input[2] + input[13];
- step1[6] = -input[1] - input[14];
- step1[7] = input[0] + input[15];
-
- // fdct8(step, step);
- {
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- // stage 1
- s0 = in[0] + in[7];
- s1 = in[1] + in[6];
- s2 = in[2] + in[5];
- s3 = in[3] + in[4];
- s4 = in[3] - in[4];
- s5 = in[2] - in[5];
- s6 = in[1] - in[6];
- s7 = in[0] - in[7];
-
- // fdct4(step, step);
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
- t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
- output[15] = fdct_round_shift(t0);
- output[11] = fdct_round_shift(t2);
- output[7] = fdct_round_shift(t1);
- output[3] = fdct_round_shift(t3);
-
- // Stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[13] = fdct_round_shift(t0);
- output[9] = fdct_round_shift(t2);
- output[5] = fdct_round_shift(t1);
- output[1] = fdct_round_shift(t3);
- }
-
- // step 2
- temp1 = (step1[5] - step1[2]) * cospi_16_64;
- temp2 = (step1[4] - step1[3]) * cospi_16_64;
- step2[2] = fdct_round_shift(temp1);
- step2[3] = fdct_round_shift(temp2);
- temp1 = (step1[4] + step1[3]) * cospi_16_64;
- temp2 = (step1[5] + step1[2]) * cospi_16_64;
- step2[4] = fdct_round_shift(temp1);
- step2[5] = fdct_round_shift(temp2);
-
- // step 3
- step3[0] = step1[0] + step2[3];
- step3[1] = step1[1] + step2[2];
- step3[2] = step1[1] - step2[2];
- step3[3] = step1[0] - step2[3];
- step3[4] = step1[7] - step2[4];
- step3[5] = step1[6] - step2[5];
- step3[6] = step1[6] + step2[5];
- step3[7] = step1[7] + step2[4];
-
- // step 4
- temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
- temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
- step2[1] = fdct_round_shift(temp1);
- step2[2] = fdct_round_shift(temp2);
- temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
- temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
- step2[5] = fdct_round_shift(temp1);
- step2[6] = fdct_round_shift(temp2);
-
- // step 5
- step1[0] = step3[0] + step2[1];
- step1[1] = step3[0] - step2[1];
- step1[2] = step3[3] + step2[2];
- step1[3] = step3[3] - step2[2];
- step1[4] = step3[4] - step2[5];
- step1[5] = step3[4] + step2[5];
- step1[6] = step3[7] - step2[6];
- step1[7] = step3[7] + step2[6];
-
- // step 6
- temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
- temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
- output[14] = fdct_round_shift(temp1);
- output[6] = fdct_round_shift(temp2);
-
- temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
- temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
- output[10] = fdct_round_shift(temp1);
- output[2] = fdct_round_shift(temp2);
-
- temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
- temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
- output[12] = fdct_round_shift(temp1);
- output[4] = fdct_round_shift(temp2);
-
- temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
- temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
- output[8] = fdct_round_shift(temp1);
- output[0] = fdct_round_shift(temp2);
-}
-#endif // CONFIG_EXT_TX
-
static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
@@ -1236,22 +1023,6 @@
output[i] = input[i] * 4;
}
-// For use in lieu of DST
-static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[16];
- for (i = 0; i < 8; ++i) {
- output[16 + i] = input[i] * 4;
- output[24 + i] = input[24 + i] * 4;
- }
- // Multiply input by sqrt(2)
- for (i = 0; i < 16; ++i) {
- inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 8] * Sqrt2);
- }
- fdct16(inputhalf, output);
- // Note overall scaling factor is 4 times orthogonal
-}
-
// For use in lieu of ADST
static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -1334,25 +1105,22 @@
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
case IDTX:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
- case FLIPADST_DST:
+ case V_FLIPADST:
copy_flipud(*src, *src_stride, l, buff, l);
*src = buff;
*src_stride = l;
break;
case DCT_FLIPADST:
case ADST_FLIPADST:
- case DST_FLIPADST:
+ case H_FLIPADST:
copy_fliplr(*src, *src_stride, l, buff, l);
*src = buff;
*src_stride = l;
@@ -1370,98 +1138,86 @@
#endif // CONFIG_EXT_TX
static const transform_2d FHT_4[] = {
- { fdct4, fdct4 }, // DCT_DCT = 0,
- { fadst4, fdct4 }, // ADST_DCT = 1,
- { fdct4, fadst4 }, // DCT_ADST = 2,
- { fadst4, fadst4 }, // ADST_ADST = 3,
+ { fdct4, fdct4 }, // DCT_DCT
+ { fadst4, fdct4 }, // ADST_DCT
+ { fdct4, fadst4 }, // DCT_ADST
+ { fadst4, fadst4 }, // ADST_ADST
#if CONFIG_EXT_TX
- { fadst4, fdct4 }, // FLIPADST_DCT = 4,
- { fdct4, fadst4 }, // DCT_FLIPADST = 5,
- { fadst4, fadst4 }, // FLIPADST_FLIPADST = 6,
- { fadst4, fadst4 }, // ADST_FLIPADST = 7,
- { fadst4, fadst4 }, // FLIPADST_ADST = 8,
- { fdst4, fdct4 }, // DST_DCT = 9,
- { fdct4, fdst4 }, // DCT_DST = 10,
- { fdst4, fadst4 }, // DST_ADST = 11,
- { fadst4, fdst4 }, // ADST_DST = 12,
- { fdst4, fadst4 }, // DST_FLIPADST = 13,
- { fadst4, fdst4 }, // FLIPADST_DST = 14,
- { fdst4, fdst4 }, // DST_DST = 15
- { fidtx4, fidtx4 }, // IDTX = 16
- { fdct4, fidtx4 }, // V_DCT = 17
- { fidtx4, fdct4 }, // H_DCT = 18
+ { fadst4, fdct4 }, // FLIPADST_DCT
+ { fdct4, fadst4 }, // DCT_FLIPADST
+ { fadst4, fadst4 }, // FLIPADST_FLIPADST
+ { fadst4, fadst4 }, // ADST_FLIPADST
+ { fadst4, fadst4 }, // FLIPADST_ADST
+ { fidtx4, fidtx4 }, // IDTX
+ { fdct4, fidtx4 }, // V_DCT
+ { fidtx4, fdct4 }, // H_DCT
+ { fadst4, fidtx4 }, // V_ADST
+ { fidtx4, fadst4 }, // H_ADST
+ { fadst4, fidtx4 }, // V_FLIPADST
+ { fidtx4, fadst4 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_8[] = {
- { fdct8, fdct8 }, // DCT_DCT = 0,
- { fadst8, fdct8 }, // ADST_DCT = 1,
- { fdct8, fadst8 }, // DCT_ADST = 2,
- { fadst8, fadst8 }, // ADST_ADST = 3,
+ { fdct8, fdct8 }, // DCT_DCT
+ { fadst8, fdct8 }, // ADST_DCT
+ { fdct8, fadst8 }, // DCT_ADST
+ { fadst8, fadst8 }, // ADST_ADST
#if CONFIG_EXT_TX
- { fadst8, fdct8 }, // FLIPADST_DCT = 4,
- { fdct8, fadst8 }, // DCT_FLIPADST = 5,
- { fadst8, fadst8 }, // FLIPADST_FLIPADST = 6,
- { fadst8, fadst8 }, // ADST_FLIPADST = 7,
- { fadst8, fadst8 }, // FLIPADST_ADST = 8,
- { fdst8, fdct8 }, // DST_DCT = 9,
- { fdct8, fdst8 }, // DCT_DST = 10,
- { fdst8, fadst8 }, // DST_ADST = 11,
- { fadst8, fdst8 }, // ADST_DST = 12,
- { fdst8, fadst8 }, // DST_FLIPADST = 13,
- { fadst8, fdst8 }, // FLIPADST_DST = 14,
- { fdst8, fdst8 }, // DST_DST = 15
- { fidtx8, fidtx8 }, // IDTX = 16
- { fdct8, fidtx8 }, // V_DCT = 17
- { fidtx8, fdct8 }, // H_DCT = 18
+ { fadst8, fdct8 }, // FLIPADST_DCT
+ { fdct8, fadst8 }, // DCT_FLIPADST
+ { fadst8, fadst8 }, // FLIPADST_FLIPADST
+ { fadst8, fadst8 }, // ADST_FLIPADST
+ { fadst8, fadst8 }, // FLIPADST_ADST
+ { fidtx8, fidtx8 }, // IDTX
+ { fdct8, fidtx8 }, // V_DCT
+ { fidtx8, fdct8 }, // H_DCT
+ { fadst8, fidtx8 }, // V_ADST
+ { fidtx8, fadst8 }, // H_ADST
+ { fadst8, fidtx8 }, // V_FLIPADST
+ { fidtx8, fadst8 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
static const transform_2d FHT_16[] = {
- { fdct16, fdct16 }, // DCT_DCT = 0,
- { fadst16, fdct16 }, // ADST_DCT = 1,
- { fdct16, fadst16 }, // DCT_ADST = 2,
- { fadst16, fadst16 }, // ADST_ADST = 3,
+ { fdct16, fdct16 }, // DCT_DCT
+ { fadst16, fdct16 }, // ADST_DCT
+ { fdct16, fadst16 }, // DCT_ADST
+ { fadst16, fadst16 }, // ADST_ADST
#if CONFIG_EXT_TX
- { fadst16, fdct16 }, // FLIPADST_DCT = 4,
- { fdct16, fadst16 }, // DCT_FLIPADST = 5,
- { fadst16, fadst16 }, // FLIPADST_FLIPADST = 6,
- { fadst16, fadst16 }, // ADST_FLIPADST = 7,
- { fadst16, fadst16 }, // FLIPADST_ADST = 8,
- { fdst16, fdct16 }, // DST_DCT = 9,
- { fdct16, fdst16 }, // DCT_DST = 10,
- { fdst16, fadst16 }, // DST_ADST = 11,
- { fadst16, fdst16 }, // ADST_DST = 12,
- { fdst16, fadst16 }, // DST_FLIPADST = 13,
- { fadst16, fdst16 }, // FLIPADST_DST = 14,
- { fdst16, fdst16 }, // DST_DST = 15
- { fidtx16, fidtx16 }, // IDTX = 16
- { fdct16, fidtx16 }, // V_DCT = 17
- { fidtx16, fdct16 }, // H_DCT = 18
+ { fadst16, fdct16 }, // FLIPADST_DCT
+ { fdct16, fadst16 }, // DCT_FLIPADST
+ { fadst16, fadst16 }, // FLIPADST_FLIPADST
+ { fadst16, fadst16 }, // ADST_FLIPADST
+ { fadst16, fadst16 }, // FLIPADST_ADST
+ { fidtx16, fidtx16 }, // IDTX
+ { fdct16, fidtx16 }, // V_DCT
+ { fidtx16, fdct16 }, // H_DCT
+ { fadst16, fidtx16 }, // V_ADST
+ { fidtx16, fadst16 }, // H_ADST
+ { fadst16, fidtx16 }, // V_FLIPADST
+ { fidtx16, fadst16 }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
#if CONFIG_EXT_TX
static const transform_2d FHT_32[] = {
- { fdct32, fdct32 }, // DCT_DCT = 0,
- { fhalfright32, fdct32 }, // ADST_DCT = 1,
- { fdct32, fhalfright32 }, // DCT_ADST = 2,
- { fhalfright32, fhalfright32 }, // ADST_ADST = 3,
- { fhalfright32, fdct32 }, // FLIPADST_DCT = 4,
- { fdct32, fhalfright32 }, // DCT_FLIPADST = 5,
- { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST = 6,
- { fhalfright32, fhalfright32 }, // ADST_FLIPADST = 7,
- { fhalfright32, fhalfright32 }, // FLIPADST_ADST = 8,
- { fhalfcenter32, fdct32 }, // DST_DCT = 9,
- { fdct32, fhalfcenter32 }, // DCT_DST = 10,
- { fhalfcenter32, fhalfright32 }, // DST_ADST = 11,
- { fhalfright32, fhalfcenter32 }, // ADST_DST = 12,
- { fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
- { fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
- { fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
- { fidtx32, fidtx32 }, // IDTX = 16
- { fdct32, fidtx32 }, // V_DCT = 17
- { fidtx32, fdct32 }, // H_DCT = 18
+ { fdct32, fdct32 }, // DCT_DCT
+ { fhalfright32, fdct32 }, // ADST_DCT
+ { fdct32, fhalfright32 }, // DCT_ADST
+ { fhalfright32, fhalfright32 }, // ADST_ADST
+ { fhalfright32, fdct32 }, // FLIPADST_DCT
+ { fdct32, fhalfright32 }, // DCT_FLIPADST
+ { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST
+ { fhalfright32, fhalfright32 }, // ADST_FLIPADST
+ { fhalfright32, fhalfright32 }, // FLIPADST_ADST
+ { fidtx32, fidtx32 }, // IDTX
+ { fdct32, fidtx32 }, // V_DCT
+ { fidtx32, fdct32 }, // H_DCT
+ { fhalfright32, fidtx32 }, // V_ADST
+ { fidtx32, fhalfright32 }, // H_ADST
+ { fhalfright32, fidtx32 }, // V_FLIPADST
+ { fidtx32, fhalfright32 }, // H_FLIPADST
};
#endif // CONFIG_EXT_TX
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 2c47be9..08ba019 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1234,7 +1234,10 @@
PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int output_enabled) {
- int i, y, x_idx;
+ int y, x_idx;
+#if CONFIG_VAR_TX
+ int i;
+#endif
VP10_COMMON *const cm = &cpi->common;
RD_COUNTS *const rdc = &td->rd_counts;
MACROBLOCK *const x = &td->mb;
@@ -1909,24 +1912,34 @@
#endif // CONFIG_EXT_INTER
mode_ctx);
+ if (mode == NEWMV) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ int idx;
+
+ for (idx = 0; idx < 2; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
+
+ if (mbmi->ref_mv_idx == idx)
+ break;
+ }
+ }
+ }
+
if (mode == NEARMV) {
uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
- uint8_t drl0_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- if (mbmi->ref_mv_idx == 0)
- ++counts->drl_mode0[drl0_ctx][0];
- else
- ++counts->drl_mode0[drl0_ctx][1];
+ int idx;
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 3 &&
- mbmi->ref_mv_idx > 0) {
- uint8_t drl1_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
- if (mbmi->ref_mv_idx == 1)
- ++counts->drl_mode1[drl1_ctx][0];
- else
- ++counts->drl_mode1[drl1_ctx][1];
+ for (idx = 1; idx < 3; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
+
+ if (mbmi->ref_mv_idx == idx - 1)
+ break;
}
}
}
@@ -5314,11 +5327,6 @@
#if CONFIG_EXT_TX
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX && *best_tx == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
#else
if (tx_size >= TX_32X32 && tx_type != DCT_DCT)
continue;
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index a319901..e02bf6c 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -487,7 +487,6 @@
int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2];
int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
- int drl_mode_cost1[DRL_MODE_CONTEXTS][2];
#if CONFIG_EXT_INTER
int new2mv_mode_cost[2];
#endif // CONFIG_EXT_INTER
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index faedb43..785fef0 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -54,17 +54,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -96,17 +93,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -138,17 +132,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -180,17 +171,14 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -227,15 +215,12 @@
case FLIPADST_ADST:
vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
break;
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
@@ -270,15 +255,12 @@
case FLIPADST_ADST:
vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
break;
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use the C version since these transform types exist only in C
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
@@ -314,15 +296,12 @@
case FLIPADST_ADST:
vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
break;
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
      // Use the C version since these transform types exist only in C
vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
@@ -355,15 +334,12 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- case DST_DST:
- case DCT_DST:
- case DST_DCT:
- case DST_ADST:
- case ADST_DST:
- case DST_FLIPADST:
- case FLIPADST_DST:
- case H_DCT:
case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
break;
case IDTX:
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 78e8e9a..8d55ab4 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -418,13 +418,8 @@
}
for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
- cpi->drl_mode_cost0[i][0] = vp10_cost_bit(cm->fc->drl_prob0[i], 0);
- cpi->drl_mode_cost0[i][1] = vp10_cost_bit(cm->fc->drl_prob0[i], 1);
- }
-
- for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
- cpi->drl_mode_cost1[i][0] = vp10_cost_bit(cm->fc->drl_prob1[i], 0);
- cpi->drl_mode_cost1[i][1] = vp10_cost_bit(cm->fc->drl_prob1[i], 1);
+ cpi->drl_mode_cost0[i][0] = vp10_cost_bit(cm->fc->drl_prob[i], 0);
+ cpi->drl_mode_cost0[i][1] = vp10_cost_bit(cm->fc->drl_prob[i], 1);
}
#if CONFIG_EXT_INTER
cpi->new2mv_mode_cost[0] = vp10_cost_bit(cm->fc->new2mv_prob, 0);
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 3d92591..87c9d0f 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1311,6 +1311,179 @@
}
#endif // CONFIG_SUPERTX
+static int64_t txfm_yrd(VP10_COMP *cpi, MACROBLOCK *x,
+ int *r, int64_t *d, int *s, int64_t *sse,
+ int64_t ref_best_rd,
+ BLOCK_SIZE bs, TX_TYPE tx_type, int tx_size) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int64_t rd = INT64_MAX;
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0, s1;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ const int is_inter = is_inter_block(mbmi);
+ const int r_tx_size =
+ cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)][tx_size];
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+
+ assert(skip_prob > 0);
+ s0 = vp10_cost_bit(skip_prob, 0);
+ s1 = vp10_cost_bit(skip_prob, 1);
+
+ mbmi->tx_type = tx_type;
+ mbmi->tx_size = tx_size;
+ txfm_rd_in_plane(x,
+ cpi,
+ r, d, s,
+ sse, ref_best_rd, 0, bs, tx_size,
+ cpi->sf.use_fast_coef_costing);
+ if (*r == INT_MAX)
+ return INT64_MAX;
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(tx_size, bs, is_inter);
+ if (get_ext_tx_types(tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ *r += cpi->inter_tx_type_costs[ext_tx_set]
+ [mbmi->tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ *r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+
+#else
+ if (tx_size < TX_32X32 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] && !FIXED_TX_TYPE) {
+ if (is_inter) {
+ *r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ } else {
+ *r += cpi->intra_tx_type_costs[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
+ }
+#endif // CONFIG_EXT_TX
+
+ if (*s) {
+ if (is_inter) {
+ rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, *sse);
+ }
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, *r + s0 + r_tx_size * tx_select, *d);
+ }
+
+ if (tx_select && !(*s && is_inter))
+ *r += r_tx_size;
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(*s))
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+
+ return rd;
+}
+
+static int64_t choose_tx_size_fix_type(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate,
+ int64_t *distortion,
+ int *skip,
+ int64_t *psse,
+ int64_t ref_best_rd,
+ BLOCK_SIZE bs, TX_TYPE tx_type,
+ int prune) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int r, s;
+ int64_t d, sse;
+ int64_t rd = INT64_MAX;
+ int n;
+ int start_tx, end_tx;
+ int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
+ TX_SIZE best_tx = max_tx_size;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+
+ if (tx_select) {
+ start_tx = max_tx_size;
+ end_tx = 0;
+ } else {
+ const TX_SIZE chosen_tx_size =
+ VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ start_tx = chosen_tx_size;
+ end_tx = chosen_tx_size;
+ }
+
+ *distortion = INT64_MAX;
+ *rate = INT_MAX;
+ *skip = 0;
+ *psse = INT64_MAX;
+
+ mbmi->tx_type = tx_type;
+ last_rd = INT64_MAX;
+ for (n = start_tx; n >= end_tx; --n) {
+ if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n))
+ continue;
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(n, bs, is_inter);
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type])
+ continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ }
+ } else {
+ if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
+ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
+ continue;
+ }
+ if (!ext_tx_used_intra[ext_tx_set][tx_type])
+ continue;
+ }
+#else // CONFIG_EXT_TX
+ if (n >= TX_32X32 && tx_type != DCT_DCT)
+ continue;
+ if (is_inter && cpi->sf.tx_type_search > 0 &&
+ !do_tx_type_search(tx_type, prune))
+ continue;
+#endif // CONFIG_EXT_TX
+
+ rd = txfm_yrd(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs, tx_type, n);
+
+ // Early termination in transform size search.
+ if (cpi->sf.tx_size_search_breakout &&
+ (rd == INT64_MAX ||
+ (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
+ (n < (int) max_tx_size && rd > last_rd)))
+ break;
+
+ last_rd = rd;
+ if (rd < best_rd) {
+ best_tx = n;
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+ }
+ }
+ mbmi->tx_size = best_tx;
+
+ return best_rd;
+}
+
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skip, int64_t *sse,
@@ -1349,11 +1522,6 @@
if (cpi->sf.tx_type_search > 0) {
if (!do_tx_type_search(tx_type, prune))
continue;
- } else if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
@@ -1362,12 +1530,6 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
}
mbmi->tx_type = tx_type;
@@ -1475,166 +1637,36 @@
int64_t *psse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
- const TX_SIZE max_tx_size = max_txsize_lookup[bs];
- VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
int r, s;
int64_t d, sse;
int64_t rd = INT64_MAX;
- int n;
- int s0, s1;
- int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
- TX_SIZE best_tx = max_tx_size;
- int start_tx, end_tx;
- const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ int64_t best_rd = INT64_MAX;
+ TX_SIZE best_tx = max_txsize_lookup[bs];
const int is_inter = is_inter_block(mbmi);
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int prune = 0;
-#if CONFIG_EXT_TX
- int ext_tx_set;
-#endif // CONFIG_EXT_TX
if (is_inter && cpi->sf.tx_type_search > 0)
prune = prune_tx_types(cpi, bs, x, xd);
- assert(skip_prob > 0);
- s0 = vp10_cost_bit(skip_prob, 0);
- s1 = vp10_cost_bit(skip_prob, 1);
-
- if (tx_select) {
- start_tx = max_tx_size;
- end_tx = 0;
- } else {
- const TX_SIZE chosen_tx_size =
- VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
- start_tx = chosen_tx_size;
- end_tx = chosen_tx_size;
- }
-
*distortion = INT64_MAX;
*rate = INT_MAX;
*skip = 0;
*psse = INT64_MAX;
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- last_rd = INT64_MAX;
- for (n = start_tx; n >= end_tx; --n) {
- const int r_tx_size =
- cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)][n];
- if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n))
- continue;
-#if CONFIG_EXT_TX
- ext_tx_set = get_ext_tx_set(n, bs, is_inter);
- if (is_inter) {
- if (!ext_tx_used_inter[ext_tx_set][tx_type])
- continue;
- if (cpi->sf.tx_type_search > 0) {
- if (!do_tx_type_search(tx_type, prune))
- continue;
- } else if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
- } else {
- if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
- if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
- continue;
- }
- if (!ext_tx_used_intra[ext_tx_set][tx_type])
- continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
- }
- mbmi->tx_type = tx_type;
- txfm_rd_in_plane(x,
- cpi,
- &r, &d, &s,
- &sse, ref_best_rd, 0, bs, n,
- cpi->sf.use_fast_coef_costing);
- if (get_ext_tx_types(n, bs, is_inter) > 1 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- r != INT_MAX) {
- if (is_inter) {
- if (ext_tx_set > 0)
- r += cpi->inter_tx_type_costs[ext_tx_set]
- [mbmi->tx_size][mbmi->tx_type];
- } else {
- if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
- [mbmi->mode][mbmi->tx_type];
- }
- }
-#else // CONFIG_EXT_TX
- if (n >= TX_32X32 && tx_type != DCT_DCT) {
- continue;
- }
- mbmi->tx_type = tx_type;
- txfm_rd_in_plane(x,
- cpi,
- &r, &d, &s,
- &sse, ref_best_rd, 0, bs, n,
- cpi->sf.use_fast_coef_costing);
- if (n < TX_32X32 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- r != INT_MAX && !FIXED_TX_TYPE) {
- if (is_inter) {
- r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
- continue;
- } else {
- r += cpi->intra_tx_type_costs[mbmi->tx_size]
- [intra_mode_to_tx_type_context[mbmi->mode]]
- [mbmi->tx_type];
- }
- }
-#endif // CONFIG_EXT_TX
-
- if (r == INT_MAX)
- continue;
-
- if (s) {
- if (is_inter) {
- rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
- } else {
- rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
- }
- } else {
- rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
- }
-
- if (tx_select && !(s && is_inter))
- r += r_tx_size;
-
- if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
- rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));
-
- // Early termination in transform size search.
- if (cpi->sf.tx_size_search_breakout &&
- (rd == INT64_MAX ||
- (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
- (n < (int) max_tx_size && rd > last_rd)))
- break;
-
- last_rd = rd;
- if (rd <
- (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
- best_rd) {
- best_tx = n;
- best_rd = rd;
- *distortion = d;
- *rate = r;
- *skip = s;
- *psse = sse;
- best_tx_type = mbmi->tx_type;
- }
+ rd = choose_tx_size_fix_type(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs,
+ tx_type, prune);
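+    // While DCT_DCT is still the best inter type, weight the comparison by
+    // the ext_tx_th threshold.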
+ if (rd < (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * best_rd) {
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+ best_tx_type = tx_type;
+ best_tx = mbmi->tx_size;
}
}
@@ -3124,21 +3156,75 @@
}
}
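+// Rate-distortion cost of coding the block with a fixed transform type;
+// the luma cost is computed by inter_block_yrd().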
+static int64_t select_tx_size_fix_type(const VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *dist,
+ int *skippable,
+ int64_t *sse, BLOCK_SIZE bsize,
+ int64_t ref_best_rd, TX_TYPE tx_type) {
+ const VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
+ int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
+#endif // CONFIG_EXT_TX
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0 = vp10_cost_bit(skip_prob, 0);
+ int s1 = vp10_cost_bit(skip_prob, 1);
+ int64_t rd;
+
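+  // Fix the transform type, then measure the luma rate and distortion.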
+ mbmi->tx_type = tx_type;
+ inter_block_yrd(cpi, x, rate, dist, skippable, sse, bsize, ref_best_rd);
+
+ if (*rate == INT_MAX)
+ return INT64_MAX;
+
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ *rate += cpi->inter_tx_type_costs[ext_tx_set]
+ [max_tx_size][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ *rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
+ [mbmi->mode][mbmi->tx_type];
+ }
+ }
+#else // CONFIG_EXT_TX
+ if (max_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ if (is_inter)
+ *rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
+ else
+ *rate += cpi->intra_tx_type_costs[max_tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]][mbmi->tx_type];
+ }
+#endif // CONFIG_EXT_TX
+
+ if (*skippable)
+ rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+ else
+ rd = RDCOST(x->rdmult, x->rddiv, *rate + s0, *dist);
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(*skippable))
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+
+ return rd;
+}
+
static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
int *rate, int64_t *distortion, int *skippable,
int64_t *sse, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
- const VP10_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int64_t rd = INT64_MAX;
int64_t best_rd = INT64_MAX;
TX_TYPE tx_type, best_tx_type = DCT_DCT;
const int is_inter = is_inter_block(mbmi);
- vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
- int s0 = vp10_cost_bit(skip_prob, 0);
- int s1 = vp10_cost_bit(skip_prob, 1);
TX_SIZE best_tx_size[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
TX_SIZE best_tx = TX_SIZES;
uint8_t best_blk_skip[256];
@@ -3169,11 +3255,6 @@
if (cpi->sf.tx_type_search > 0) {
if (!do_tx_type_search(tx_type, prune))
continue;
- } else if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
@@ -3182,66 +3263,16 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
- if (ext_tx_set == 1 &&
- tx_type >= DST_ADST && tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
- }
-
- mbmi->tx_type = tx_type;
-
- inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
- bsize, ref_best_rd);
-
- if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- this_rate != INT_MAX) {
- if (is_inter) {
- if (ext_tx_set > 0)
- this_rate += cpi->inter_tx_type_costs[ext_tx_set]
- [max_tx_size][mbmi->tx_type];
- } else {
- if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
- [mbmi->mode][mbmi->tx_type];
- }
}
#else // CONFIG_EXT_TX
- if (max_tx_size >= TX_32X32 && tx_type != DCT_DCT)
- continue;
-
- mbmi->tx_type = tx_type;
-
- inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
- bsize, ref_best_rd);
-
- if (max_tx_size < TX_32X32 &&
- !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- this_rate != INT_MAX) {
- if (is_inter) {
- this_rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
- if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
- continue;
- } else {
- this_rate += cpi->intra_tx_type_costs[max_tx_size]
- [intra_mode_to_tx_type_context[mbmi->mode]]
- [mbmi->tx_type];
- }
- }
-#endif // CONFIG_EXT_TX
-
- if (this_rate == INT_MAX)
+ if (max_tx_size >= TX_32X32 && tx_type != DCT_DCT)
continue;
-
- if (this_skip)
- rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse);
- else
- rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist);
-
- if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip)
- rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse));
+ if (is_inter && cpi->sf.tx_type_search > 0 &&
+ !do_tx_type_search(tx_type, prune))
+ continue;
+#endif // CONFIG_EXT_TX
+ rd = select_tx_size_fix_type(cpi, x, &this_rate, &this_dist, &this_skip,
+ &this_sse, bsize, ref_best_rd, tx_type);
if (rd < (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * best_rd) {
best_rd = rd;
@@ -4790,6 +4821,10 @@
int64_t best_rd = INT64_MAX;
const int i = idy * 2 + idx;
int ref;
+#if CONFIG_REF_MV
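+      // Candidate MV stacks passed to vp10_append_sub8x8_mvs_for_idx() below.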
+ CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
+ uint8_t ref_mv_count[2];
+#endif
#if CONFIG_EXT_INTER
int mv_idx;
int_mv ref_mvs_sub8x8[2][2];
@@ -4804,6 +4839,10 @@
#endif // CONFIG_EXT_INTER
frame_mv[ZEROMV][frame].as_int = 0;
vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
+#if CONFIG_REF_MV
+ ref_mv_stack[ref],
+ &ref_mv_count[ref],
+#endif
#if CONFIG_EXT_INTER
mv_ref_list,
#endif // CONFIG_EXT_INTER
@@ -5178,6 +5217,7 @@
bsi->rdstat[i][mode_idx].tl,
idy, idx,
mi_row, mi_col);
+
if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
bsi->rdstat[i][mode_idx].brate, 0);
@@ -8059,26 +8099,18 @@
// TODO(jingning): This should be deprecated shortly.
int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
-
int ref_set =
VPXMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
- uint8_t drl0_ctx = 0;
- uint8_t drl_ctx = 0;
-
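+      // The DRL context is now derived directly from the reference MV stack.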
+ uint8_t drl_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+ idx_offset);
      // Back up the NEWMV motion vectors used during this mode evaluation.
int_mv backup_fmv[2];
backup_fmv[0] = frame_mv[NEWMV][ref_frame];
if (comp_pred)
backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
- if (mbmi->mode == NEARMV) {
- drl0_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- rate2 += cpi->drl_mode_cost0[drl0_ctx][0];
- } else {
- drl_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 0);
- rate2 += cpi->drl_mode_cost0[drl_ctx][0];
- }
+ rate2 += cpi->drl_mode_cost0[drl_ctx][0];
if (this_rd < INT64_MAX) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
@@ -8164,23 +8196,20 @@
&tmp_sse, best_rd);
}
- if (this_mode == NEARMV) {
- tmp_rate += cpi->drl_mode_cost0[drl0_ctx][1];
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 3) {
- uint8_t drl1_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 2);
- tmp_rate += cpi->drl_mode_cost1[drl1_ctx][ref_idx];
- }
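+        // Add the cost of each step taken down the reference MV stack to
+        // reach the selected ref_mv_idx.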
+ for (i = 0; i < mbmi->ref_mv_idx; ++i) {
+          const uint8_t drl1_ctx =
+              vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+                           i + idx_offset);
+ tmp_rate += cpi->drl_mode_cost0[drl1_ctx][1];
}
- if (this_mode == NEWMV) {
- tmp_rate += cpi->drl_mode_cost0[drl_ctx][1];
-
- if (mbmi_ext->ref_mv_count[ref_frame_type] > 2) {
- uint8_t this_drl_ctx =
- vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], 1);
- tmp_rate += cpi->drl_mode_cost0[this_drl_ctx][ref_idx];
- }
+ if (mbmi_ext->ref_mv_count[ref_frame_type] >
+ mbmi->ref_mv_idx + idx_offset + 1 &&
+ ref_idx < ref_set - 1) {
+ uint8_t drl1_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+ mbmi->ref_mv_idx + idx_offset);
+ tmp_rate += cpi->drl_mode_cost0[drl1_ctx][0];
}
if (tmp_alt_rd < INT64_MAX) {
@@ -8435,6 +8464,9 @@
if (cm->allow_screen_content_tools && !is_inter_mode(best_mbmode.mode)) {
PREDICTION_MODE mode_selected;
int rate2 = 0, rate_y = 0;
+#if CONFIG_SUPERTX
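+    // SUPERTX needs the mode rate excluding coefficient costs.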
+ int best_rate_nocoef;
+#endif
int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
int skippable = 0, rate_overhead = 0;
TX_SIZE best_tx_size, uv_tx;
@@ -8504,8 +8536,14 @@
if (skippable) {
rate2 -= (rate_y + rate_uv_tokenonly[uv_tx]);
+#if CONFIG_SUPERTX
+ best_rate_nocoef = rate2;
+#endif
rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
} else {
+#if CONFIG_SUPERTX
+ best_rate_nocoef = rate2 - (rate_y + rate_uv_tokenonly[uv_tx]);
+#endif
rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
}
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -8515,6 +8553,9 @@
mbmi->mv[0].as_int = 0;
max_plane = 1;
rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = best_rate_nocoef;
+#endif
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
best_rd = this_rd;
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index 7aaef5b..822ccc9 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -693,14 +693,6 @@
vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
&arg);
(*t)->token = EOSB_TOKEN;
-#if CONFIG_ANS
- // TODO(aconverse): clip the number of bits in tokenize_b
- // Smuggle TX_SIZE in the unused extrabits field so the ANS encoder
- // knows the maximum number of extrabits to write at the end of the block
- // (where it starts).
- (*t)->extra = (EXTRABIT)(plane ? get_uv_tx_size(mbmi, &xd->plane[plane])
- : mbmi->tx_size);
-#endif // CONFIG_ANS
(*t)++;
}
} else {
diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c
index 8ff7c9c..8a55425 100644
--- a/vp10/encoder/x86/dct_sse2.c
+++ b/vp10/encoder/x86/dct_sse2.c
@@ -172,42 +172,6 @@
transpose_4x4(in);
}
-#if CONFIG_EXT_TX
-static void fdst4_sse2(__m128i *in) {
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64);
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
-
- __m128i u[4], v[4];
-
- u[0] = _mm_unpacklo_epi16(in[0], in[1]);
- u[1] = _mm_unpacklo_epi16(in[3], in[2]);
-
- v[0] = _mm_add_epi16(u[0], u[1]);
- v[1] = _mm_sub_epi16(u[0], u[1]);
-
- u[0] = _mm_madd_epi16(v[0], k__cospi_p24_p08);
- u[1] = _mm_madd_epi16(v[1], k__cospi_p16_p16);
- u[2] = _mm_madd_epi16(v[0], k__cospi_p08_m24);
- u[3] = _mm_madd_epi16(v[1], k__cospi_p16_m16);
-
- v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
- v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
- v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
- v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
- u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
- u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
- u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
- u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
-
- in[0] = _mm_packs_epi32(u[0], u[2]);
- in[1] = _mm_packs_epi32(u[1], u[3]);
- transpose_4x4(in);
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
__m128i in[4];
@@ -265,48 +229,6 @@
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
- case DST_DST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdst4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DCT_DST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdct4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DST_DCT:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdst4_sse2(in);
- fdct4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DST_ADST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fdst4_sse2(in);
- fadst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case ADST_DST:
- load_buffer_4x4(input, in, stride, 0, 0);
- fadst4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case DST_FLIPADST:
- load_buffer_4x4(input, in, stride, 0, 1);
- fdst4_sse2(in);
- fadst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
- case FLIPADST_DST:
- load_buffer_4x4(input, in, stride, 1, 0);
- fadst4_sse2(in);
- fdst4_sse2(in);
- write_buffer_4x4(output, in);
- break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -1288,155 +1210,6 @@
array_transpose_8x8(in, in);
}
-#if CONFIG_EXT_TX
-static void fdst8_sse2(__m128i *in) {
- // Constants
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64);
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t) -cospi_16_64);
- const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
- const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
- const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
- const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
- const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
- const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
- const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64);
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
-
- __m128i s0, s1, s2, s3, s4, s5, s6, s7;
- __m128i x0, x1, x2, x3, x4, x5, x6, x7;
- __m128i t0, t1, t2, t3, t4, t5, t6, t7;
-
- s0 = _mm_sub_epi16(in[0], in[7]);
- s1 = _mm_sub_epi16(in[1], in[6]); // -s1
- s2 = _mm_sub_epi16(in[2], in[5]);
- s3 = _mm_sub_epi16(in[3], in[4]); // -s3
- s4 = _mm_add_epi16(in[3], in[4]); // -s4
- s5 = _mm_add_epi16(in[2], in[5]);
- s6 = _mm_add_epi16(in[1], in[6]); // -s6
- s7 = _mm_add_epi16(in[0], in[7]);
-
- x0 = _mm_sub_epi16(s0, s3);
- x1 = _mm_sub_epi16(s1, s2); // -x1
- x2 = _mm_add_epi16(s1, s2); // -x2
- x3 = _mm_add_epi16(s0, s3);
-
- // Interleave
- t0 = _mm_unpacklo_epi16(x0, x1);
- t1 = _mm_unpackhi_epi16(x0, x1);
- t2 = _mm_unpacklo_epi16(x2, x3);
- t3 = _mm_unpackhi_epi16(x2, x3);
-
- // Perform butterfly multiplication/addition
- x0 = _mm_madd_epi16(t0, k__cospi_p16_m16);
- x1 = _mm_madd_epi16(t1, k__cospi_p16_m16);
- x2 = _mm_madd_epi16(t0, k__cospi_p16_p16);
- x3 = _mm_madd_epi16(t1, k__cospi_p16_p16);
- x4 = _mm_madd_epi16(t2, k__cospi_m24_p08);
- x5 = _mm_madd_epi16(t3, k__cospi_m24_p08);
- x6 = _mm_madd_epi16(t2, k__cospi_p08_p24);
- x7 = _mm_madd_epi16(t3, k__cospi_p08_p24);
-
- // Rounding
- t0 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING);
- t1 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING);
- t2 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING);
- t3 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING);
- t4 = _mm_add_epi32(x4, k__DCT_CONST_ROUNDING);
- t5 = _mm_add_epi32(x5, k__DCT_CONST_ROUNDING);
- t6 = _mm_add_epi32(x6, k__DCT_CONST_ROUNDING);
- t7 = _mm_add_epi32(x7, k__DCT_CONST_ROUNDING);
- // Shift
- x0 = _mm_srai_epi32(t0, DCT_CONST_BITS);
- x1 = _mm_srai_epi32(t1, DCT_CONST_BITS);
- x2 = _mm_srai_epi32(t2, DCT_CONST_BITS);
- x3 = _mm_srai_epi32(t3, DCT_CONST_BITS);
- x4 = _mm_srai_epi32(t4, DCT_CONST_BITS);
- x5 = _mm_srai_epi32(t5, DCT_CONST_BITS);
- x6 = _mm_srai_epi32(t6, DCT_CONST_BITS);
- x7 = _mm_srai_epi32(t7, DCT_CONST_BITS);
-
- // Pack 32b integer to 16b with signed saturation
- in[7] = _mm_packs_epi32(x0, x1);
- in[5] = _mm_packs_epi32(x4, x5);
- in[3] = _mm_packs_epi32(x2, x3);
- in[1] = _mm_packs_epi32(x6, x7);
-
- // Interleave
- s0 = _mm_unpacklo_epi16(s6, s5);
- s1 = _mm_unpackhi_epi16(s6, s5);
-
- // Perform butterfly multiplication/addition
- x0 = _mm_madd_epi16(s0, k__cospi_m16_m16);
- x1 = _mm_madd_epi16(s1, k__cospi_m16_m16);
- x2 = _mm_madd_epi16(s0, k__cospi_m16_p16);
- x3 = _mm_madd_epi16(s1, k__cospi_m16_p16);
-
- // Rounding
- t0 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING);
- t1 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING);
- t2 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING);
- t3 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING);
-
- // Shift
- x0 = _mm_srai_epi32(t0, DCT_CONST_BITS);
- x1 = _mm_srai_epi32(t1, DCT_CONST_BITS);
- x2 = _mm_srai_epi32(t2, DCT_CONST_BITS);
- x3 = _mm_srai_epi32(t3, DCT_CONST_BITS);
-
- // Pack 32b integer to 16b with signed saturation
- t2 = _mm_packs_epi32(x0, x1);
- t3 = _mm_packs_epi32(x2, x3);
-
- x0 = _mm_sub_epi16(t2, s4);
- x1 = _mm_add_epi16(t2, s4); // -x1
- x2 = _mm_sub_epi16(s7, t3);
- x3 = _mm_add_epi16(s7, t3);
-
- s0 = _mm_unpacklo_epi16(x0, x3);
- s1 = _mm_unpackhi_epi16(x0, x3);
- s2 = _mm_unpacklo_epi16(x1, x2);
- s3 = _mm_unpackhi_epi16(x1, x2);
-
- t0 = _mm_madd_epi16(s0, k__cospi_p28_p04);
- t1 = _mm_madd_epi16(s1, k__cospi_p28_p04);
- t2 = _mm_madd_epi16(s2, k__cospi_m12_p20);
- t3 = _mm_madd_epi16(s3, k__cospi_m12_p20);
- t4 = _mm_madd_epi16(s2, k__cospi_p20_p12);
- t5 = _mm_madd_epi16(s3, k__cospi_p20_p12);
- t6 = _mm_madd_epi16(s0, k__cospi_m04_p28);
- t7 = _mm_madd_epi16(s1, k__cospi_m04_p28);
-
- // Rounding
- x0 = _mm_add_epi32(t0, k__DCT_CONST_ROUNDING);
- x1 = _mm_add_epi32(t1, k__DCT_CONST_ROUNDING);
- x2 = _mm_add_epi32(t2, k__DCT_CONST_ROUNDING);
- x3 = _mm_add_epi32(t3, k__DCT_CONST_ROUNDING);
- x4 = _mm_add_epi32(t4, k__DCT_CONST_ROUNDING);
- x5 = _mm_add_epi32(t5, k__DCT_CONST_ROUNDING);
- x6 = _mm_add_epi32(t6, k__DCT_CONST_ROUNDING);
- x7 = _mm_add_epi32(t7, k__DCT_CONST_ROUNDING);
- // Shift
- s0 = _mm_srai_epi32(x0, DCT_CONST_BITS);
- s1 = _mm_srai_epi32(x1, DCT_CONST_BITS);
- s2 = _mm_srai_epi32(x2, DCT_CONST_BITS);
- s3 = _mm_srai_epi32(x3, DCT_CONST_BITS);
- s4 = _mm_srai_epi32(x4, DCT_CONST_BITS);
- s5 = _mm_srai_epi32(x5, DCT_CONST_BITS);
- s6 = _mm_srai_epi32(x6, DCT_CONST_BITS);
- s7 = _mm_srai_epi32(x7, DCT_CONST_BITS);
-
- in[6] = _mm_packs_epi32(s0, s1);
- in[4] = _mm_packs_epi32(s4, s5);
- in[2] = _mm_packs_epi32(s2, s3);
- in[0] = _mm_packs_epi32(s6, s7);
-
- // coeffs: [x3 x2 x1 x0, x7 x6 x5 x4]
- // Transpose
- array_transpose_8x8(in, in);
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
__m128i in[8];
@@ -1502,55 +1275,6 @@
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
- case DST_DST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdst8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DCT_DST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdct8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DST_DCT:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdst8_sse2(in);
- fdct8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DST_ADST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fdst8_sse2(in);
- fadst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case ADST_DST:
- load_buffer_8x8(input, in, stride, 0, 0);
- fadst8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case DST_FLIPADST:
- load_buffer_8x8(input, in, stride, 0, 1);
- fdst8_sse2(in);
- fadst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
- case FLIPADST_DST:
- load_buffer_8x8(input, in, stride, 1, 0);
- fadst8_sse2(in);
- fdst8_sse2(in);
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
- break;
#endif // CONFIG_EXT_TX
default:
assert(0);
@@ -2420,351 +2144,6 @@
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-#if CONFIG_EXT_TX
-static void fdst16_8col(__m128i *in) {
- const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t) cospi_16_64);
- const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
- const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
-
- const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t) -cospi_16_64);
- const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
- const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64);
- const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64);
- const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
- const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
-
- const __m128i k__cospi_m08_m24 = pair_set_epi16(-cospi_8_64, -cospi_24_64);
- const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
-
- const __m128i k__cospi_m30_p02 = pair_set_epi16(-cospi_30_64, cospi_2_64);
- const __m128i k__cospi_m14_p18 = pair_set_epi16(-cospi_14_64, cospi_18_64);
- const __m128i k__cospi_m22_p10 = pair_set_epi16(-cospi_22_64, cospi_10_64);
- const __m128i k__cospi_m06_p26 = pair_set_epi16(-cospi_6_64, cospi_26_64);
- const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
- const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
- const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
- const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
-
- const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
-
- __m128i u0, u1, u2, u3, u4, u5, u6, u7;
- __m128i v0, v1, v2, v3, v4, v5, v6, v7;
- __m128i s0, s1, s2, s3, s4, s5, s6, s7;
- __m128i x0, x1, x2, x3, t0, t1, t2, t3;
- __m128i y0, y1, y2, y3, y4, y5, y6, y7;
- __m128i w0, w1, w2, w3, w4, w5, w6, w7;
-
- // (1)
- u0 = _mm_sub_epi16(in[0], in[15]);
- v7 = _mm_add_epi16(in[0], in[15]);
-
- u1 = _mm_sub_epi16(in[1], in[14]); // -u1
- v6 = _mm_add_epi16(in[1], in[14]); // -v6
-
- u2 = _mm_sub_epi16(in[2], in[13]);
- v5 = _mm_add_epi16(in[2], in[13]);
-
- u3 = _mm_sub_epi16(in[3], in[12]); // -u3
- v4 = _mm_add_epi16(in[3], in[12]); // -v4
-
- u4 = _mm_sub_epi16(in[4], in[11]);
- v3 = _mm_add_epi16(in[4], in[11]);
-
- u5 = _mm_sub_epi16(in[5], in[10]); // -u5
- v2 = _mm_add_epi16(in[5], in[10]); // -v2
-
- u6 = _mm_sub_epi16(in[6], in[9]);
- v1 = _mm_add_epi16(in[6], in[9]);
-
- u7 = _mm_sub_epi16(in[7], in[8]); // -u7
- v0 = _mm_add_epi16(in[7], in[8]); // -v0
-
- s0 = _mm_sub_epi16(u0, u7);
- s1 = _mm_sub_epi16(u1, u6); // -s1
- s2 = _mm_sub_epi16(u2, u5);
- s3 = _mm_sub_epi16(u3, u4); // -s3
- s4 = _mm_add_epi16(u3, u4); // -s4
- s5 = _mm_add_epi16(u2, u5);
- s6 = _mm_add_epi16(u1, u6); // -s6
- s7 = _mm_add_epi16(u0, u7);
-
- x0 = _mm_sub_epi16(s0, s3);
- x1 = _mm_sub_epi16(s1, s2); // -x1
- x2 = _mm_add_epi16(s1, s2); // -x2
- x3 = _mm_add_epi16(s0, s3);
-
- y0 = _mm_unpacklo_epi16(x0, x1);
- y1 = _mm_unpackhi_epi16(x0, x1);
- y2 = _mm_unpacklo_epi16(x2, x3);
- y3 = _mm_unpackhi_epi16(x2, x3);
-
- t0 = _mm_madd_epi16(y0, k__cospi_p16_m16);
- t1 = _mm_madd_epi16(y1, k__cospi_p16_m16);
- t2 = _mm_madd_epi16(y0, k__cospi_p16_p16);
- t3 = _mm_madd_epi16(y1, k__cospi_p16_p16);
- x0 = _mm_madd_epi16(y2, k__cospi_m24_p08);
- x1 = _mm_madd_epi16(y3, k__cospi_m24_p08);
- x2 = _mm_madd_epi16(y2, k__cospi_p08_p24);
- x3 = _mm_madd_epi16(y3, k__cospi_p08_p24);
-
- y0 = _mm_add_epi32(t0, k__DCT_CONST_ROUNDING);
- y1 = _mm_add_epi32(t1, k__DCT_CONST_ROUNDING);
- y2 = _mm_add_epi32(t2, k__DCT_CONST_ROUNDING);
- y3 = _mm_add_epi32(t3, k__DCT_CONST_ROUNDING);
- y4 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING);
- y5 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING);
- y6 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING);
- y7 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING);
-
- t0 = _mm_srai_epi32(y0, DCT_CONST_BITS);
- t1 = _mm_srai_epi32(y1, DCT_CONST_BITS);
- t2 = _mm_srai_epi32(y2, DCT_CONST_BITS);
- t3 = _mm_srai_epi32(y3, DCT_CONST_BITS);
- x0 = _mm_srai_epi32(y4, DCT_CONST_BITS);
- x1 = _mm_srai_epi32(y5, DCT_CONST_BITS);
- x2 = _mm_srai_epi32(y6, DCT_CONST_BITS);
- x3 = _mm_srai_epi32(y7, DCT_CONST_BITS);
-
- in[15] = _mm_packs_epi32(t0, t1);
- in[11] = _mm_packs_epi32(x0, x1);
- in[7] = _mm_packs_epi32(t2, t3);
- in[3] = _mm_packs_epi32(x2, x3);
-
- // (2)
- t0 = _mm_unpacklo_epi16(s6, s5);
- t1 = _mm_unpackhi_epi16(s6, s5);
-
- y0 = _mm_madd_epi16(t0, k__cospi_m16_m16);
- y1 = _mm_madd_epi16(t1, k__cospi_m16_m16);
- y2 = _mm_madd_epi16(t0, k__cospi_m16_p16);
- y3 = _mm_madd_epi16(t1, k__cospi_m16_p16);
-
- x0 = _mm_add_epi32(y0, k__DCT_CONST_ROUNDING);
- x1 = _mm_add_epi32(y1, k__DCT_CONST_ROUNDING);
- x2 = _mm_add_epi32(y2, k__DCT_CONST_ROUNDING);
- x3 = _mm_add_epi32(y3, k__DCT_CONST_ROUNDING);
-
- y4 = _mm_srai_epi32(x0, DCT_CONST_BITS);
- y5 = _mm_srai_epi32(x1, DCT_CONST_BITS);
- y6 = _mm_srai_epi32(x2, DCT_CONST_BITS);
- y7 = _mm_srai_epi32(x3, DCT_CONST_BITS);
-
- t2 = _mm_packs_epi32(y4, y5);
- t3 = _mm_packs_epi32(y6, y7);
-
- x0 = _mm_sub_epi16(s4, t2); // -x0
- x1 = _mm_add_epi16(s4, t2); // -x1
- x2 = _mm_sub_epi16(s7, t3);
- x3 = _mm_add_epi16(s7, t3);
-
- y0 = _mm_unpacklo_epi16(x0, x3);
- y1 = _mm_unpackhi_epi16(x0, x3);
- y2 = _mm_unpacklo_epi16(x1, x2);
- y3 = _mm_unpackhi_epi16(x1, x2);
-
- w0 = _mm_madd_epi16(y0, k__cospi_m28_p04);
- w1 = _mm_madd_epi16(y1, k__cospi_m28_p04);
- w2 = _mm_madd_epi16(y2, k__cospi_m12_p20);
- w3 = _mm_madd_epi16(y3, k__cospi_m12_p20);
- w4 = _mm_madd_epi16(y2, k__cospi_p20_p12);
- w5 = _mm_madd_epi16(y3, k__cospi_p20_p12);
- w6 = _mm_madd_epi16(y0, k__cospi_p04_p28);
- w7 = _mm_madd_epi16(y1, k__cospi_p04_p28);
-
- u0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
- u1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
- u2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
- u3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
- u4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
- u5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
- u6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
- u7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
-
- y0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
- y1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
- y2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
- y3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
- y4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
- y5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
- y6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
- y7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
-
- in[13] = _mm_packs_epi32(y0, y1);
- in[9] = _mm_packs_epi32(y4, y5);
- in[5] = _mm_packs_epi32(y2, y3);
- in[1] = _mm_packs_epi32(y6, y7);
-
- // (3)
- y0 = _mm_unpacklo_epi16(v5, v2);
- y1 = _mm_unpackhi_epi16(v5, v2);
- y2 = _mm_unpacklo_epi16(v4, v3);
- y3 = _mm_unpackhi_epi16(v4, v3);
-
- u0 = _mm_madd_epi16(y0, k__cospi_p16_p16);
- u1 = _mm_madd_epi16(y1, k__cospi_p16_p16);
- u2 = _mm_madd_epi16(y2, k__cospi_m16_m16);
- u3 = _mm_madd_epi16(y3, k__cospi_m16_m16);
- u4 = _mm_madd_epi16(y2, k__cospi_m16_p16);
- u5 = _mm_madd_epi16(y3, k__cospi_m16_p16);
- u6 = _mm_madd_epi16(y0, k__cospi_p16_m16);
- u7 = _mm_madd_epi16(y1, k__cospi_p16_m16);
-
- w0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- w1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- w2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- w3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- w4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- w5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- w6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- w7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
-
- s0 = _mm_srai_epi32(w0, DCT_CONST_BITS);
- s1 = _mm_srai_epi32(w1, DCT_CONST_BITS);
- s2 = _mm_srai_epi32(w2, DCT_CONST_BITS);
- s3 = _mm_srai_epi32(w3, DCT_CONST_BITS);
- s4 = _mm_srai_epi32(w4, DCT_CONST_BITS);
- s5 = _mm_srai_epi32(w5, DCT_CONST_BITS);
- s6 = _mm_srai_epi32(w6, DCT_CONST_BITS);
- s7 = _mm_srai_epi32(w7, DCT_CONST_BITS);
-
- y2 = _mm_packs_epi32(s0, s1);
- y3 = _mm_packs_epi32(s2, s3);
- y4 = _mm_packs_epi32(s4, s5);
- y5 = _mm_packs_epi32(s6, s7);
-
- // step 3
- w0 = _mm_sub_epi16(v0, y3); // -w0
- w1 = _mm_add_epi16(v1, y2);
- w2 = _mm_sub_epi16(v1, y2);
- w3 = _mm_add_epi16(v0, y3); // -w3
- w4 = _mm_sub_epi16(v7, y4);
- w5 = _mm_add_epi16(v6, y5); // -w5
- w6 = _mm_sub_epi16(v6, y5); // -w6
- w7 = _mm_add_epi16(v7, y4);
-
- // step 4
- x0 = _mm_unpacklo_epi16(w1, w6);
- x1 = _mm_unpackhi_epi16(w1, w6);
- x2 = _mm_unpacklo_epi16(w2, w5);
- x3 = _mm_unpackhi_epi16(w2, w5);
-
- u0 = _mm_madd_epi16(x0, k__cospi_m08_m24);
- u1 = _mm_madd_epi16(x1, k__cospi_m08_m24);
- u2 = _mm_madd_epi16(x2, k__cospi_p24_m08);
- u3 = _mm_madd_epi16(x3, k__cospi_p24_m08);
- u4 = _mm_madd_epi16(x2, k__cospi_p08_p24);
- u5 = _mm_madd_epi16(x3, k__cospi_p08_p24);
- u6 = _mm_madd_epi16(x0, k__cospi_p24_m08);
- u7 = _mm_madd_epi16(x1, k__cospi_p24_m08);
-
- s0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
- s1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
- s2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
- s3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
- s4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- s5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- s6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- s7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
-
- u0 = _mm_srai_epi32(s0, DCT_CONST_BITS);
- u1 = _mm_srai_epi32(s1, DCT_CONST_BITS);
- u2 = _mm_srai_epi32(s2, DCT_CONST_BITS);
- u3 = _mm_srai_epi32(s3, DCT_CONST_BITS);
- u4 = _mm_srai_epi32(s4, DCT_CONST_BITS);
- u5 = _mm_srai_epi32(s5, DCT_CONST_BITS);
- u6 = _mm_srai_epi32(s6, DCT_CONST_BITS);
- u7 = _mm_srai_epi32(s7, DCT_CONST_BITS);
-
- y1 = _mm_packs_epi32(u0, u1);
- y2 = _mm_packs_epi32(u2, u3);
- y5 = _mm_packs_epi32(u4, u5);
- y6 = _mm_packs_epi32(u6, u7);
-
- // step 5
- v0 = _mm_sub_epi16(w0, y1); // -v0
- v1 = _mm_add_epi16(w0, y1); // -v1
- v2 = _mm_sub_epi16(w3, y2); // -v2
- v3 = _mm_add_epi16(w3, y2); // -v3
- v4 = _mm_sub_epi16(w4, y5);
- v5 = _mm_add_epi16(w4, y5);
- v6 = _mm_sub_epi16(w7, y6);
- v7 = _mm_add_epi16(w7, y6);
-
- u0 = _mm_unpacklo_epi16(v0, v7);
- u1 = _mm_unpackhi_epi16(v0, v7);
- u2 = _mm_unpacklo_epi16(v1, v6);
- u3 = _mm_unpackhi_epi16(v1, v6);
- u4 = _mm_unpacklo_epi16(v2, v5);
- u5 = _mm_unpackhi_epi16(v2, v5);
- u6 = _mm_unpacklo_epi16(v3, v4);
- u7 = _mm_unpackhi_epi16(v3, v4);
-
- s0 = _mm_madd_epi16(u0, k__cospi_m30_p02); // x0
- s1 = _mm_madd_epi16(u1, k__cospi_m30_p02);
- s2 = _mm_madd_epi16(u2, k__cospi_m14_p18); // x1
- s3 = _mm_madd_epi16(u3, k__cospi_m14_p18);
- s4 = _mm_madd_epi16(u4, k__cospi_m22_p10); // x2
- s5 = _mm_madd_epi16(u5, k__cospi_m22_p10);
- s6 = _mm_madd_epi16(u6, k__cospi_m06_p26); // x3
- s7 = _mm_madd_epi16(u7, k__cospi_m06_p26);
-
- w0 = _mm_madd_epi16(u6, k__cospi_p26_p06); // x4
- w1 = _mm_madd_epi16(u7, k__cospi_p26_p06);
- w2 = _mm_madd_epi16(u4, k__cospi_p10_p22); // x5
- w3 = _mm_madd_epi16(u5, k__cospi_p10_p22);
- w4 = _mm_madd_epi16(u2, k__cospi_p18_p14); // x6
- w5 = _mm_madd_epi16(u3, k__cospi_p18_p14);
- w6 = _mm_madd_epi16(u0, k__cospi_p02_p30); // x7
- w7 = _mm_madd_epi16(u1, k__cospi_p02_p30);
-
- v0 = _mm_add_epi32(s0, k__DCT_CONST_ROUNDING);
- v1 = _mm_add_epi32(s1, k__DCT_CONST_ROUNDING);
- v2 = _mm_add_epi32(s2, k__DCT_CONST_ROUNDING);
- v3 = _mm_add_epi32(s3, k__DCT_CONST_ROUNDING);
- v4 = _mm_add_epi32(s4, k__DCT_CONST_ROUNDING);
- v5 = _mm_add_epi32(s5, k__DCT_CONST_ROUNDING);
- v6 = _mm_add_epi32(s6, k__DCT_CONST_ROUNDING);
- v7 = _mm_add_epi32(s7, k__DCT_CONST_ROUNDING);
-
- y0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
- y1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
- y2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
- y3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
- y4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
- y5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
- y6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
- y7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
-
- u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
- u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
- u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
- u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
- u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
-
- s0 = _mm_srai_epi32(y0, DCT_CONST_BITS);
- s1 = _mm_srai_epi32(y1, DCT_CONST_BITS);
- s2 = _mm_srai_epi32(y2, DCT_CONST_BITS);
- s3 = _mm_srai_epi32(y3, DCT_CONST_BITS);
- s4 = _mm_srai_epi32(y4, DCT_CONST_BITS);
- s5 = _mm_srai_epi32(y5, DCT_CONST_BITS);
- s6 = _mm_srai_epi32(y6, DCT_CONST_BITS);
- s7 = _mm_srai_epi32(y7, DCT_CONST_BITS);
-
- in[14] = _mm_packs_epi32(u0, u1);
- in[6] = _mm_packs_epi32(u2, u3);
- in[10] = _mm_packs_epi32(u4, u5);
- in[2] = _mm_packs_epi32(u6, u7);
- in[12] = _mm_packs_epi32(s0, s1);
- in[4] = _mm_packs_epi32(s2, s3);
- in[8] = _mm_packs_epi32(s4, s5);
- in[0] = _mm_packs_epi32(s6, s7);
-}
-#endif // CONFIG_EXT_TX
-
static void fdct16_sse2(__m128i *in0, __m128i *in1) {
fdct16_8col(in0);
fdct16_8col(in1);
@@ -2777,14 +2156,6 @@
array_transpose_16x16(in0, in1);
}
-#if CONFIG_EXT_TX
-static void fdst16_sse2(__m128i *in0, __m128i *in1) {
- fdst16_8col(in0);
- fdst16_8col(in1);
- array_transpose_16x16(in0, in1);
-}
-#endif // CONFIG_EXT_TX
-
void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
__m128i in0[16], in1[16];
@@ -2850,55 +2221,6 @@
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
- case DST_DST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DCT_DST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdct16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DST_DCT:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdct16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DST_ADST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fadst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case ADST_DST:
- load_buffer_16x16(input, in0, in1, stride, 0, 0);
- fadst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case DST_FLIPADST:
- load_buffer_16x16(input, in0, in1, stride, 0, 1);
- fdst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fadst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
- case FLIPADST_DST:
- load_buffer_16x16(input, in0, in1, stride, 1, 0);
- fadst16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdst16_sse2(in0, in1);
- write_buffer_16x16(output, in0, in1, 16);
- break;
#endif // CONFIG_EXT_TX
default:
assert(0);