Merge "Add "entropy" experiment flag" into nextgenv2
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index f621ec6..863f0db 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -260,6 +260,30 @@
}
#if CONFIG_EXT_TX
+static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 4-point 1-D pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 4; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+}  // NOTE(review): Sqrt2 looks like a fixed-point sqrt(2); confirm its scale
+
+static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 8-point 1-D pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 8; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = input[i] * 2;  // plain integer gain of 2; no round-shift call
+}
+
+static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 16-point 1-D pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 16; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
+}  // NOTE(review): gain is 2 * Sqrt2 before the round-shift; confirm scale
+
+static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 32-point 1-D pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 32; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = input[i] * 4;  // plain integer gain of 4; no round-shift call
+}
+
// For use in lieu of DST
static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -291,6 +315,37 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 4-point high-bitdepth pass for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 4; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = (tran_low_t)highbd_dct_const_round_shift(input[i] * Sqrt2, bd);
+}  // NOTE(review): Sqrt2 looks like a fixed-point sqrt(2); confirm its scale
+
+static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 8-point high-bitdepth pass for the IDTX/V_DCT/H_DCT table entries
+ (void) bd;  // bd is otherwise unused: no bit-depth-aware helper is called
+ for (i = 0; i < 8; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = input[i] * 2;  // plain integer gain of 2; no round-shift call
+}
+
+static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 16-point high-bitdepth pass for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 16; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = (tran_low_t)highbd_dct_const_round_shift(
+ input[i] * 2 * Sqrt2, bd);  // gain is 2 * Sqrt2 before the round-shift
+}  // NOTE(review): Sqrt2 looks like a fixed-point sqrt(2); confirm its scale
+
+static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 32-point high-bitdepth pass for the IDTX/V_DCT/H_DCT table entries
+ (void) bd;  // bd is otherwise unused: no bit-depth-aware helper is called
+ for (i = 0; i < 32; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = input[i] * 4;  // plain integer gain of 4; no round-shift call
+}
+
static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output,
int bd) {
int i;
@@ -331,85 +386,19 @@
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
-
- tran_low_t temp_in[32], temp_out[32];
- transform_2d ht = {idct4_c, idct4_c};
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = idct4_c;
- ht.rows = idct4_c;
- out_scale = cospi_16_64 >> 3;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = idct8_c;
- ht.rows = idct8_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = idct16_c;
- ht.rows = idct16_c;
- out_scale = cospi_16_64 >> 4;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = idct32_c;
- ht.rows = idct32_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (c = 0; c < bs; ++c) {
- for (r = 0; r < bs; ++r)
- temp_in[r] = input[r * coeff_stride + c];
- ht.cols(temp_in, temp_out);
-
- for (r = 0; r < bs; ++r) {
- tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp);
- }
- }
- return;
- }
-
- if (tx_type == H_DCT) {
+ if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
- temp_in[c] = input[r * coeff_stride + c];
- ht.rows(temp_in, temp_out);
-
- for (c = 0; c < bs; ++c) {
- tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp);
- }
+ dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
+ dest += stride;
+ input += bs;
}
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
- dest += stride;
- input += bs;
}
}
#define FLIPUD_PTR(dest, stride, size) do { \
- (dest) = (dest) + ((size) - 1) * (stride); \
- (stride) = - (stride); \
+ (dest) = (dest) + ((size) - 1) * (stride); \
+ (stride) = - (stride); \
} while (0)
static void maybe_flip_strides(uint8_t **dst, int *dstride,
@@ -428,6 +417,7 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
case V_DCT:
case H_DCT:
break;
@@ -705,78 +695,13 @@
const int shift = bs < 32 ? 3 : 2;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- tran_low_t temp_in[32], temp_out[32];
- highbd_transform_2d ht = {vpx_highbd_idct4_c, vpx_highbd_idct4_c};
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = vpx_highbd_idct4_c;
- ht.rows = vpx_highbd_idct4_c;
- out_scale = cospi_16_64 >> 3;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = vpx_highbd_idct8_c;
- ht.rows = vpx_highbd_idct8_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = vpx_highbd_idct16_c;
- ht.rows = vpx_highbd_idct16_c;
- out_scale = cospi_16_64 >> 4;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = vpx_highbd_idct32_c;
- ht.rows = vpx_highbd_idct32_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (c = 0; c < bs; ++c) {
- for (r = 0; r < bs; ++r)
- temp_in[r] = input[r * coeff_stride + c];
- ht.cols(temp_in, temp_out, bd);
-
- for (r = 0; r < bs; ++r) {
- tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp, bd);
- }
- }
- return;
- }
-
- if (tx_type == H_DCT) {
+ if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
- temp_in[c] = input[r * coeff_stride + c];
- ht.rows(temp_in, temp_out, bd);
-
- for (c = 0; c < bs; ++c) {
- tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp, bd);
- }
+ dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
+ dest += stride;
+ input += bs;
}
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
- dest += stride;
- input += bs;
}
}
@@ -796,6 +721,9 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
+ case V_DCT:
+ case H_DCT:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
@@ -843,6 +771,9 @@
{ idst4_c, iadst4_c }, // DST_FLIPADST = 13,
{ iadst4_c, idst4_c }, // FLIPADST_DST = 14,
{ idst4_c, idst4_c }, // DST_DST = 15
+ { iidtx4_c, iidtx4_c }, // IDTX = 16
+ { idct4_c, iidtx4_c }, // V_DCT = 17
+ { iidtx4_c, idct4_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -906,6 +837,9 @@
{ idst8_c, iadst8_c }, // DST_FLIPADST = 13,
{ iadst8_c, idst8_c }, // FLIPADST_DST = 14,
{ idst8_c, idst8_c }, // DST_DST = 15
+ { iidtx8_c, iidtx8_c }, // IDTX = 16
+ { idct8_c, iidtx8_c }, // V_DCT = 17
+ { iidtx8_c, idct8_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -969,6 +903,9 @@
{ idst16_c, iadst16_c }, // DST_FLIPADST = 13,
{ iadst16_c, idst16_c }, // FLIPADST_DST = 14,
{ idst16_c, idst16_c }, // DST_DST = 15
+ { iidtx16_c, iidtx16_c }, // IDTX = 16
+ { idct16_c, iidtx16_c }, // V_DCT = 17
+ { iidtx16_c, idct16_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1032,6 +969,9 @@
{ ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13,
{ ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14,
{ ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15
+ { iidtx32_c, iidtx32_c }, // IDTX = 16
+ { idct32_c, iidtx32_c }, // V_DCT = 17
+ { iidtx32_c, idct32_c }, // H_DCT = 18
};
int i, j;
@@ -1165,11 +1105,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 4, tx_type);
break;
@@ -1206,11 +1146,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 8, tx_type);
break;
@@ -1247,11 +1187,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 16, tx_type);
break;
@@ -1284,10 +1224,10 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
- vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
+ break;
case IDTX:
inv_idtx_add_c(input, dest, stride, 32, tx_type);
break;
@@ -1319,6 +1259,9 @@
{ highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14,
{ highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15
+ { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX = 16
+ { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT = 17
+ { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1385,6 +1328,9 @@
{ highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14,
{ highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15
+ { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX = 16
+ { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT = 17
+ { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1451,6 +1397,9 @@
{ highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14,
{ highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15
+ { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX = 16
+ { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT = 17
+ { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1501,22 +1450,25 @@
void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_32[] = {
- { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
- { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
- { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
- { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
+ { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
+ { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
+ { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
+ { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT
+ { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1657,11 +1609,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
break;
@@ -1699,11 +1651,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
break;
@@ -1741,11 +1693,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
break;
@@ -1779,10 +1731,10 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
- vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
+ break;
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
break;
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 31a4c87..8a1ee20 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -1212,6 +1212,30 @@
}
#if CONFIG_EXT_TX
+static void fidtx4(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 4-point forward pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 4; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2);
+}  // NOTE(review): Sqrt2 looks like a fixed-point sqrt(2); confirm its scale
+
+static void fidtx8(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 8-point forward pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 8; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = input[i] * 2;  // plain integer gain of 2; no round-shift call
+}
+
+static void fidtx16(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 16-point forward pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 16; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2);
+}  // NOTE(review): gain is 2 * Sqrt2 before the round-shift; confirm scale
+
+static void fidtx32(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 32-point forward pass used for the IDTX/V_DCT/H_DCT table entries
+ for (i = 0; i < 32; ++i)  // element-wise: output[i] depends only on input[i]
+ output[i] = input[i] * 4;  // plain integer gain of 4; no round-shift call
+}
+
// For use in lieu of DST
static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -1315,6 +1339,7 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
case H_DCT:
case V_DCT:
break;
@@ -1362,6 +1387,9 @@
{ fdst4, fadst4 }, // DST_FLIPADST = 13,
{ fadst4, fdst4 }, // FLIPADST_DST = 14,
{ fdst4, fdst4 }, // DST_DST = 15
+ { fidtx4, fidtx4 }, // IDTX = 16
+ { fdct4, fidtx4 }, // V_DCT = 17
+ { fidtx4, fdct4 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1383,6 +1411,9 @@
{ fdst8, fadst8 }, // DST_FLIPADST = 13,
{ fadst8, fdst8 }, // FLIPADST_DST = 14,
{ fdst8, fdst8 }, // DST_DST = 15
+ { fidtx8, fidtx8 }, // IDTX = 16
+ { fdct8, fidtx8 }, // V_DCT = 17
+ { fidtx8, fdct8 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1404,6 +1435,9 @@
{ fdst16, fadst16 }, // DST_FLIPADST = 13,
{ fadst16, fdst16 }, // FLIPADST_DST = 14,
{ fdst16, fdst16 }, // DST_DST = 15
+ { fidtx16, fidtx16 }, // IDTX = 16
+ { fdct16, fidtx16 }, // V_DCT = 17
+ { fidtx16, fdct16 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1425,6 +1459,9 @@
{ fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
{ fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
{ fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
+ { fidtx32, fidtx32 }, // IDTX = 16
+ { fdct32, fidtx32 }, // V_DCT = 17
+ { fidtx32, fdct32 }, // H_DCT = 18
};
#endif // CONFIG_EXT_TX
@@ -1766,86 +1803,12 @@
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
-
- const int16_t *input = src_diff;
- tran_low_t *output = coeff;
-
- int i, j;
- tran_low_t temp_in[32], temp_out[32];
- transform_2d ht = {fdct4, fdct4};
- int in_scale = 1;
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = fdct4;
- ht.rows = fdct4;
- in_scale = 16;
- out_scale = cospi_16_64 >> 1;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = fdct8;
- ht.rows = fdct8;
- in_scale = 4;
- out_scale = (1 << DCT_CONST_BITS);
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = fdct16;
- ht.rows = fdct16;
- in_scale = 4;
- out_scale = cospi_16_64;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = fdct32;
- ht.rows = fdct32;
- in_scale = 4;
- out_scale = (1 << (DCT_CONST_BITS - 2));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (i = 0; i < bs; ++i) {
- for (j = 0; j < bs; ++j)
- temp_in[j] = input[j * stride + i] * in_scale;
- ht.cols(temp_in, temp_out);
-
- for (j = 0; j < bs; ++j) {
- tran_high_t temp = (tran_high_t)temp_out[j] * out_scale;
- temp >>= DCT_CONST_BITS;
- output[j * coeff_stride + i] = (tran_low_t)temp;
- }
+ if (tx_type == IDTX) {
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
+ src_diff += stride;
+ coeff += bs;
}
- return;
- }
-
- // Rows
- if (tx_type == H_DCT) {
- for (j = 0; j < bs; ++j) {
- for (i = 0; i < bs; ++i)
- temp_in[i] = input[j * stride + i] * in_scale;
- ht.rows(temp_in, temp_out);
-
- for (i = 0; i < bs; ++i) {
- tran_high_t temp = (tran_high_t)temp_out[i] * out_scale;
- temp >>= DCT_CONST_BITS;
- output[j * coeff_stride + i] = (tran_low_t)temp;
- }
- }
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
- src_diff += stride;
- coeff += bs;
}
}
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index c3a739b..faedb43 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -65,6 +65,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
@@ -105,6 +107,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
@@ -145,6 +149,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
@@ -185,6 +191,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
@@ -226,11 +234,11 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST exists only in C
vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
@@ -270,11 +277,11 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST exists only in C
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
@@ -314,11 +321,11 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST exists only in C
vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
@@ -355,10 +362,10 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
- vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;