Merge "remove filter_cache" into nextgenv2
diff --git a/configure b/configure
index 97366e4..ed1d048 100755
--- a/configure
+++ b/configure
@@ -284,6 +284,7 @@
ext_partition
ext_tile
obmc
+ entropy
"
CONFIG_LIST="
dependency_tracking
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c
index e14aee7..b3c216e 100644
--- a/vp10/common/alloccommon.c
+++ b/vp10/common/alloccommon.c
@@ -97,10 +97,13 @@
}
void vp10_free_context_buffers(VP10_COMMON *cm) {
+ int i;
cm->free_mi(cm);
free_seg_map(cm);
- vpx_free(cm->above_context);
- cm->above_context = NULL;
+ for (i = 0 ; i < MAX_MB_PLANE ; i++) {
+ vpx_free(cm->above_context[i]);
+ cm->above_context[i] = NULL;
+ }
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
#if CONFIG_VAR_TX
@@ -128,11 +131,14 @@
}
if (cm->above_context_alloc_cols < cm->mi_cols) {
- vpx_free(cm->above_context);
- cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc(
- 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE,
- sizeof(*cm->above_context));
- if (!cm->above_context) goto fail;
+ int i;
+ for (i = 0 ; i < MAX_MB_PLANE ; i++) {
+ vpx_free(cm->above_context[i]);
+ cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
+ 2 * mi_cols_aligned_to_sb(cm->mi_cols),
+ sizeof(*cm->above_context[0]));
+ if (!cm->above_context[i]) goto fail;
+ }
vpx_free(cm->above_seg_context);
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index f621ec6..863f0db 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -260,6 +260,30 @@
}
#if CONFIG_EXT_TX
+static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {  // 4-point IDTX 1-D kernel: reads input[0..3], writes output[0..3]
+ int i;
+ for (i = 0; i < 4; ++i)
+ output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);  // scale by Sqrt2, reduce via dct_const_round_shift, narrow to tran_low_t
+}
+
+static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {  // 8-point IDTX 1-D kernel: reads input[0..7], writes output[0..7]
+ int i;
+ for (i = 0; i < 8; ++i)
+ output[i] = input[i] * 2;  // plain integer doubling; no round-shift helper is used at this size
+}
+
+static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {  // 16-point IDTX 1-D kernel: reads input[0..15], writes output[0..15]
+ int i;
+ for (i = 0; i < 16; ++i)
+ output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);  // scale by 2 * Sqrt2, reduce via dct_const_round_shift, narrow to tran_low_t
+}
+
+static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {  // 32-point IDTX 1-D kernel: reads input[0..31], writes output[0..31]
+ int i;
+ for (i = 0; i < 32; ++i)
+ output[i] = input[i] * 4;  // plain integer scale by 4; no round-shift helper is used at this size
+}
+
// For use in lieu of DST
static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -291,6 +315,37 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // 4-point IDTX 1-D kernel with the highbd signature; bd is forwarded to the round-shift helper
+ int i;
+ for (i = 0; i < 4; ++i)
+ output[i] = (tran_low_t)highbd_dct_const_round_shift(input[i] * Sqrt2, bd);  // scale by Sqrt2, reduce via highbd_dct_const_round_shift, narrow to tran_low_t
+}
+
+static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // 8-point IDTX 1-D kernel with the highbd signature: each output is the input doubled
+ int i;
+ (void) bd;  // bd is not used by the plain integer scale below; the cast marks it as intentionally unused
+ for (i = 0; i < 8; ++i)
+ output[i] = input[i] * 2;  // no round-shift helper is used at this size
+}
+
+static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // 16-point IDTX 1-D kernel with the highbd signature; bd is forwarded to the round-shift helper
+ int i;
+ for (i = 0; i < 16; ++i)
+ output[i] = (tran_low_t)highbd_dct_const_round_shift(
+ input[i] * 2 * Sqrt2, bd);  // scale by 2 * Sqrt2 before the round-shift; result is narrowed to tran_low_t
+}
+
+static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // 32-point IDTX 1-D kernel with the highbd signature: each output is the input times 4
+ int i;
+ (void) bd;  // bd is not used by the plain integer scale below; the cast marks it as intentionally unused
+ for (i = 0; i < 32; ++i)
+ output[i] = input[i] * 4;  // no round-shift helper is used at this size
+}
+
static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output,
int bd) {
int i;
@@ -331,85 +386,19 @@
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
-
- tran_low_t temp_in[32], temp_out[32];
- transform_2d ht = {idct4_c, idct4_c};
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = idct4_c;
- ht.rows = idct4_c;
- out_scale = cospi_16_64 >> 3;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = idct8_c;
- ht.rows = idct8_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = idct16_c;
- ht.rows = idct16_c;
- out_scale = cospi_16_64 >> 4;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = idct32_c;
- ht.rows = idct32_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (c = 0; c < bs; ++c) {
- for (r = 0; r < bs; ++r)
- temp_in[r] = input[r * coeff_stride + c];
- ht.cols(temp_in, temp_out);
-
- for (r = 0; r < bs; ++r) {
- tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp);
- }
- }
- return;
- }
-
- if (tx_type == H_DCT) {
+ if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
- temp_in[c] = input[r * coeff_stride + c];
- ht.rows(temp_in, temp_out);
-
- for (c = 0; c < bs; ++c) {
- tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp);
- }
+ dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
+ dest += stride;
+ input += bs;
}
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
- dest += stride;
- input += bs;
}
}
#define FLIPUD_PTR(dest, stride, size) do { \
- (dest) = (dest) + ((size) - 1) * (stride); \
- (stride) = - (stride); \
+ (dest) = (dest) + ((size) - 1) * (stride); \
+ (stride) = - (stride); \
} while (0)
static void maybe_flip_strides(uint8_t **dst, int *dstride,
@@ -428,6 +417,7 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
case V_DCT:
case H_DCT:
break;
@@ -705,78 +695,13 @@
const int shift = bs < 32 ? 3 : 2;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- tran_low_t temp_in[32], temp_out[32];
- highbd_transform_2d ht = {vpx_highbd_idct4_c, vpx_highbd_idct4_c};
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = vpx_highbd_idct4_c;
- ht.rows = vpx_highbd_idct4_c;
- out_scale = cospi_16_64 >> 3;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = vpx_highbd_idct8_c;
- ht.rows = vpx_highbd_idct8_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = vpx_highbd_idct16_c;
- ht.rows = vpx_highbd_idct16_c;
- out_scale = cospi_16_64 >> 4;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = vpx_highbd_idct32_c;
- ht.rows = vpx_highbd_idct32_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (c = 0; c < bs; ++c) {
- for (r = 0; r < bs; ++r)
- temp_in[r] = input[r * coeff_stride + c];
- ht.cols(temp_in, temp_out, bd);
-
- for (r = 0; r < bs; ++r) {
- tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp, bd);
- }
- }
- return;
- }
-
- if (tx_type == H_DCT) {
+ if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
- temp_in[c] = input[r * coeff_stride + c];
- ht.rows(temp_in, temp_out, bd);
-
- for (c = 0; c < bs; ++c) {
- tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp, bd);
- }
+ dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
+ dest += stride;
+ input += bs;
}
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
- dest += stride;
- input += bs;
}
}
@@ -796,6 +721,9 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
+ case V_DCT:
+ case H_DCT:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
@@ -843,6 +771,9 @@
{ idst4_c, iadst4_c }, // DST_FLIPADST = 13,
{ iadst4_c, idst4_c }, // FLIPADST_DST = 14,
{ idst4_c, idst4_c }, // DST_DST = 15
+ { iidtx4_c, iidtx4_c }, // IDTX = 16
+ { idct4_c, iidtx4_c }, // V_DCT = 17
+ { iidtx4_c, idct4_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -906,6 +837,9 @@
{ idst8_c, iadst8_c }, // DST_FLIPADST = 13,
{ iadst8_c, idst8_c }, // FLIPADST_DST = 14,
{ idst8_c, idst8_c }, // DST_DST = 15
+ { iidtx8_c, iidtx8_c }, // IDTX = 16
+ { idct8_c, iidtx8_c }, // V_DCT = 17
+ { iidtx8_c, idct8_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -969,6 +903,9 @@
{ idst16_c, iadst16_c }, // DST_FLIPADST = 13,
{ iadst16_c, idst16_c }, // FLIPADST_DST = 14,
{ idst16_c, idst16_c }, // DST_DST = 15
+ { iidtx16_c, iidtx16_c }, // IDTX = 16
+ { idct16_c, iidtx16_c }, // V_DCT = 17
+ { iidtx16_c, idct16_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1032,6 +969,9 @@
{ ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13,
{ ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14,
{ ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15
+ { iidtx32_c, iidtx32_c }, // IDTX = 16
+ { idct32_c, iidtx32_c }, // V_DCT = 17
+ { iidtx32_c, idct32_c }, // H_DCT = 18
};
int i, j;
@@ -1165,11 +1105,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 4, tx_type);
break;
@@ -1206,11 +1146,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 8, tx_type);
break;
@@ -1247,11 +1187,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 16, tx_type);
break;
@@ -1284,10 +1224,10 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
- vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
+ break;
case IDTX:
inv_idtx_add_c(input, dest, stride, 32, tx_type);
break;
@@ -1319,6 +1259,9 @@
{ highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14,
{ highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15
+ { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX = 16
+ { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT = 17
+ { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1385,6 +1328,9 @@
{ highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14,
{ highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15
+ { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX = 16
+ { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT = 17
+ { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1451,6 +1397,9 @@
{ highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14,
{ highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15
+ { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX = 16
+ { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT = 17
+ { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1501,22 +1450,25 @@
void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_32[] = {
- { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
- { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
- { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
- { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
+ { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
+ { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
+ { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
+ { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT
+ { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1657,11 +1609,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
break;
@@ -1699,11 +1651,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
break;
@@ -1741,11 +1693,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
break;
@@ -1779,10 +1731,10 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
- vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
+ break;
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
break;
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index bcc69f3..26ae569 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -313,7 +313,7 @@
BufferPool *buffer_pool;
PARTITION_CONTEXT *above_seg_context;
- ENTROPY_CONTEXT *above_context;
+ ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
#if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT left_txfm_context[8];
@@ -405,9 +405,7 @@
for (i = 0; i < MAX_MB_PLANE; ++i) {
xd->plane[i].dqcoeff = dqcoeff;
- xd->above_context[i] = cm->above_context +
- i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
-
+ xd->above_context[i] = cm->above_context[i];
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
} else {
@@ -525,6 +523,27 @@
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
+static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
+ int mi_col_start, int mi_col_end) {  // Clears the above-row context buffers in cm for mi columns [mi_col_start, mi_col_end)
+ const int width = mi_col_end - mi_col_start;  // number of mi columns to clear
+ int i;
+
+ for (i = 0 ; i < MAX_MB_PLANE ; i++)
+ vp10_zero_array(cm->above_context[i] + 2 * mi_col_start, 2 * width);  // each plane has its own buffer, addressed at 2 entries per mi column
+ vp10_zero_array(cm->above_seg_context + mi_col_start, width);  // addressed at 1 entry per mi column
+#if CONFIG_VAR_TX
+ vp10_zero_array(cm->above_txfm_context + mi_col_start, width);  // addressed at 1 entry per mi column; only built with CONFIG_VAR_TX
+#endif // CONFIG_VAR_TX
+}
+
+static INLINE void vp10_zero_left_context(MACROBLOCKD *const xd) {  // Zeroes the left-edge context state held in xd; callers in this patch invoke it before walking the mi columns of a row
+ vp10_zero(xd->left_context);  // left counterpart of the above_context buffers
+ vp10_zero(xd->left_seg_context);  // left counterpart of above_seg_context
+#if CONFIG_VAR_TX
+ vp10_zero(xd->left_txfm_context_buffer);  // only built with CONFIG_VAR_TX
+#endif
+}
+
#if CONFIG_VAR_TX
static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx,
TX_SIZE tx_size,
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index ce6317c..0441662 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -2962,18 +2962,7 @@
assert(tile_rows <= 4);
assert(tile_cols <= (1 << 6));
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(cm->above_context, 0,
- sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols);
-
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * aligned_cols);
-
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * aligned_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_cols);
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -3032,11 +3021,7 @@
tile_cols - tile_col - 1 : tile_col;
tile_data = pbi->tile_data + tile_cols * tile_row + col;
vp10_tile_set_col(&tile, tile_data->cm, col);
- vp10_zero(tile_data->xd.left_context);
- vp10_zero(tile_data->xd.left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(tile_data->xd.left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(&tile_data->xd);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(pbi, &tile_data->xd,
@@ -3126,11 +3111,7 @@
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- vp10_zero(tile_data->xd.left_context);
- vp10_zero(tile_data->xd.left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(tile_data->xd.left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(&tile_data->xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->pbi, &tile_data->xd,
@@ -3211,16 +3192,8 @@
worker->data2 = &pbi->tile_worker_info[n];
}
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(cm->above_context, 0,
- sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * aligned_mi_cols);
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * aligned_mi_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_mi_cols);
+
// Load tile data into tile_buffers
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 1ef2ea5..f20c224 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -1710,10 +1710,7 @@
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- vp10_zero(xd->left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(xd->left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
write_modes_sb(cpi, tile, w, tok, tok_end,
@@ -2190,12 +2187,7 @@
const int tile_rows = 1 << cm->log2_tile_rows;
unsigned int max_tile = 0;
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * mi_cols_aligned_to_sb(cm->mi_cols));
-#endif
+ vp10_zero_above_context(cm, 0, mi_cols_aligned_to_sb(cm->mi_cols));
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 31a4c87..8a1ee20 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -1212,6 +1212,30 @@
}
#if CONFIG_EXT_TX
+static void fidtx4(const tran_low_t *input, tran_low_t *output) {  // 4-point IDTX 1-D kernel for the forward tables: reads input[0..3], writes output[0..3]
+ int i;
+ for (i = 0; i < 4; ++i)
+ output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2);  // scale by Sqrt2, reduce via fdct_round_shift, narrow to tran_low_t
+}
+
+static void fidtx8(const tran_low_t *input, tran_low_t *output) {  // 8-point IDTX 1-D kernel for the forward tables: reads input[0..7], writes output[0..7]
+ int i;
+ for (i = 0; i < 8; ++i)
+ output[i] = input[i] * 2;  // plain integer doubling; no round-shift helper is used at this size
+}
+
+static void fidtx16(const tran_low_t *input, tran_low_t *output) {  // 16-point IDTX 1-D kernel for the forward tables: reads input[0..15], writes output[0..15]
+ int i;
+ for (i = 0; i < 16; ++i)
+ output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2);  // scale by 2 * Sqrt2, reduce via fdct_round_shift, narrow to tran_low_t
+}
+
+static void fidtx32(const tran_low_t *input, tran_low_t *output) {  // 32-point IDTX 1-D kernel for the forward tables: reads input[0..31], writes output[0..31]
+ int i;
+ for (i = 0; i < 32; ++i)
+ output[i] = input[i] * 4;  // plain integer scale by 4; no round-shift helper is used at this size
+}
+
// For use in lieu of DST
static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -1315,6 +1339,7 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
case H_DCT:
case V_DCT:
break;
@@ -1362,6 +1387,9 @@
{ fdst4, fadst4 }, // DST_FLIPADST = 13,
{ fadst4, fdst4 }, // FLIPADST_DST = 14,
{ fdst4, fdst4 }, // DST_DST = 15
+ { fidtx4, fidtx4 }, // IDTX = 16
+ { fdct4, fidtx4 }, // V_DCT = 17
+ { fidtx4, fdct4 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1383,6 +1411,9 @@
{ fdst8, fadst8 }, // DST_FLIPADST = 13,
{ fadst8, fdst8 }, // FLIPADST_DST = 14,
{ fdst8, fdst8 }, // DST_DST = 15
+ { fidtx8, fidtx8 }, // IDTX = 16
+ { fdct8, fidtx8 }, // V_DCT = 17
+ { fidtx8, fdct8 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1404,6 +1435,9 @@
{ fdst16, fadst16 }, // DST_FLIPADST = 13,
{ fadst16, fdst16 }, // FLIPADST_DST = 14,
{ fdst16, fdst16 }, // DST_DST = 15
+ { fidtx16, fidtx16 }, // IDTX = 16
+ { fdct16, fidtx16 }, // V_DCT = 17
+ { fidtx16, fdct16 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1425,6 +1459,9 @@
{ fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
{ fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
{ fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
+ { fidtx32, fidtx32 }, // IDTX = 16
+ { fdct32, fidtx32 }, // V_DCT = 17
+ { fidtx32, fdct32 }, // H_DCT = 18
};
#endif // CONFIG_EXT_TX
@@ -1766,86 +1803,12 @@
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
-
- const int16_t *input = src_diff;
- tran_low_t *output = coeff;
-
- int i, j;
- tran_low_t temp_in[32], temp_out[32];
- transform_2d ht = {fdct4, fdct4};
- int in_scale = 1;
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = fdct4;
- ht.rows = fdct4;
- in_scale = 16;
- out_scale = cospi_16_64 >> 1;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = fdct8;
- ht.rows = fdct8;
- in_scale = 4;
- out_scale = (1 << DCT_CONST_BITS);
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = fdct16;
- ht.rows = fdct16;
- in_scale = 4;
- out_scale = cospi_16_64;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = fdct32;
- ht.rows = fdct32;
- in_scale = 4;
- out_scale = (1 << (DCT_CONST_BITS - 2));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (i = 0; i < bs; ++i) {
- for (j = 0; j < bs; ++j)
- temp_in[j] = input[j * stride + i] * in_scale;
- ht.cols(temp_in, temp_out);
-
- for (j = 0; j < bs; ++j) {
- tran_high_t temp = (tran_high_t)temp_out[j] * out_scale;
- temp >>= DCT_CONST_BITS;
- output[j * coeff_stride + i] = (tran_low_t)temp;
- }
+ if (tx_type == IDTX) {
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
+ src_diff += stride;
+ coeff += bs;
}
- return;
- }
-
- // Rows
- if (tx_type == H_DCT) {
- for (j = 0; j < bs; ++j) {
- for (i = 0; i < bs; ++i)
- temp_in[i] = input[j * stride + i] * in_scale;
- ht.rows(temp_in, temp_out);
-
- for (i = 0; i < bs; ++i) {
- tran_high_t temp = (tran_high_t)temp_out[i] * out_scale;
- temp >>= DCT_CONST_BITS;
- output[j * coeff_stride + i] = (tran_low_t)temp;
- }
- }
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
- src_diff += stride;
- coeff += bs;
}
}
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 6c76523..ec00b62 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1648,6 +1648,9 @@
totalrate_nocoef,
#endif // CONFIG_SUPERTX
bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
}
} else {
vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
@@ -1655,6 +1658,9 @@
totalrate_nocoef,
#endif // CONFIG_SUPERTX
bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
}
}
@@ -3681,13 +3687,8 @@
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
- // Initialize the left context for the new SB row
- memset(&xd->left_context, 0, sizeof(xd->left_context));
- memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
-#if CONFIG_VAR_TX
- memset(xd->left_txfm_context_buffer, 0,
- sizeof(xd->left_txfm_context_buffer));
-#endif
+ vp10_zero_left_context(xd);
+
// Code each SB in the row
for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
@@ -3785,19 +3786,9 @@
// Copy data over into macro block data structures.
vp10_setup_src_planes(x, cpi->Source, 0, 0);
- vp10_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
+ vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(xd->above_context[0], 0,
- sizeof(*xd->above_context[0]) *
- 2 * aligned_mi_cols * MAX_MB_PLANE);
- memset(xd->above_seg_context, 0,
- sizeof(*xd->above_seg_context) * aligned_mi_cols);
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*xd->above_txfm_context) * aligned_mi_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_mi_cols);
}
static int check_dual_ref_flags(VP10_COMP *cpi) {
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index c3a739b..faedb43 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -65,6 +65,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
@@ -105,6 +107,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
@@ -145,6 +149,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
@@ -185,6 +191,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
@@ -226,11 +234,10 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
- // Use C version since DST exists only in C
- vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
@@ -270,11 +277,11 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST exists only in C
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
@@ -314,11 +321,11 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST exists only in C
vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
@@ -355,10 +362,10 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
- vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index ac6adfe..16deebf 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -8298,11 +8298,12 @@
rd_cost->rate = rate2;
#if CONFIG_SUPERTX
- *returnrate_nocoef = rate2 - rate_y - rate_uv;
- if (!disable_skip) {
- *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
- skippable || this_skip2);
- }
+ if (x->skip && rate_y == INT_MAX)
+ *returnrate_nocoef = rate2;
+ else
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
+ disable_skip || skippable || this_skip2);
*returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
mbmi->ref_frame[0] != INTRA_FRAME);
#if CONFIG_OBMC