Merge "Optimize HBD up-sampled prediction functions" into nextgenv2
diff --git a/configure b/configure
index 97366e4..ed1d048 100755
--- a/configure
+++ b/configure
@@ -284,6 +284,7 @@
ext_partition
ext_tile
obmc
+ entropy
"
CONFIG_LIST="
dependency_tracking
diff --git a/test/borders_test.cc b/test/borders_test.cc
index 6592375..ff3812c 100644
--- a/test/borders_test.cc
+++ b/test/borders_test.cc
@@ -52,7 +52,7 @@
// extend into the border and test the border condition.
cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 2000;
cfg_.rc_max_quantizer = 10;
diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc
index 8baa2f9..6a938a0 100644
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -74,7 +74,7 @@
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
cfg_.rc_max_quantizer = 0;
cfg_.rc_min_quantizer = 0;
@@ -92,7 +92,7 @@
::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
cfg_.g_timebase = video.timebase();
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
cfg_.rc_max_quantizer = 0;
cfg_.rc_min_quantizer = 0;
@@ -109,7 +109,7 @@
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 12000;
cfg_.rc_max_quantizer = 10;
cfg_.rc_min_quantizer = 0;
@@ -125,7 +125,7 @@
// when passing in a very high min q. This pushes the encoder to producing
// lots of small partitions which might will test the other condition.
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 200;
cfg_.rc_min_quantizer = 40;
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc
index 29a653f..3445bf2 100644
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -48,7 +48,7 @@
cfg_.g_lag_in_frames = 3;
cfg_.rc_end_usage = VPX_VBR;
cfg_.rc_2pass_vbr_minsection_pct = 5;
- cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_2pass_vbr_maxsection_pct = 2000;
} else {
cfg_.g_lag_in_frames = 0;
cfg_.rc_end_usage = VPX_CBR;
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c
index e14aee7..b3c216e 100644
--- a/vp10/common/alloccommon.c
+++ b/vp10/common/alloccommon.c
@@ -97,10 +97,13 @@
}
void vp10_free_context_buffers(VP10_COMMON *cm) {
+ int i;
cm->free_mi(cm);
free_seg_map(cm);
- vpx_free(cm->above_context);
- cm->above_context = NULL;
+ for (i = 0 ; i < MAX_MB_PLANE ; i++) {
+ vpx_free(cm->above_context[i]);
+ cm->above_context[i] = NULL;
+ }
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
#if CONFIG_VAR_TX
@@ -128,11 +131,14 @@
}
if (cm->above_context_alloc_cols < cm->mi_cols) {
- vpx_free(cm->above_context);
- cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc(
- 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE,
- sizeof(*cm->above_context));
- if (!cm->above_context) goto fail;
+ int i;
+ for (i = 0 ; i < MAX_MB_PLANE ; i++) {
+ vpx_free(cm->above_context[i]);
+ cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
+ 2 * mi_cols_aligned_to_sb(cm->mi_cols),
+ sizeof(*cm->above_context[0]));
+ if (!cm->above_context[i]) goto fail;
+ }
vpx_free(cm->above_seg_context);
cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index d5139f7..016fc75 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -380,7 +380,7 @@
#if CONFIG_EXT_TX
#define ALLOW_INTRA_EXT_TX 1
// whether masked transforms are used for 32X32
-#define USE_MSKTX_FOR_32X32 1
+#define USE_MSKTX_FOR_32X32 0
static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = {
1, 19, 12, 2
@@ -447,7 +447,7 @@
{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1},
- { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1},
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
};
static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
diff --git a/vp10/common/common.h b/vp10/common/common.h
index 4abcbf6..54c7b89 100644
--- a/vp10/common/common.h
+++ b/vp10/common/common.h
@@ -33,12 +33,12 @@
// Use this for variably-sized arrays.
#define vp10_copy_array(dest, src, n) { \
- assert(sizeof(*dest) == sizeof(*src)); \
- memcpy(dest, src, n * sizeof(*src)); \
+ assert(sizeof(*(dest)) == sizeof(*(src))); \
+ memcpy(dest, src, (n) * sizeof(*(src))); \
}
#define vp10_zero(dest) memset(&(dest), 0, sizeof(dest))
-#define vp10_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest))
+#define vp10_zero_array(dest, n) memset(dest, 0, (n) * sizeof(*(dest)))
static INLINE int get_unsigned_bits(unsigned int num_values) {
return num_values > 0 ? get_msb(num_values) + 1 : 0;
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index f621ec6..863f0db 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -260,6 +260,30 @@
}
#if CONFIG_EXT_TX
+static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 4-point 1-D identity kernel: output[i] depends only on input[i].
+ for (i = 0; i < 4; ++i)  // gain of Sqrt2, then dct_const_round_shift()
+ output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+}
+
+static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 8-point 1-D identity kernel: output[i] depends only on input[i].
+ for (i = 0; i < 8; ++i)  // plain integer gain of 2, no rounding step
+ output[i] = input[i] * 2;
+}
+
+static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 16-point 1-D identity kernel: output[i] depends only on input[i].
+ for (i = 0; i < 16; ++i)  // gain 2 * Sqrt2, then dct_const_round_shift()
+ output[i] = (tran_low_t)dct_const_round_shift(input[i] * 2 * Sqrt2);
+}
+
+static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 32-point 1-D identity kernel: output[i] depends only on input[i].
+ for (i = 0; i < 32; ++i)  // plain integer gain of 4, no rounding step
+ output[i] = input[i] * 4;
+}
+
// For use in lieu of DST
static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -291,6 +315,37 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 4-point 1-D identity kernel, high-bitdepth signature.
+ for (i = 0; i < 4; ++i)  // gain Sqrt2, then bd-aware round shift
+ output[i] = (tran_low_t)highbd_dct_const_round_shift(input[i] * Sqrt2, bd);
+}
+
+static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 8-point 1-D identity kernel, high-bitdepth signature.
+ (void) bd;  // bit depth is not read by a pure integer gain
+ for (i = 0; i < 8; ++i)  // plain integer gain of 2, no rounding step
+ output[i] = input[i] * 2;
+}
+
+static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 16-point 1-D identity kernel, high-bitdepth signature.
+ for (i = 0; i < 16; ++i)  // gain 2 * Sqrt2, then bd-aware round shift
+ output[i] = (tran_low_t)highbd_dct_const_round_shift(
+ input[i] * 2 * Sqrt2, bd);
+}
+
+static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ int i;  // 32-point 1-D identity kernel, high-bitdepth signature.
+ (void) bd;  // bit depth is not read by a pure integer gain
+ for (i = 0; i < 32; ++i)  // plain integer gain of 4, no rounding step
+ output[i] = input[i] * 4;
+}
+
static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output,
int bd) {
int i;
@@ -331,85 +386,19 @@
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
-
- tran_low_t temp_in[32], temp_out[32];
- transform_2d ht = {idct4_c, idct4_c};
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = idct4_c;
- ht.rows = idct4_c;
- out_scale = cospi_16_64 >> 3;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = idct8_c;
- ht.rows = idct8_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = idct16_c;
- ht.rows = idct16_c;
- out_scale = cospi_16_64 >> 4;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = idct32_c;
- ht.rows = idct32_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (c = 0; c < bs; ++c) {
- for (r = 0; r < bs; ++r)
- temp_in[r] = input[r * coeff_stride + c];
- ht.cols(temp_in, temp_out);
-
- for (r = 0; r < bs; ++r) {
- tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp);
- }
- }
- return;
- }
-
- if (tx_type == H_DCT) {
+ if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
- temp_in[c] = input[r * coeff_stride + c];
- ht.rows(temp_in, temp_out);
-
- for (c = 0; c < bs; ++c) {
- tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp);
- }
+ dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
+ dest += stride;
+ input += bs;
}
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dest[c] = clip_pixel_add(dest[c], input[c] >> shift);
- dest += stride;
- input += bs;
}
}
#define FLIPUD_PTR(dest, stride, size) do { \
- (dest) = (dest) + ((size) - 1) * (stride); \
- (stride) = - (stride); \
+ (dest) = (dest) + ((size) - 1) * (stride); \
+ (stride) = - (stride); \
} while (0)
static void maybe_flip_strides(uint8_t **dst, int *dstride,
@@ -428,6 +417,7 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
case V_DCT:
case H_DCT:
break;
@@ -705,78 +695,13 @@
const int shift = bs < 32 ? 3 : 2;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- tran_low_t temp_in[32], temp_out[32];
- highbd_transform_2d ht = {vpx_highbd_idct4_c, vpx_highbd_idct4_c};
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = vpx_highbd_idct4_c;
- ht.rows = vpx_highbd_idct4_c;
- out_scale = cospi_16_64 >> 3;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = vpx_highbd_idct8_c;
- ht.rows = vpx_highbd_idct8_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = vpx_highbd_idct16_c;
- ht.rows = vpx_highbd_idct16_c;
- out_scale = cospi_16_64 >> 4;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = vpx_highbd_idct32_c;
- ht.rows = vpx_highbd_idct32_c;
- out_scale = (1 << (DCT_CONST_BITS - 4));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (c = 0; c < bs; ++c) {
- for (r = 0; r < bs; ++r)
- temp_in[r] = input[r * coeff_stride + c];
- ht.cols(temp_in, temp_out, bd);
-
- for (r = 0; r < bs; ++r) {
- tran_high_t temp = (tran_high_t)temp_out[r] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp, bd);
- }
- }
- return;
- }
-
- if (tx_type == H_DCT) {
+ if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
- temp_in[c] = input[r * coeff_stride + c];
- ht.rows(temp_in, temp_out, bd);
-
- for (c = 0; c < bs; ++c) {
- tran_high_t temp = (tran_high_t)temp_out[c] * out_scale;
- temp >>= DCT_CONST_BITS;
- dest[r * stride + c] = highbd_clip_pixel_add(dest[r * stride + c],
- (tran_low_t)temp, bd);
- }
+ dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
+ dest += stride;
+ input += bs;
}
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dest[c] = highbd_clip_pixel_add(dest[c], input[c] >> shift, bd);
- dest += stride;
- input += bs;
}
}
@@ -796,6 +721,9 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
+ case V_DCT:
+ case H_DCT:
break;
case FLIPADST_DCT:
case FLIPADST_ADST:
@@ -843,6 +771,9 @@
{ idst4_c, iadst4_c }, // DST_FLIPADST = 13,
{ iadst4_c, idst4_c }, // FLIPADST_DST = 14,
{ idst4_c, idst4_c }, // DST_DST = 15
+ { iidtx4_c, iidtx4_c }, // IDTX = 16
+ { idct4_c, iidtx4_c }, // V_DCT = 17
+ { iidtx4_c, idct4_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -906,6 +837,9 @@
{ idst8_c, iadst8_c }, // DST_FLIPADST = 13,
{ iadst8_c, idst8_c }, // FLIPADST_DST = 14,
{ idst8_c, idst8_c }, // DST_DST = 15
+ { iidtx8_c, iidtx8_c }, // IDTX = 16
+ { idct8_c, iidtx8_c }, // V_DCT = 17
+ { iidtx8_c, idct8_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -969,6 +903,9 @@
{ idst16_c, iadst16_c }, // DST_FLIPADST = 13,
{ iadst16_c, idst16_c }, // FLIPADST_DST = 14,
{ idst16_c, idst16_c }, // DST_DST = 15
+ { iidtx16_c, iidtx16_c }, // IDTX = 16
+ { idct16_c, iidtx16_c }, // V_DCT = 17
+ { iidtx16_c, idct16_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1032,6 +969,9 @@
{ ihalfcenter32_c, ihalfright32_c }, // DST_FLIPADST = 13,
{ ihalfright32_c, ihalfcenter32_c }, // FLIPADST_DST = 14,
{ ihalfcenter32_c, ihalfcenter32_c }, // DST_DST = 15
+ { iidtx32_c, iidtx32_c }, // IDTX = 16
+ { idct32_c, iidtx32_c }, // V_DCT = 17
+ { iidtx32_c, idct32_c }, // H_DCT = 18
};
int i, j;
@@ -1165,11 +1105,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 4, tx_type);
break;
@@ -1206,11 +1146,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 8, tx_type);
break;
@@ -1247,11 +1187,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
inv_idtx_add_c(input, dest, stride, 16, tx_type);
break;
@@ -1284,10 +1224,10 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
- vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
+ break;
case IDTX:
inv_idtx_add_c(input, dest, stride, 32, tx_type);
break;
@@ -1319,6 +1259,9 @@
{ highbd_idst4_c, vpx_highbd_iadst4_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst4_c, highbd_idst4_c }, // FLIPADST_DST = 14,
{ highbd_idst4_c, highbd_idst4_c }, // DST_DST = 15
+ { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX = 16
+ { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT = 17
+ { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1385,6 +1328,9 @@
{ highbd_idst8_c, vpx_highbd_iadst8_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst8_c, highbd_idst8_c }, // FLIPADST_DST = 14,
{ highbd_idst8_c, highbd_idst8_c }, // DST_DST = 15
+ { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX = 16
+ { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT = 17
+ { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1451,6 +1397,9 @@
{ highbd_idst16_c, vpx_highbd_iadst16_c }, // DST_FLIPADST = 13,
{ vpx_highbd_iadst16_c, highbd_idst16_c }, // FLIPADST_DST = 14,
{ highbd_idst16_c, highbd_idst16_c }, // DST_DST = 15
+ { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX = 16
+ { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT = 17
+ { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1501,22 +1450,25 @@
void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
static const highbd_transform_2d HIGH_IHT_32[] = {
- { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
- { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
- { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
- { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
- { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
- { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
- { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
+ { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
+ { highbd_ihalfcenter32_c, vpx_highbd_idct32_c }, // DST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfcenter32_c }, // DCT_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_ADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // ADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfright32_c }, // DST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfcenter32_c }, // FLIPADST_DST
+ { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c }, // DST_DST
+ { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
+ { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT
+ { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1657,11 +1609,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
break;
@@ -1699,11 +1651,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
break;
@@ -1741,11 +1693,11 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST only exists in C code
vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
break;
@@ -1779,10 +1731,10 @@
case ADST_DST:
case FLIPADST_DST:
case DST_FLIPADST:
- vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
+ break;
case IDTX:
highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
break;
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index bcc69f3..26ae569 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -313,7 +313,7 @@
BufferPool *buffer_pool;
PARTITION_CONTEXT *above_seg_context;
- ENTROPY_CONTEXT *above_context;
+ ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
#if CONFIG_VAR_TX
TXFM_CONTEXT *above_txfm_context;
TXFM_CONTEXT left_txfm_context[8];
@@ -405,9 +405,7 @@
for (i = 0; i < MAX_MB_PLANE; ++i) {
xd->plane[i].dqcoeff = dqcoeff;
- xd->above_context[i] = cm->above_context +
- i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
-
+ xd->above_context[i] = cm->above_context[i];
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
} else {
@@ -525,6 +523,27 @@
return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
}
+static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
+ int mi_col_start, int mi_col_end) {
+ const int width = mi_col_end - mi_col_start;  // span in mi columns
+ int i;  // Zeroes [mi_col_start, mi_col_end) in all above-context buffers.
+
+ for (i = 0 ; i < MAX_MB_PLANE ; i++)  // 2 entries per mi column, per plane
+ vp10_zero_array(cm->above_context[i] + 2 * mi_col_start, 2 * width);
+ vp10_zero_array(cm->above_seg_context + mi_col_start, width);
+#if CONFIG_VAR_TX
+ vp10_zero_array(cm->above_txfm_context + mi_col_start, width);
+#endif // CONFIG_VAR_TX
+}
+
+static INLINE void vp10_zero_left_context(MACROBLOCKD *const xd) {
+ vp10_zero(xd->left_context);  // vp10_zero() clears sizeof(member) bytes
+ vp10_zero(xd->left_seg_context);  // left partition context
+#if CONFIG_VAR_TX
+ vp10_zero(xd->left_txfm_context_buffer);  // only present with VAR_TX
+#endif
+}
+
#if CONFIG_VAR_TX
static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx,
TX_SIZE tx_size,
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index ce6317c..0441662 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -2962,18 +2962,7 @@
assert(tile_rows <= 4);
assert(tile_cols <= (1 << 6));
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(cm->above_context, 0,
- sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols);
-
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * aligned_cols);
-
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * aligned_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_cols);
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
@@ -3032,11 +3021,7 @@
tile_cols - tile_col - 1 : tile_col;
tile_data = pbi->tile_data + tile_cols * tile_row + col;
vp10_tile_set_col(&tile, tile_data->cm, col);
- vp10_zero(tile_data->xd.left_context);
- vp10_zero(tile_data->xd.left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(tile_data->xd.left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(&tile_data->xd);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(pbi, &tile_data->xd,
@@ -3126,11 +3111,7 @@
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- vp10_zero(tile_data->xd.left_context);
- vp10_zero(tile_data->xd.left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(tile_data->xd.left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(&tile_data->xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data->pbi, &tile_data->xd,
@@ -3211,16 +3192,8 @@
worker->data2 = &pbi->tile_worker_info[n];
}
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(cm->above_context, 0,
- sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols);
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * aligned_mi_cols);
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * aligned_mi_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_mi_cols);
+
// Load tile data into tile_buffers
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 1ef2ea5..f20c224 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -1710,10 +1710,7 @@
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- vp10_zero(xd->left_seg_context);
-#if CONFIG_VAR_TX
- vp10_zero(xd->left_txfm_context_buffer);
-#endif
+ vp10_zero_left_context(xd);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
write_modes_sb(cpi, tile, w, tok, tok_end,
@@ -2190,12 +2187,7 @@
const int tile_rows = 1 << cm->log2_tile_rows;
unsigned int max_tile = 0;
- memset(cm->above_seg_context, 0,
- sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*cm->above_txfm_context) * mi_cols_aligned_to_sb(cm->mi_cols));
-#endif
+ vp10_zero_above_context(cm, 0, mi_cols_aligned_to_sb(cm->mi_cols));
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h
index 4fa5806..53c7142 100644
--- a/vp10/encoder/context_tree.h
+++ b/vp10/encoder/context_tree.h
@@ -54,7 +54,6 @@
int hybrid_pred_diff;
int comp_pred_diff;
int single_pred_diff;
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
// TODO(jingning) Use RD_COST struct here instead. This involves a boarder
// scope of refactoring.
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 31a4c87..8a1ee20 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -1212,6 +1212,30 @@
}
#if CONFIG_EXT_TX
+static void fidtx4(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 4-point forward 1-D identity kernel: element-wise, no mixing.
+ for (i = 0; i < 4; ++i)  // gain of Sqrt2, then fdct_round_shift()
+ output[i] = (tran_low_t)fdct_round_shift(input[i] * Sqrt2);
+}
+
+static void fidtx8(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 8-point forward 1-D identity kernel: element-wise, no mixing.
+ for (i = 0; i < 8; ++i)  // plain integer gain of 2, no rounding step
+ output[i] = input[i] * 2;
+}
+
+static void fidtx16(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 16-point forward 1-D identity kernel: element-wise, no mixing.
+ for (i = 0; i < 16; ++i)  // gain 2 * Sqrt2, then fdct_round_shift()
+ output[i] = (tran_low_t)fdct_round_shift(input[i] * 2 * Sqrt2);
+}
+
+static void fidtx32(const tran_low_t *input, tran_low_t *output) {
+ int i;  // 32-point forward 1-D identity kernel: element-wise, no mixing.
+ for (i = 0; i < 32; ++i)  // plain integer gain of 4, no rounding step
+ output[i] = input[i] * 4;
+}
+
// For use in lieu of DST
static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -1315,6 +1339,7 @@
case DST_DCT:
case DST_ADST:
case ADST_DST:
+ case IDTX:
case H_DCT:
case V_DCT:
break;
@@ -1362,6 +1387,9 @@
{ fdst4, fadst4 }, // DST_FLIPADST = 13,
{ fadst4, fdst4 }, // FLIPADST_DST = 14,
{ fdst4, fdst4 }, // DST_DST = 15
+ { fidtx4, fidtx4 }, // IDTX = 16
+ { fdct4, fidtx4 }, // V_DCT = 17
+ { fidtx4, fdct4 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1383,6 +1411,9 @@
{ fdst8, fadst8 }, // DST_FLIPADST = 13,
{ fadst8, fdst8 }, // FLIPADST_DST = 14,
{ fdst8, fdst8 }, // DST_DST = 15
+ { fidtx8, fidtx8 }, // IDTX = 16
+ { fdct8, fidtx8 }, // V_DCT = 17
+ { fidtx8, fdct8 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1404,6 +1435,9 @@
{ fdst16, fadst16 }, // DST_FLIPADST = 13,
{ fadst16, fdst16 }, // FLIPADST_DST = 14,
{ fdst16, fdst16 }, // DST_DST = 15
+ { fidtx16, fidtx16 }, // IDTX = 16
+ { fdct16, fidtx16 }, // V_DCT = 17
+ { fidtx16, fdct16 }, // H_DCT = 18
#endif // CONFIG_EXT_TX
};
@@ -1425,6 +1459,9 @@
{ fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
{ fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
{ fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
+ { fidtx32, fidtx32 }, // IDTX = 16
+ { fdct32, fidtx32 }, // V_DCT = 17
+ { fidtx32, fdct32 }, // H_DCT = 18
};
#endif // CONFIG_EXT_TX
@@ -1766,86 +1803,12 @@
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
-
- const int16_t *input = src_diff;
- tran_low_t *output = coeff;
-
- int i, j;
- tran_low_t temp_in[32], temp_out[32];
- transform_2d ht = {fdct4, fdct4};
- int in_scale = 1;
- int out_scale = 1;
- int coeff_stride = 0;
-
- switch (bs) {
- case 4:
- ht.cols = fdct4;
- ht.rows = fdct4;
- in_scale = 16;
- out_scale = cospi_16_64 >> 1;
- coeff_stride = 4;
- break;
- case 8:
- ht.cols = fdct8;
- ht.rows = fdct8;
- in_scale = 4;
- out_scale = (1 << DCT_CONST_BITS);
- coeff_stride = 8;
- break;
- case 16:
- ht.cols = fdct16;
- ht.rows = fdct16;
- in_scale = 4;
- out_scale = cospi_16_64;
- coeff_stride = 16;
- break;
- case 32:
- ht.cols = fdct32;
- ht.rows = fdct32;
- in_scale = 4;
- out_scale = (1 << (DCT_CONST_BITS - 2));
- coeff_stride = 32;
- break;
- default:
- assert(0);
- }
-
- // Columns
- if (tx_type == V_DCT) {
- for (i = 0; i < bs; ++i) {
- for (j = 0; j < bs; ++j)
- temp_in[j] = input[j * stride + i] * in_scale;
- ht.cols(temp_in, temp_out);
-
- for (j = 0; j < bs; ++j) {
- tran_high_t temp = (tran_high_t)temp_out[j] * out_scale;
- temp >>= DCT_CONST_BITS;
- output[j * coeff_stride + i] = (tran_low_t)temp;
- }
+ if (tx_type == IDTX) {
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
+ src_diff += stride;
+ coeff += bs;
}
- return;
- }
-
- // Rows
- if (tx_type == H_DCT) {
- for (j = 0; j < bs; ++j) {
- for (i = 0; i < bs; ++i)
- temp_in[i] = input[j * stride + i] * in_scale;
- ht.rows(temp_in, temp_out);
-
- for (i = 0; i < bs; ++i) {
- tran_high_t temp = (tran_high_t)temp_out[i] * out_scale;
- temp >>= DCT_CONST_BITS;
- output[j * coeff_stride + i] = (tran_low_t)temp;
- }
- }
- return;
- }
-
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] << shift;
- src_diff += stride;
- coeff += bs;
}
}
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index c5a68a9..ec00b62 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1194,9 +1194,6 @@
rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- rdc->filter_diff[i] += ctx->best_filter_diff[i];
}
for (h = 0; h < y_mis; ++h) {
@@ -1316,9 +1313,6 @@
rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- rdc->filter_diff[i] += ctx->best_filter_diff[i];
}
for (h = 0; h < y_mis; ++h) {
@@ -1654,6 +1648,9 @@
totalrate_nocoef,
#endif // CONFIG_SUPERTX
bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
}
} else {
vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
@@ -1661,6 +1658,9 @@
totalrate_nocoef,
#endif // CONFIG_SUPERTX
bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
}
}
@@ -3687,13 +3687,8 @@
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
- // Initialize the left context for the new SB row
- memset(&xd->left_context, 0, sizeof(xd->left_context));
- memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
-#if CONFIG_VAR_TX
- memset(xd->left_txfm_context_buffer, 0,
- sizeof(xd->left_txfm_context_buffer));
-#endif
+ vp10_zero_left_context(xd);
+
// Code each SB in the row
for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
@@ -3791,19 +3786,9 @@
// Copy data over into macro block data structures.
vp10_setup_src_planes(x, cpi->Source, 0, 0);
- vp10_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
+ vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
- // Note: this memset assumes above_context[0], [1] and [2]
- // are allocated as part of the same buffer.
- memset(xd->above_context[0], 0,
- sizeof(*xd->above_context[0]) *
- 2 * aligned_mi_cols * MAX_MB_PLANE);
- memset(xd->above_seg_context, 0,
- sizeof(*xd->above_seg_context) * aligned_mi_cols);
-#if CONFIG_VAR_TX
- memset(cm->above_txfm_context, 0,
- sizeof(*xd->above_txfm_context) * aligned_mi_cols);
-#endif
+ vp10_zero_above_context(cm, 0, aligned_mi_cols);
}
static int check_dual_ref_flags(VP10_COMP *cpi) {
@@ -3971,7 +3956,6 @@
vp10_zero(*td->counts);
vp10_zero(rdc->coef_counts);
vp10_zero(rdc->comp_pred_diff);
- vp10_zero(rdc->filter_diff);
rdc->m_search_count = 0; // Count of motion search hits.
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
@@ -4039,31 +4023,9 @@
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
-
-static INTERP_FILTER get_interp_filter(
- const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
-#if CONFIG_EXT_INTERP
- if (!is_alt_ref &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_SMOOTH] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[EIGHTTAP_REGULAR] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[MULTITAP_SHARP] &&
- threshes[EIGHTTAP_SMOOTH2] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP_SMOOTH2;
- }
-#endif // CONFIG_EXT_INTERP
- if (!is_alt_ref &&
- threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_REGULAR] &&
- threshes[EIGHTTAP_SMOOTH] > threshes[MULTITAP_SHARP] &&
- threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP_SMOOTH;
- } else if (threshes[MULTITAP_SHARP] > threshes[EIGHTTAP_REGULAR] &&
- threshes[MULTITAP_SHARP] > threshes[SWITCHABLE - 1]) {
- return MULTITAP_SHARP;
- } else if (threshes[EIGHTTAP_REGULAR] > threshes[SWITCHABLE - 1]) {
- return EIGHTTAP_REGULAR;
- } else {
- return SWITCHABLE;
- }
+static INTERP_FILTER get_cm_interp_filter(VP10_COMP *cpi) {
+ (void)cpi;  // parameter is currently unused
+ return SWITCHABLE;  // unconditional: no frame-level filter is forced
}
void vp10_encode_frame(VP10_COMP *cpi) {
@@ -4116,7 +4078,6 @@
// INTRA/ALTREF/GOLDEN/LAST needs to be specified seperately.
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
- int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
const int is_alt_ref = frame_type == ALTREF_FRAME;
/* prediction (compound, single or hybrid) mode selection */
@@ -4134,7 +4095,7 @@
cm->reference_mode = REFERENCE_MODE_SELECT;
if (cm->interp_filter == SWITCHABLE) {
- cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);
+ cm->interp_filter = get_cm_interp_filter(cpi);
}
encode_frame_internal(cpi);
@@ -4142,9 +4103,6 @@
for (i = 0; i < REFERENCE_MODES; ++i)
mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;
-
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
int single_count_zero = 0;
int comp_count_zero = 0;
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index afe3292..a319901 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -251,7 +251,6 @@
typedef struct RD_COUNTS {
vp10_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
- int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
int m_search_count;
int ex_search_count;
} RD_COUNTS;
diff --git a/vp10/encoder/ethread.c b/vp10/encoder/ethread.c
index 6cb9494..c586b9a 100644
--- a/vp10/encoder/ethread.c
+++ b/vp10/encoder/ethread.c
@@ -19,9 +19,6 @@
for (i = 0; i < REFERENCE_MODES; i++)
td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i];
-
for (i = 0; i < TX_SIZES; i++)
for (j = 0; j < PLANE_TYPES; j++)
for (k = 0; k < REF_TYPES; k++)
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index c3a739b..faedb43 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -65,6 +65,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
@@ -105,6 +107,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
@@ -145,6 +149,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
@@ -185,6 +191,8 @@
break;
case H_DCT:
case V_DCT:
+ vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
@@ -226,11 +234,10 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
- // Use C version since DST exists only in C
- vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
break;
@@ -270,11 +277,11 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST exists only in C
vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
break;
@@ -314,11 +321,11 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
+ case H_DCT:
+ case V_DCT:
// Use C version since DST exists only in C
vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
break;
- case H_DCT:
- case V_DCT:
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
break;
@@ -355,10 +362,10 @@
case ADST_DST:
case DST_FLIPADST:
case FLIPADST_DST:
- vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
- break;
case H_DCT:
case V_DCT:
+ vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ break;
case IDTX:
vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
break;
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h
index 5a6a44a..61feabe 100644
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@@ -279,8 +279,6 @@
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
- int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
-
int RDMULT;
int RDDIV;
} RD_OPT;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index c7a2702..16deebf 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -5452,7 +5452,6 @@
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int mode_index,
int64_t comp_pred_diff[REFERENCE_MODES],
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
int skippable) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -5466,9 +5465,6 @@
ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
-
- memcpy(ctx->best_filter_diff, best_filter_diff,
- sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}
static void setup_buffer_inter(
@@ -6036,9 +6032,7 @@
INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
int (*single_skippable)[MAX_REF_FRAMES],
int64_t *psse,
- const int64_t ref_best_rd,
- int64_t *mask_filter,
- int64_t filter_cache[]) {
+ const int64_t ref_best_rd) {
VP10_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -6075,28 +6069,6 @@
!is_comp_interintra_pred &&
#endif // CONFIG_EXT_INTER
is_obmc_allowed(mbmi);
- int best_obmc_flag = 0;
-#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
- DECLARE_ALIGNED(16, uint16_t, tmp_buf2_16[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
- uint8_t *tmp_buf1, *tmp_buf2;
- uint8_t *obmc_tmp_buf1[3];
- uint8_t *obmc_tmp_buf2[3];
-#else
- DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
- DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * CU_SIZE * CU_SIZE]);
- uint8_t *obmc_tmp_buf1[3] = {tmp_buf1, tmp_buf1 + CU_SIZE * CU_SIZE,
- tmp_buf1 + CU_SIZE * CU_SIZE * 2};
- uint8_t *obmc_tmp_buf2[3] = {tmp_buf2, tmp_buf2 + CU_SIZE * CU_SIZE,
- tmp_buf2 + CU_SIZE * CU_SIZE * 2};
-#endif // CONFIG_VP9_HIGHBITDEPTH
- int obmc_tmp_stride[3] = {CU_SIZE, CU_SIZE, CU_SIZE};
-
- uint8_t skip_txfm_bestfilter[2][MAX_MB_PLANE << 2] = {{0}, {0}};
- int64_t bsse_bestfilter[2][MAX_MB_PLANE << 2] = {{0}, {0}};
- int skip_txfm_sb_bestfilter[2] = {0};
- int64_t skip_sse_sb_bestfilter[2] = {INT64_MAX};
-
int rate2_nocoeff, best_rate2 = INT_MAX,
best_skippable, best_xskip, best_disable_skip = 0;
#if CONFIG_SUPERTX
@@ -6154,25 +6126,9 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
-#if CONFIG_OBMC
- tmp_buf1 = CONVERT_TO_BYTEPTR(tmp_buf1_16);
- tmp_buf2 = CONVERT_TO_BYTEPTR(tmp_buf2_16);
-#endif // CONFIG_OBMC
} else {
tmp_buf = (uint8_t *)tmp_buf16;
-#if CONFIG_OBMC
- tmp_buf1 = (uint8_t *)tmp_buf1_16;
- tmp_buf2 = (uint8_t *)tmp_buf2_16;
-#endif // CONFIG_OBMC
}
-#if CONFIG_OBMC
- obmc_tmp_buf1[0] = tmp_buf1;
- obmc_tmp_buf1[1] = tmp_buf1 + 4096;
- obmc_tmp_buf1[2] = tmp_buf1 + 8192;
- obmc_tmp_buf2[0] = tmp_buf2;
- obmc_tmp_buf2[1] = tmp_buf2 + 4096;
- obmc_tmp_buf2[2] = tmp_buf2 + 8192;
-#endif // CONFIG_OBMC
#endif // CONFIG_VP9_HIGHBITDEPTH
if (is_comp_pred) {
@@ -6419,11 +6375,6 @@
if (is_comp_pred)
intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
- // Search for best switchable filter by checking the variance of
- // pred error irrespective of whether the filter will be used
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
-
best_filter = predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
single_filter);
if (cm->interp_filter != BILINEAR && best_filter == SWITCHABLE) {
@@ -6436,15 +6387,6 @@
int64_t rs_rd;
int tmp_skip_sb = 0;
int64_t tmp_skip_sse = INT64_MAX;
-#if CONFIG_OBMC
- int obmc_flag = 0;
- int tmp_skip_sb_obmc = 0;
- int64_t tmp_skip_sse_obmc = INT64_MAX;
- int64_t rdobmc = INT64_MAX;
- uint8_t *obmc_tmp_buf[3];
- uint8_t tmp_skip_txfm[MAX_MB_PLANE << 2] = {0};
- int64_t tmp_bsse[MAX_MB_PLANE << 2] = {0};
-#endif // CONFIG_OBMC
mbmi->interp_filter = i;
rs = vp10_get_switchable_rate(cpi, xd);
@@ -6452,26 +6394,12 @@
if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
- filter_cache[i] = rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
-#if CONFIG_OBMC
- if (allow_obmc) {
- obmc_flag = best_obmc_flag;
- rd += RDCOST(x->rdmult, x->rddiv,
- cpi->obmc_cost[bsize][obmc_flag], 0);
- }
-#endif // CONFIG_OBMC
- *mask_filter = VPXMAX(*mask_filter, rd);
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
-#if CONFIG_OBMC
- int rate_sum_obmc = 0;
- int64_t dist_sum_obmc = 0;
-#endif // CONFIG_OBMC
+
if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
(cpi->sf.interp_filter_search_mask & (1 << i))) {
rate_sum = INT_MAX;
@@ -6488,18 +6416,10 @@
(cm->interp_filter == mbmi->interp_filter ||
(i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
restore_dst_buf(xd, orig_dst, orig_dst_stride);
-#if CONFIG_OBMC
- for (j = 0; j < MAX_MB_PLANE; j++) {
- obmc_tmp_buf[j] = obmc_tmp_buf1[j];
- }
-#endif // CONFIG_OBMC
} else {
for (j = 0; j < MAX_MB_PLANE; j++) {
xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
xd->plane[j].dst.stride = 64;
-#if CONFIG_OBMC
- obmc_tmp_buf[j] = obmc_tmp_buf2[j];
-#endif // CONFIG_OBMC
}
}
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
@@ -6507,40 +6427,8 @@
&tmp_skip_sb, &tmp_skip_sse);
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
-#if CONFIG_OBMC
- if (allow_obmc) {
- rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0);
- memcpy(tmp_skip_txfm, x->skip_txfm, sizeof(tmp_skip_txfm));
- memcpy(tmp_bsse, x->bsse, sizeof(tmp_bsse));
-
- vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
- obmc_tmp_buf, obmc_tmp_stride,
- dst_buf1, dst_stride1,
- dst_buf2, dst_stride2);
- for (j = 0; j < MAX_MB_PLANE; ++j) {
- xd->plane[j].dst.buf = obmc_tmp_buf[j];
- xd->plane[j].dst.stride = obmc_tmp_stride[j];
- }
- model_rd_for_sb(cpi, bsize, x, xd, &rate_sum_obmc, &dist_sum_obmc,
- &tmp_skip_sb_obmc, &tmp_skip_sse_obmc);
- rdobmc = RDCOST(x->rdmult, x->rddiv,
- rate_sum_obmc + cpi->obmc_cost[bsize][1],
- dist_sum_obmc);
-
- if ((double)rdobmc <= 0.99 * (double)rd) {
- obmc_flag = 1;
- rd = rdobmc;
- rate_sum = rate_sum_obmc;
- dist_sum = dist_sum_obmc;
- }
- }
-#endif // CONFIG_OBMC
- filter_cache[i] = rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
- *mask_filter = VPXMAX(*mask_filter, rd);
if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
tmp_rate_sum = rate_sum;
@@ -6557,10 +6445,6 @@
newbest = i == 0 || rd < best_rd;
if (newbest) {
-#if CONFIG_OBMC
- if (allow_obmc)
- best_obmc_flag = obmc_flag;
-#endif // CONFIG_OBMC
best_rd = rd;
best_filter = mbmi->interp_filter;
if (cm->interp_filter == SWITCHABLE && i &&
@@ -6574,31 +6458,6 @@
pred_exists = 1;
tmp_rd = best_rd;
-#if CONFIG_OBMC
- if (allow_obmc) {
- skip_txfm_sb_bestfilter[0] = tmp_skip_sb;
- skip_sse_sb_bestfilter[0] = tmp_skip_sse;
- memcpy(skip_txfm_bestfilter[0], tmp_skip_txfm, sizeof(skip_txfm));
- memcpy(bsse_bestfilter[0], tmp_bsse, sizeof(bsse));
-
- skip_txfm_sb_bestfilter[1] = tmp_skip_sb_obmc;
- skip_sse_sb_bestfilter[1] = tmp_skip_sse_obmc;
- memcpy(skip_txfm_bestfilter[1], x->skip_txfm, sizeof(skip_txfm));
- memcpy(bsse_bestfilter[1], x->bsse, sizeof(bsse));
- if (best_obmc_flag) {
- tmp_skip_sb = tmp_skip_sb_obmc;
- tmp_skip_sse = tmp_skip_sse_obmc;
- } else {
- memcpy(x->skip_txfm, tmp_skip_txfm, sizeof(tmp_skip_txfm));
- memcpy(x->bsse, tmp_bsse, sizeof(tmp_bsse));
- }
- } else {
- skip_txfm_sb_bestfilter[0] = tmp_skip_sb;
- skip_sse_sb_bestfilter[0] = tmp_skip_sse;
- memcpy(skip_txfm_bestfilter[0], x->skip_txfm, sizeof(skip_txfm));
- memcpy(bsse_bestfilter[0], x->bsse, sizeof(bsse));
- }
-#endif // CONFIG_OBMC
skip_txfm_sb = tmp_skip_sb;
skip_sse_sb = tmp_skip_sse;
memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
@@ -6923,15 +6782,7 @@
#endif // CONFIG_EXT_INTERP
#endif // CONFIG_EXT_INTER
-#if CONFIG_OBMC
- if (allow_obmc)
- mbmi->obmc = best_obmc_flag;
- else
- mbmi->obmc = 0;
-#endif // CONFIG_OBMC
-
if (pred_exists) {
-#if !CONFIG_OBMC
if (best_needs_copy) {
// again temporarily set the buffers to local memory to prevent a memcpy
for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -6939,20 +6790,11 @@
xd->plane[i].dst.stride = 64;
}
}
-#endif // !CONFIG_OBMC
- rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
-#if CONFIG_OBMC
- if (allow_obmc)
- rd += RDCOST(x->rdmult, x->rddiv,
- cpi->obmc_cost[bsize][mbmi->obmc], 0);
-#endif // CONFIG_OBMC
+ rd = tmp_rd;
} else {
int tmp_rate;
int64_t tmp_dist;
-#if CONFIG_OBMC
- int64_t rdobmc = INT64_MAX;
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
-#endif // CONFIG_OBMC
+
// Handles the special case when a filter that is not in the
// switchable list (ex. bilinear) is indicated at the frame level, or
// skip condition holds.
@@ -6960,34 +6802,6 @@
model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
&skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
-#if CONFIG_OBMC
- skip_txfm_sb_bestfilter[0] = skip_txfm_sb;
- skip_sse_sb_bestfilter[0] = skip_sse_sb;
- memcpy(skip_txfm_bestfilter[0], x->skip_txfm, sizeof(skip_txfm));
- memcpy(bsse_bestfilter[0], x->bsse, sizeof(bsse));
- if (allow_obmc) {
- rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0);
- vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
- obmc_tmp_buf1, obmc_tmp_stride,
- dst_buf1, dst_stride1,
- dst_buf2, dst_stride2);
- for (i = 0; i < MAX_MB_PLANE; ++i) {
- xd->plane[i].dst.buf = obmc_tmp_buf1[i];
- xd->plane[i].dst.stride = obmc_tmp_stride[i];
- }
- model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
- &skip_txfm_sb, &skip_sse_sb);
- rdobmc = RDCOST(x->rdmult, x->rddiv,
- rs + tmp_rate + cpi->obmc_cost[bsize][1], tmp_dist);
-
- skip_txfm_sb_bestfilter[1] = skip_txfm_sb;
- skip_sse_sb_bestfilter[1] = skip_sse_sb;
- memcpy(skip_txfm_bestfilter[1], x->skip_txfm, sizeof(skip_txfm));
- memcpy(bsse_bestfilter[1], x->bsse, sizeof(bsse));
- if ((double)rdobmc <= 0.99 * (double)rd)
- rd = rdobmc;
- }
-#endif // CONFIG_OBMC
memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
memcpy(bsse, x->bsse, sizeof(bsse));
}
@@ -7078,40 +6892,16 @@
#if CONFIG_OBMC
best_rd = INT64_MAX;
for (mbmi->obmc = 0; mbmi->obmc <= allow_obmc; mbmi->obmc++) {
- int64_t tmp_rd;
+ int64_t tmp_rd, tmp_dist;
+ int tmp_rate;
- if (pred_exists) {
- if (best_needs_copy) {
- if (mbmi->obmc) {
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = obmc_tmp_buf2[i];
- xd->plane[i].dst.stride = 64;
- }
- } else {
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
- xd->plane[i].dst.stride = 64;
- }
- }
- } else {
- if (mbmi->obmc) {
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = obmc_tmp_buf1[i];
- xd->plane[i].dst.stride = 64;
- }
- } else {
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
- }
- }
- } else {
- if (mbmi->obmc) {
- for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].dst.buf = obmc_tmp_buf1[i];
- xd->plane[i].dst.stride = 64;
- }
- } else {
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
- }
+ if (mbmi->obmc) {
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0,
+ NULL, NULL,
+ dst_buf1, dst_stride1,
+ dst_buf2, dst_stride2);
+ model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
+ &skip_txfm_sb, &skip_sse_sb);
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -7126,11 +6916,6 @@
vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
#endif // CONFIG_VP9_HIGHBITDEPTH
- skip_txfm_sb = skip_txfm_sb_bestfilter[mbmi->obmc];
- skip_sse_sb = skip_sse_sb_bestfilter[mbmi->obmc];
- memcpy(x->skip_txfm, skip_txfm_bestfilter[mbmi->obmc],
- sizeof(skip_txfm));
- memcpy(x->bsse, bsse_bestfilter[mbmi->obmc], sizeof(bsse));
x->skip = 0;
*rate2 = rate2_nocoeff;
@@ -7231,10 +7016,7 @@
// The cost of skip bit needs to be added.
#if CONFIG_OBMC
- mbmi->skip = xd->lossless[mbmi->segment_id] ? 0 : 1;
- if (xd->lossless[mbmi->segment_id])
- *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
- else
+ mbmi->skip = 0;
#endif // CONFIG_OBMC
*rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
@@ -7590,8 +7372,6 @@
int64_t best_rd = best_rd_so_far;
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
- int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode;
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
@@ -7629,8 +7409,6 @@
int64_t mode_threshold[MAX_MODES];
int *mode_map = tile_data->mode_map[bsize];
const int mode_search_skip_flags = sf->mode_search_skip_flags;
- int64_t mask_filter = 0;
- int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
int palette_ctx = 0;
const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
@@ -7686,16 +7464,11 @@
sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
#endif // CONFIG_EXT_INTRA
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
-
estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
&comp_mode_p);
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = INT64_MAX;
for (i = 0; i < TX_SIZES; i++)
rate_uv_intra[i] = INT_MAX;
for (i = 0; i < MAX_REF_FRAMES; ++i)
@@ -8280,8 +8053,7 @@
#endif // CONFIG_EXT_INTER
single_inter_filter,
single_skippable,
- &total_sse, best_rd,
- &mask_filter, filter_cache);
+ &total_sse, best_rd);
#if CONFIG_REF_MV
// TODO(jingning): This needs some refactoring to improve code quality
@@ -8329,12 +8101,10 @@
clamp_mv2(&cur_mv.as_mv, xd);
if (!mv_check_bounds(x, &cur_mv.as_mv)) {
- int64_t dummy_filter_cache[SWITCHABLE_FILTER_CONTEXTS];
INTERP_FILTER dummy_single_inter_filter[MB_MODE_COUNT]
[MAX_REF_FRAMES];
int dummy_single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
int dummy_disable_skip = 0;
- int64_t dummy_mask_filter = 0;
#if CONFIG_EXT_INTER
int_mv dummy_single_newmvs[2][MAX_REF_FRAMES] =
{ { { 0 } }, { { 0 } } };
@@ -8367,9 +8137,7 @@
#endif
dummy_single_inter_filter,
dummy_single_skippable,
- &tmp_sse, best_rd,
- &dummy_mask_filter,
- dummy_filter_cache);
+ &tmp_sse, best_rd);
}
tmp_rate += cpi->drl_mode_cost0[drl0_ctx][1];
@@ -8511,8 +8279,6 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help.. i.e. is it the new best mode
@@ -8532,11 +8298,12 @@
rd_cost->rate = rate2;
#if CONFIG_SUPERTX
- *returnrate_nocoef = rate2 - rate_y - rate_uv;
- if (!disable_skip) {
- *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
- skippable || this_skip2);
- }
+ if (x->skip && rate_y == INT_MAX)
+ *returnrate_nocoef = rate2;
+ else
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd),
+ disable_skip || skippable || this_skip2);
*returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
mbmi->ref_frame[0] != INTRA_FRAME);
#if CONFIG_OBMC
@@ -8611,29 +8378,6 @@
}
if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
-
- /* keep record of best filter type */
- if (!mode_excluded && cm->interp_filter != BILINEAR) {
- int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
- SWITCHABLE_FILTERS : cm->interp_filter];
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- int64_t adj_rd;
- if (ref == INT64_MAX)
- adj_rd = 0;
- else if (filter_cache[i] == INT64_MAX)
- // when early termination is triggered, the encoder does not have
- // access to the rate-distortion cost. it only knows that the cost
- // should be above the maximum valid value. hence it takes the known
- // maximum plus an arbitrary constant as the rate-distortion cost.
- adj_rd = mask_filter - ref + 10;
- else
- adj_rd = filter_cache[i] - ref;
-
- adj_rd += this_rd;
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
- }
- }
}
if (early_term)
@@ -8928,21 +8672,6 @@
best_pred_diff[i] = best_rd - best_pred_rd[i];
}
- if (!x->skip) {
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- if (best_filter_rd[i] == INT64_MAX)
- best_filter_diff[i] = 0;
- else
- best_filter_diff[i] = best_rd - best_filter_rd[i];
- }
- if (cm->interp_filter == SWITCHABLE)
- assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
- } else {
- vp10_zero(best_filter_diff);
- }
-
- // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
- // updating code causes PSNR loss. Need to figure out the confliction.
x->skip |= best_mode_skippable;
if (!x->skip && !x->select_tx_size) {
@@ -8966,7 +8695,7 @@
assert(best_mode_index >= 0);
store_coding_context(x, ctx, best_mode_index, best_pred_diff,
- best_filter_diff, best_mode_skippable);
+ best_mode_skippable);
if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
restore_uv_color_map(cpi, x);
@@ -8987,7 +8716,6 @@
const int comp_pred = 0;
int i;
int64_t best_pred_diff[REFERENCE_MODES];
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vpx_prob comp_mode_p;
INTERP_FILTER best_filter = SWITCHABLE;
@@ -9072,12 +8800,11 @@
cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
vp10_zero(best_pred_diff);
- vp10_zero(best_filter_diff);
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
store_coding_context(x, ctx, THR_ZEROMV,
- best_pred_diff, best_filter_diff, 0);
+ best_pred_diff, 0);
}
void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
@@ -9117,8 +8844,6 @@
int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
- int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
- int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode;
int ref_index, best_ref_index = 0;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
@@ -9138,8 +8863,6 @@
b_mode_info best_bmodes[4];
int best_skip2 = 0;
int ref_frame_skip_mask[2] = { 0 };
- int64_t mask_filter = 0;
- int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
int internal_active_edge =
vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
@@ -9163,9 +8886,6 @@
mbmi->use_wedge_interintra = 0;
#endif // CONFIG_EXT_INTER
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
-
for (i = 0; i < 4; i++) {
int j;
#if CONFIG_EXT_INTER
@@ -9185,8 +8905,6 @@
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = INT64_MAX;
rate_uv_intra = INT_MAX;
rd_cost->rate = INT_MAX;
@@ -9444,8 +9162,6 @@
#endif // CONFIG_EXT_REFS
this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- filter_cache[i] = INT64_MAX;
// TODO(any): Add search of the tx_type to improve rd performance at the
// expense of speed.
@@ -9489,14 +9205,9 @@
continue;
rs = vp10_get_switchable_rate(cpi, xd);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- filter_cache[switchable_filter_index] = tmp_rd;
- filter_cache[SWITCHABLE_FILTERS] =
- VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
tmp_rd += rs_rd;
- mask_filter = VPXMAX(mask_filter, tmp_rd);
-
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
tmp_best_filter = mbmi->interp_filter;
@@ -9668,8 +9379,6 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help.. i.e. is it the new best mode
@@ -9764,29 +9473,6 @@
best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
- /* keep record of best filter type */
- if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
- cm->interp_filter != BILINEAR) {
- int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
- SWITCHABLE_FILTERS : cm->interp_filter];
- int64_t adj_rd;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- if (ref == INT64_MAX)
- adj_rd = 0;
- else if (filter_cache[i] == INT64_MAX)
- // when early termination is triggered, the encoder does not have
- // access to the rate-distortion cost. it only knows that the cost
- // should be above the maximum valid value. hence it takes the known
- // maximum plus an arbitrary constant as the rate-distortion cost.
- adj_rd = mask_filter - ref + 10;
- else
- adj_rd = filter_cache[i] - ref;
-
- adj_rd += this_rd;
- best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
- }
- }
-
if (early_term)
break;
@@ -9858,21 +9544,8 @@
best_pred_diff[i] = best_rd - best_pred_rd[i];
}
- if (!x->skip) {
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- if (best_filter_rd[i] == INT64_MAX)
- best_filter_diff[i] = 0;
- else
- best_filter_diff[i] = best_rd - best_filter_rd[i];
- }
- if (cm->interp_filter == SWITCHABLE)
- assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
- } else {
- vp10_zero(best_filter_diff);
- }
-
store_coding_context(x, ctx, best_ref_index,
- best_pred_diff, best_filter_diff, 0);
+ best_pred_diff, 0);
}
#if CONFIG_OBMC
diff --git a/vp8/common/common.h b/vp8/common/common.h
index e58a9cc..c42e875 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -32,13 +32,13 @@
/* Use this for variably-sized arrays. */
#define vp8_copy_array( Dest, Src, N) { \
- assert( sizeof( *Dest) == sizeof( *Src)); \
- memcpy( Dest, Src, N * sizeof( *Src)); \
+ assert( sizeof( *(Dest)) == sizeof( *(Src))); \
+ memcpy( Dest, Src, (N) * sizeof( *(Src))); \
}
-#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));
+#define vp8_zero( Dest) memset( &(Dest), 0, sizeof( Dest));
-#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));
+#define vp8_zero_array( Dest, N) memset( Dest, 0, (N) * sizeof( *(Dest)));
#ifdef __cplusplus
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 76e7cd4..9d5dbc6 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -33,12 +33,12 @@
// Use this for variably-sized arrays.
#define vp9_copy_array(dest, src, n) { \
- assert(sizeof(*dest) == sizeof(*src)); \
- memcpy(dest, src, n * sizeof(*src)); \
+ assert(sizeof(*(dest)) == sizeof(*(src))); \
+ memcpy(dest, src, (n) * sizeof(*(src))); \
}
#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest))
-#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest))
+#define vp9_zero_array(dest, n) memset(dest, 0, (n) * sizeof(*(dest)))
static INLINE int get_unsigned_bits(unsigned int num_values) {
return num_values > 0 ? get_msb(num_values) + 1 : 0;