Merge "Remove duplicated variables in EXT_INTER" into nextgenv2
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 597d5b2..ee46820 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -111,6 +111,9 @@
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add sse2 avx2/;
+
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht32x32_1024_add/;
}
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
@@ -141,6 +144,10 @@
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add/;
+
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht32x32_1024_add/;
+
} else {
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x4_16_add sse2 neon dspr2/;
@@ -169,6 +176,9 @@
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add sse2 avx2 dspr2/;
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht32x32_1024_add/;
+
if (aom_config("CONFIG_EXT_TX") ne "yes") {
specialize qw/av1_iht4x4_16_add msa/;
specialize qw/av1_iht8x8_64_add msa/;
@@ -176,6 +186,13 @@
}
}
}
+add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";  # declared unconditionally at top level
+specialize qw/av1_iht32x32_1024_add/;  # no architecture flavors are listed here
+
+if (aom_config("CONFIG_TX64X64") eq "yes") {  # 64x64 prototype is only registered when CONFIG_TX64X64 is "yes"
+ add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/av1_iht64x64_4096_add/;  # no architecture flavors are listed here
+}
if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 2ec83ec..6c0eb3d 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -134,24 +134,32 @@
#if CONFIG_CB4X4
TX_2X2, // 2x2 transform
#endif
- TX_4X4, // 4x4 transform
- TX_8X8, // 8x8 transform
- TX_16X16, // 16x16 transform
- TX_32X32, // 32x32 transform
- TX_4X8, // 4x8 transform
- TX_8X4, // 8x4 transform
- TX_8X16, // 8x16 transform
- TX_16X8, // 16x8 transform
- TX_16X32, // 16x32 transform
- TX_32X16, // 32x16 transform
- TX_SIZES_ALL, // Includes rectangular transforms
- TX_SIZES = TX_32X32 + 1, // Does NOT include rectangular transforms
- TX_INVALID = 255 // Invalid transform size
+ TX_4X4, // 4x4 transform
+ TX_8X8, // 8x8 transform
+ TX_16X16, // 16x16 transform
+ TX_32X32, // 32x32 transform
+#if CONFIG_TX64X64
+ TX_64X64, // 64x64 transform
+#endif // CONFIG_TX64X64
+ TX_4X8, // 4x8 transform
+ TX_8X4, // 8x4 transform
+ TX_8X16, // 8x16 transform
+ TX_16X8, // 16x8 transform
+ TX_16X32, // 16x32 transform
+ TX_32X16, // 32x16 transform
+#if 0 // CONFIG_TX64X64
+ // TODO(debargha): To be enabled later
+ TX_32X64, // 32x64 transform
+ TX_64X32, // 64x32 transform
+#endif // CONFIG_TX64X64
+ TX_SIZES_ALL, // Includes rectangular transforms
+ TX_SIZES = TX_4X8, // Does NOT include rectangular transforms
+ TX_INVALID = 255 // Invalid transform size
} TX_SIZE;
#define MAX_TX_DEPTH (TX_32X32 - TX_4X4)
-#define MAX_TX_SIZE_LOG2 5
+#define MAX_TX_SIZE_LOG2 (5 + CONFIG_TX64X64)
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MIN_TX_SIZE_LOG2 2
#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2)
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 223c577..2663d2d 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -23,14 +23,14 @@
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
const TX_SIZE tx_size) {
(void)tx_type;
-#if CONFIG_AOM_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- return txsize_sqr_up_map[tx_size] == TX_32X32;
- }
-#else
(void)xd;
-#endif
- return txsize_sqr_up_map[tx_size] == TX_32X32;
+ if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1;
+#if CONFIG_TX64X64
+ else if (txsize_sqr_up_map[tx_size] == TX_64X64)
+ return 2;
+#endif // CONFIG_TX64X64
+ else
+ return 0;
}
// NOTE: The implementation of all inverses need to be aware of the fact
@@ -58,6 +58,14 @@
int i;
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
+
+#if CONFIG_TX64X64
+// 64-point identity kernel: every input coefficient is multiplied by
+// 4 * Sqrt2, passed through dct_const_round_shift(), and stored at the same
+// index of the output.
+static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
+  int idx = 0;
+  while (idx < 64) {
+    const tran_low_t scaled =
+        (tran_low_t)dct_const_round_shift(input[idx] * 4 * Sqrt2);
+    output[idx] = scaled;
+    ++idx;
+  }
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
// For use in lieu of ADST
@@ -94,12 +102,6 @@
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
-static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 64; ++i)
- output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
-}
-
// For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
int i;
@@ -174,7 +176,10 @@
output[i] =
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 4 * Sqrt2), bd);
}
+#endif // CONFIG_TX64X64
+#endif // CONFIG_EXT_TX
+#if CONFIG_TX64X64
// For use in lieu of ADST
static void highbd_ihalfright64_c(const tran_low_t *input, tran_low_t *output,
int bd) {
@@ -215,7 +220,6 @@
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
#endif // CONFIG_AOM_HIGHBITDEPTH
// Inverse identity transform and add.
@@ -223,7 +227,7 @@
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int bs, int tx_type) {
int r, c;
- const int shift = bs < 32 ? 3 : 2;
+ const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
@@ -929,6 +933,7 @@
}
}
}
+#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
@@ -938,6 +943,7 @@
{ ihalfright64_c, idct64_row_c }, // ADST_DCT
{ idct64_col_c, ihalfright64_c }, // DCT_ADST
{ ihalfright64_c, ihalfright64_c }, // ADST_ADST
+#if CONFIG_EXT_TX
{ ihalfright64_c, idct64_row_c }, // FLIPADST_DCT
{ idct64_col_c, ihalfright64_c }, // DCT_FLIPADST
{ ihalfright64_c, ihalfright64_c }, // FLIPADST_FLIPADST
@@ -950,6 +956,7 @@
{ iidtx64_c, ihalfright64_c }, // H_ADST
{ ihalfright64_c, iidtx64_c }, // V_FLIPADST
{ iidtx64_c, ihalfright64_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
};
int i, j;
@@ -979,7 +986,9 @@
IHT_64[tx_type].cols(out[i], out[i]);
}
+#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
+#endif // CONFIG_EXT_TX
// Sum with the destination
for (i = 0; i < 64; ++i) {
@@ -991,7 +1000,6 @@
}
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
// idct
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
@@ -1056,6 +1064,14 @@
aom_idct32x32_1024_add(input, dest, stride);
}
+#if CONFIG_TX64X64
+void av1_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob) {  // 64x64 inverse DCT + add: thin wrapper over av1_iht64x64_4096_add().
+ (void)eob;  // eob is accepted but not consulted in this implementation.
+ av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);  // tx_type is fixed to DCT_DCT
+}
+#endif // CONFIG_TX64X64
+
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type, int lossless) {
if (lossless) {
@@ -1206,6 +1222,35 @@
}
}
+#if CONFIG_TX64X64
+void av1_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type) {  // Picks the 64x64 inverse transform + add routine for tx_type.
+ switch (tx_type) {
+ case DCT_DCT: av1_idct64x64_add(input, dest, stride, eob); break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ av1_iht64x64_4096_add_c(input, dest, stride, tx_type);  // all hybrid types share the C 64x64 kernel
+ break;
+ case IDTX: inv_idtx_add_c(input, dest, stride, 64, tx_type); break;  // block size argument is 64
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;  // any tx_type not listed above trips the assert
+ }
+}
+#endif // CONFIG_TX64X64
+
#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
@@ -1835,6 +1880,7 @@
}
}
}
+#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
@@ -1844,6 +1890,7 @@
{ highbd_ihalfright64_c, highbd_idct64_row_c }, // ADST_DCT
{ highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_ADST
{ highbd_ihalfright64_c, highbd_ihalfright64_c }, // ADST_ADST
+#if CONFIG_EXT_TX
{ highbd_ihalfright64_c, highbd_idct64_row_c }, // FLIPADST_DCT
{ highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_FLIPADST
{ highbd_ihalfright64_c, highbd_ihalfright64_c }, // FLIPADST_FLIPADST
@@ -1856,6 +1903,7 @@
{ highbd_iidtx64_c, highbd_ihalfright64_c }, // H_ADST
{ highbd_ihalfright64_c, highbd_iidtx64_c }, // V_FLIPADST
{ highbd_iidtx64_c, highbd_ihalfright64_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1887,7 +1935,9 @@
HIGH_IHT_64[tx_type].cols(out[i], out[i], bd);
}
+#if CONFIG_EXT_TX
maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
+#endif // CONFIG_EXT_TX
// Sum with the destination
for (i = 0; i < 64; ++i) {
@@ -1900,7 +1950,6 @@
}
}
#endif // CONFIG_TX64X64
-#endif // CONFIG_EXT_TX
// idct
void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
@@ -2155,6 +2204,42 @@
default: assert(0); break;
}
}
+
+#if CONFIG_TX64X64
+void av1_highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd,
+ TX_TYPE tx_type) {  // High bit-depth 64x64 inverse transform + add, selected by tx_type.
+ (void)eob;  // eob is not used by any branch below.
+ switch (tx_type) {
+ case DCT_DCT:
+ av1_inv_txfm2d_add_64x64(input, CONVERT_TO_SHORTPTR(dest), stride,
+ DCT_DCT, bd);  // dest is converted with CONVERT_TO_SHORTPTR at this call site
+ break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ av1_highbd_iht64x64_4096_add_c(input, dest, stride, tx_type, bd);  // dest is passed through unconverted here
+ break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);  // block size argument is 64
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;  // any tx_type not listed above trips the assert
+ }
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
@@ -2165,6 +2250,11 @@
const int lossless = inv_txfm_param->lossless;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ av1_inv_txfm_add_64x64(input, dest, stride, eob, tx_type);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
av1_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
break;
@@ -2206,6 +2296,11 @@
const int lossless = inv_txfm_param->lossless;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ av1_highbd_inv_txfm_add_64x64(input, dest, stride, eob, bd, tx_type);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
av1_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
break;
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 8f0a212..f1c4239 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -369,6 +369,7 @@
const int tile_height = rst->tile_height >> rst->subsampling_y;
int i, j, subtile_idx;
int h_start, h_end, v_start, v_end;
+ const int shift = bit_depth - 8;
for (subtile_idx = 0; subtile_idx < BILATERAL_SUBTILES; ++subtile_idx) {
uint16_t *data_p, *tmpdata_p;
@@ -398,7 +399,7 @@
for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
wt = (int)wx_lut[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] *
- (int)wr_lut_[data_p2[x] - data_p[j]];
+ (int)wr_lut_[(data_p2[x] >> shift) - (data_p[j] >> shift)];
wtsum += (int64_t)wt;
flsum += (int64_t)wt * data_p2[x];
}
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index f73e777..89534de 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -215,7 +215,7 @@
} else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
return p[1];
} else {
- const int64_t v1 = x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
+ const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
const int64_t v3 = x * (p[1] - p[-1]);
const int64_t v4 = 2 * p[0];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 0c155e0..8b0de2f 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -6180,11 +6180,12 @@
ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
const struct macroblockd_plane *const pd = &xd->plane[plane];
int coeff_ctx = 1;
+ RD_STATS this_rd_stats;
- this_rate = 0;
- this_dist = 0;
- pnsse = 0;
- pnskip = 1;
+ this_rd_stats.rate = 0;
+ this_rd_stats.dist = 0;
+ this_rd_stats.sse = 0;
+ this_rd_stats.skip = 1;
tx_size = max_txsize_lookup[bsize];
tx_size =
@@ -6194,8 +6195,13 @@
av1_subtract_plane(x, bsize, plane);
av1_tx_block_rd_b(cpi, x, tx_size, 0, 0, plane, 0,
- get_plane_block_size(bsize, pd), coeff_ctx, &this_rate,
- &this_dist, &pnsse, &pnskip);
+ get_plane_block_size(bsize, pd), coeff_ctx,
+ &this_rd_stats);
+
+ this_rate = this_rd_stats.rate;
+ this_dist = this_rd_stats.dist;
+ pnsse = this_rd_stats.sse;
+ pnskip = this_rd_stats.skip;
#else
tx_size = max_txsize_lookup[bsize];
tx_size =
@@ -6223,7 +6229,9 @@
ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
const struct macroblockd_plane *const pd = &xd->plane[0];
int coeff_ctx = 1;
+ RD_STATS this_rd_stats;
#endif // CONFIG_VAR_TX
+
#if CONFIG_EXT_TX
if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
#else
@@ -6232,15 +6240,20 @@
mbmi->tx_type = tx_type;
#if CONFIG_VAR_TX
- this_rate = 0;
- this_dist = 0;
- pnsse = 0;
- pnskip = 1;
+ this_rd_stats.rate = 0;
+ this_rd_stats.dist = 0;
+ this_rd_stats.sse = 0;
+ this_rd_stats.skip = 1;
av1_get_entropy_contexts(bsize, tx_size, pd, ctxa, ctxl);
coeff_ctx = combine_entropy_contexts(ctxa[0], ctxl[0]);
- av1_tx_block_rd_b(cpi, x, tx_size, 0, 0, 0, 0, bsize, coeff_ctx, &this_rate,
- &this_dist, &pnsse, &pnskip);
+ av1_tx_block_rd_b(cpi, x, tx_size, 0, 0, 0, 0, bsize, coeff_ctx,
+ &this_rd_stats);
+
+ this_rate = this_rd_stats.rate;
+ this_dist = this_rd_stats.dist;
+ pnsse = this_rd_stats.sse;
+ pnskip = this_rd_stats.skip;
#else
av1_txfm_rd_in_plane_supertx(x, cpi, &this_rate, &this_dist, &pnskip,
&pnsse, INT64_MAX, 0, bsize, tx_size, 0);
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index 5d88dbf..d8abea9 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c
@@ -25,7 +25,7 @@
#define MAX_CORNERS 4096
#define MIN_INLIER_PROB 0.1
-INLINE RansacFunc get_ransac_type(TransformationType type) {
+static INLINE RansacFunc get_ransac_type(TransformationType type) {
switch (type) {
case HOMOGRAPHY: return ransac_homography;
case AFFINE: return ransac_affine;
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index ff03516..a88c884 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -24,6 +24,24 @@
av1_fht32x32(src, dst, src_stride, DCT_DCT);
}
+#if CONFIG_TX64X64
+static INLINE void fdct64x64(const int16_t *src, tran_low_t *dst,
+ int src_stride) {  // Full 64x64 forward DCT: thin wrapper over av1_fht64x64().
+ av1_fht64x64(src, dst, src_stride, DCT_DCT);  // tx_type is fixed to DCT_DCT
+}
+
+// DC-only forward path for a 64x64 block: zeroes all 4096 output coefficients,
+// then stores the rounded, scaled sum of the 64x64 input samples in dst[0].
+static INLINE void fdct64x64_1(const int16_t *src, tran_low_t *dst,
+                               int src_stride) {
+  int row, col;
+  int32_t total = 0;
+  memset(dst, 0, sizeof(*dst) * 4096);
+  for (row = 0; row < 64; ++row) {
+    const int16_t *const line = src + row * src_stride;
+    for (col = 0; col < 64; ++col) total += line[col];
+  }
+  // Note: this scaling makes the transform 2 times unitary
+  dst[0] = ROUND_POWER_OF_TWO_SIGNED(total, 5);
+}
+#endif // CONFIG_TX64X64
+
static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless) {
if (lossless) {
@@ -192,6 +210,41 @@
}
}
+#if CONFIG_TX64X64
+// Forward 64x64 transform dispatch (low bit-depth path).
+//   src_diff, diff_stride: input residual block and its row stride.
+//   coeff:                 output coefficient buffer.
+//   tx_type:               selects the kernel; an unlisted type hits assert(0).
+//   fwd_txfm_opt:          consulted for DCT_DCT only. FWD_TXFM_OPT_NORMAL runs
+//                          the full transform; any other value takes the
+//                          DC-only shortcut.
+static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
+                           int diff_stride, TX_TYPE tx_type,
+                           FWD_TXFM_OPT fwd_txfm_opt) {
+  switch (tx_type) {
+    case DCT_DCT:
+      if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+        fdct64x64(src_diff, coeff, diff_stride);
+      else  // FWD_TXFM_OPT_DC
+        fdct64x64_1(src_diff, coeff, diff_stride);
+      break;
+#if CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // The V_*/H_* types previously went to the 32x32 kernel, which is not
+      // sized for this block. They now use the 64x64 kernel, matching the
+      // 64x64 inverse dispatch and the high bit-depth forward dispatch.
+      av1_fht64x64(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+#endif // CONFIG_TX64X64
+
#if CONFIG_AOM_HIGHBITDEPTH
static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless,
@@ -379,6 +432,40 @@
default: assert(0); break;
}
}
+
+#if CONFIG_TX64X64
+// High bit-depth forward 64x64 transform dispatch. fwd_txfm_opt and bd are
+// accepted but unused. Every listed type other than IDTX is handled by
+// av1_highbd_fht64x64_c(); IDTX uses av1_fwd_idtx_c() with block size 64;
+// anything else hits assert(0).
+static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
+                                  int diff_stride, TX_TYPE tx_type,
+                                  FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+  (void)bd;
+  (void)fwd_txfm_opt;
+  switch (tx_type) {
+    case DCT_DCT:
+#if CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+#endif  // CONFIG_EXT_TX
+      av1_highbd_fht64x64_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
@@ -389,6 +476,11 @@
const int rd_transform = fwd_txfm_param->rd_transform;
const int lossless = fwd_txfm_param->lossless;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt);
@@ -434,6 +526,12 @@
const int lossless = fwd_txfm_param->lossless;
const int bd = fwd_txfm_param->bd;
switch (tx_size) {
+#if CONFIG_TX64X64
+ case TX_64X64:
+ highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+#endif // CONFIG_TX64X64
case TX_32X32:
highbd_fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
diff --git a/av1/encoder/ransac.c b/av1/encoder/ransac.c
index 714d567..2699c4f 100644
--- a/av1/encoder/ransac.c
+++ b/av1/encoder/ransac.c
@@ -81,10 +81,12 @@
const int stride_points,
const int stride_proj) {
int i;
- double x, y, Z;
+ double x, y, Z, Z_inv;
for (i = 0; i < n; ++i) {
x = *(points++), y = *(points++);
- Z = 1. / (mat[7] * x + mat[6] * y + 1);
+ Z_inv = mat[7] * x + mat[6] * y + 1;
+ assert(fabs(Z_inv) > 0.00001);
+ Z = 1. / Z_inv;
*(proj++) = (mat[1] * x + mat[0] * y + mat[3]) * Z;
*(proj++) = (mat[2] * x + mat[4] * y + mat[4]) * Z;
points += stride_points - 2;
@@ -155,7 +157,7 @@
double T1[9], T2[9];
*number_of_inliers = 0;
- if (npoints < minpts * MINPTS_MULTIPLIER) {
+ if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
printf("Cannot find motion with %d matches\n", npoints);
return 1;
}
@@ -245,11 +247,15 @@
}
}
- if (num_inliers >= max_inliers) {
- double mean_distance = sum_distance / ((double)num_inliers);
- double variance = sum_distance_squared / ((double)num_inliers - 1.0) -
- mean_distance * mean_distance * ((double)num_inliers) /
- ((double)num_inliers - 1.0);
+ if (num_inliers >= max_inliers && num_inliers > 1) {
+ int temp;
+ double fracinliers, pNoOutliers, mean_distance, variance;
+
+ assert(num_inliers > 1);
+ mean_distance = sum_distance / ((double)num_inliers);
+ variance = sum_distance_squared / ((double)num_inliers - 1.0) -
+ mean_distance * mean_distance * ((double)num_inliers) /
+ ((double)num_inliers - 1.0);
if ((num_inliers > max_inliers) ||
(num_inliers == max_inliers && variance < best_variance)) {
best_variance = variance;
@@ -262,16 +268,15 @@
memcpy(best_inlier_mask, inlier_mask,
npoints * sizeof(*best_inlier_mask));
- if (num_inliers > 0) {
- double fracinliers = (double)num_inliers / (double)npoints;
- double pNoOutliers = 1 - pow(fracinliers, minpts);
- int temp;
- pNoOutliers = fmax(EPS, pNoOutliers);
- pNoOutliers = fmin(1 - EPS, pNoOutliers);
- temp = (int)(log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers));
- if (temp > 0 && temp < N) {
- N = AOMMAX(temp, MIN_TRIALS);
- }
+ assert(npoints > 0);
+ fracinliers = (double)num_inliers / (double)npoints;
+ pNoOutliers = 1 - pow(fracinliers, minpts);
+ pNoOutliers = fmax(EPS, pNoOutliers);
+ pNoOutliers = fmin(1 - EPS, pNoOutliers);
+ assert(fabs(1.0 - pNoOutliers) > 0.00001);
+ temp = (int)(log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers));
+ if (temp > 0 && temp < N) {
+ N = AOMMAX(temp, MIN_TRIALS);
}
}
}
@@ -356,6 +361,7 @@
// normalize so that H33 = 1
int i;
const double m = 1.0 / best_params[8];
+ assert(fabs(best_params[8]) > 0.00001);
for (i = 0; i < 8; ++i) best_params[i] *= m;
best_params[8] = 1.0;
}
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index a3929ef..6a750d9 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2906,8 +2906,7 @@
#if CONFIG_VAR_TX
void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int blk_row, int blk_col, int plane, int block,
- int plane_bsize, int coeff_ctx, int *rate, int64_t *dist,
- int64_t *bsse, int *skip) {
+ int plane_bsize, int coeff_ctx, RD_STATS *rd_stats) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
@@ -2994,7 +2993,7 @@
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
#endif // CONFIG_AOM_HIGHBITDEPTH
- *bsse += tmp * 16;
+ rd_stats->sse += tmp * 16;
if (p->eobs[block] > 0) {
INV_TXFM_PARAM inv_txfm_param;
@@ -3035,10 +3034,10 @@
tmp = this_dist;
}
}
- *dist += tmp * 16;
- *rate += av1_cost_coeffs(cm, x, plane, block, coeff_ctx, tx_size,
- scan_order->scan, scan_order->neighbors, 0);
- *skip &= (p->eobs[block] == 0);
+ rd_stats->dist += tmp * 16;
+ rd_stats->rate += av1_cost_coeffs(cm, x, plane, block, coeff_ctx, tx_size,
+ scan_order->scan, scan_order->neighbors, 0);
+ rd_stats->skip &= (p->eobs[block] == 0);
}
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
@@ -3046,8 +3045,8 @@
int depth, BLOCK_SIZE plane_bsize,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
- int *rate, int64_t *dist, int64_t *bsse, int *skip,
- int64_t ref_best_rd, int *is_cost_valid) {
+ RD_STATS *rd_stats, int64_t ref_best_rd,
+ int *is_cost_valid) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
@@ -3086,10 +3085,10 @@
coeff_ctx = get_entropy_context(tx_size, pta, ptl);
- *rate = 0;
- *dist = 0;
- *bsse = 0;
- *skip = 1;
+ rd_stats->rate = 0;
+ rd_stats->dist = 0;
+ rd_stats->sse = 0;
+ rd_stats->skip = 1;
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
@@ -3099,25 +3098,26 @@
if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
inter_tx_size[0][0] = tx_size;
av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
- plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+ plane_bsize, coeff_ctx, rd_stats);
- if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
- RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) ||
- *skip == 1) &&
+ if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >=
+ RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) ||
+ rd_stats->skip == 1) &&
!xd->lossless[mbmi->segment_id]) {
- *rate = zero_blk_rate;
- *dist = *bsse;
- *skip = 1;
+ rd_stats->rate = zero_blk_rate;
+ rd_stats->dist = rd_stats->sse;
+ rd_stats->skip = 1;
x->blk_skip[plane][blk_row * bw + blk_col] = 1;
p->eobs[block] = 0;
} else {
x->blk_skip[plane][blk_row * bw + blk_col] = 0;
- *skip = 0;
+ rd_stats->skip = 0;
}
if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
- *rate += av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
- this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
+ rd_stats->rate +=
+ av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
tmp_eob = p->eobs[block];
}
@@ -3125,10 +3125,7 @@
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsl = tx_size_wide_unit[sub_txs];
int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
- int this_rate;
- int64_t this_dist;
- int64_t this_bsse;
- int this_skip;
+ RD_STATS this_rd_stats;
int this_cost_valid = 1;
int64_t tmp_rd = 0;
@@ -3141,12 +3138,12 @@
int offsetc = (i & 0x01) * bsl;
select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
block + i * sub_step, sub_txs, depth + 1, plane_bsize, ta,
- tl, tx_above, tx_left, &this_rate, &this_dist, &this_bsse,
- &this_skip, ref_best_rd - tmp_rd, &this_cost_valid);
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_bsse += this_bsse;
- all_skip &= this_skip;
+ tl, tx_above, tx_left, &this_rd_stats,
+ ref_best_rd - tmp_rd, &this_cost_valid);
+ sum_rate += this_rd_stats.rate;
+ sum_dist += this_rd_stats.dist;
+ sum_bsse += this_rd_stats.sse;
+ all_skip &= this_rd_stats.skip;
tmp_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (this_rd < tmp_rd) break;
}
@@ -3165,29 +3162,29 @@
inter_tx_size[idy][idx] = tx_size;
mbmi->tx_size = tx_size;
if (this_rd == INT64_MAX) *is_cost_valid = 0;
- x->blk_skip[plane][blk_row * bw + blk_col] = *skip;
+ x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
} else {
- *rate = sum_rate;
- *dist = sum_dist;
- *bsse = sum_bsse;
- *skip = all_skip;
+ rd_stats->rate = sum_rate;
+ rd_stats->dist = sum_dist;
+ rd_stats->sse = sum_bsse;
+ rd_stats->skip = all_skip;
if (sum_rd == INT64_MAX) *is_cost_valid = 0;
}
}
-static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int64_t *distortion, int *skippable, int64_t *sse,
- BLOCK_SIZE bsize, int64_t ref_best_rd) {
+static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
+ RD_STATS *rd_stats, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
int is_cost_valid = 1;
int64_t this_rd = 0;
if (ref_best_rd < 0) is_cost_valid = 0;
- *rate = 0;
- *distortion = 0;
- *sse = 0;
- *skippable = 1;
+ rd_stats->rate = 0;
+ rd_stats->dist = 0;
+ rd_stats->sse = 0;
+ rd_stats->skip = 1;
if (is_cost_valid) {
const struct macroblockd_plane *const pd = &xd->plane[0];
@@ -3205,8 +3202,11 @@
TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
TXFM_CONTEXT tx_left[MAX_MIB_SIZE];
- int pnrate = 0, pnskip = 1;
- int64_t pndist = 0, pnsse = 0;
+ RD_STATS pn_rd_stats;
+ pn_rd_stats.rate = 0;
+ pn_rd_stats.skip = 1;
+ pn_rd_stats.dist = 0;
+ pn_rd_stats.sse = 0;
av1_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
memcpy(tx_above, xd->above_txfm_context,
@@ -3218,35 +3218,35 @@
for (idx = 0; idx < mi_width; idx += bw) {
select_tx_block(cpi, x, idy, idx, 0, block, max_tx_size,
mi_height != mi_width, plane_bsize, ctxa, ctxl,
- tx_above, tx_left, &pnrate, &pndist, &pnsse, &pnskip,
- ref_best_rd - this_rd, &is_cost_valid);
- *rate += pnrate;
- *distortion += pndist;
- *sse += pnsse;
- *skippable &= pnskip;
- this_rd += AOMMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
- RDCOST(x->rdmult, x->rddiv, 0, pnsse));
+ tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
+ &is_cost_valid);
+ rd_stats->rate += pn_rd_stats.rate;
+ rd_stats->dist += pn_rd_stats.dist;
+ rd_stats->sse += pn_rd_stats.sse;
+ rd_stats->skip &= pn_rd_stats.skip;
+ this_rd += AOMMIN(
+ RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist),
+ RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse));
block += step;
}
}
}
- this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
- RDCOST(x->rdmult, x->rddiv, 0, *sse));
+ this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
+ RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
if (this_rd > ref_best_rd) is_cost_valid = 0;
if (!is_cost_valid) {
// reset cost value
- *rate = INT_MAX;
- *distortion = INT64_MAX;
- *sse = INT64_MAX;
- *skippable = 0;
+ rd_stats->rate = INT_MAX;
+ rd_stats->dist = INT64_MAX;
+ rd_stats->sse = INT64_MAX;
+ rd_stats->skip = 0;
}
}
static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
- int *rate, int64_t *dist, int *skippable,
- int64_t *sse, BLOCK_SIZE bsize,
+ RD_STATS *rd_stats, BLOCK_SIZE bsize,
int64_t ref_best_rd, TX_TYPE tx_type) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3258,55 +3258,52 @@
int64_t rd;
mbmi->tx_type = tx_type;
- inter_block_yrd(cpi, x, rate, dist, skippable, sse, bsize, ref_best_rd);
+ inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (is_rect_tx_allowed(xd, mbmi)) {
- int rate_rect_tx, skippable_rect_tx = 0;
- int64_t dist_rect_tx, sse_rect_tx, rd_rect_tx;
+ RD_STATS rect_rd_stats;
+ int64_t rd_rect_tx;
int tx_size_cat = inter_tx_size_cat_lookup[bsize];
TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
TX_SIZE var_tx_size = mbmi->tx_size;
- txfm_rd_in_plane(x, cpi, &rate_rect_tx, &dist_rect_tx, &skippable_rect_tx,
- &sse_rect_tx, ref_best_rd, 0, bsize, tx_size,
- cpi->sf.use_fast_coef_costing);
+ txfm_rd_in_plane(x, cpi, &rect_rd_stats.rate, &rect_rd_stats.dist,
+ &rect_rd_stats.skip, &rect_rd_stats.sse, ref_best_rd, 0,
+ bsize, tx_size, cpi->sf.use_fast_coef_costing);
- if (*rate != INT_MAX) {
- *rate += av1_cost_bit(cm->fc->rect_tx_prob[tx_size_cat], 0);
- if (*skippable) {
- rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+ if (rd_stats->rate != INT_MAX) {
+ rd_stats->rate += av1_cost_bit(cm->fc->rect_tx_prob[tx_size_cat], 0);
+ if (rd_stats->skip) {
+ rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
} else {
- rd = RDCOST(x->rdmult, x->rddiv, *rate + s0, *dist);
+ rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);
if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- !(*skippable))
- rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+ !rd_stats->skip)
+ rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
}
} else {
rd = INT64_MAX;
}
- if (rate_rect_tx != INT_MAX) {
- rate_rect_tx += av1_cost_bit(cm->fc->rect_tx_prob[tx_size_cat], 1);
- if (skippable_rect_tx) {
- rd_rect_tx = RDCOST(x->rdmult, x->rddiv, s1, sse_rect_tx);
+ if (rect_rd_stats.rate != INT_MAX) {
+ rect_rd_stats.rate += av1_cost_bit(cm->fc->rect_tx_prob[tx_size_cat], 1);
+ if (rect_rd_stats.skip) {
+ rd_rect_tx = RDCOST(x->rdmult, x->rddiv, s1, rect_rd_stats.sse);
} else {
- rd_rect_tx =
- RDCOST(x->rdmult, x->rddiv, rate_rect_tx + s0, dist_rect_tx);
+ rd_rect_tx = RDCOST(x->rdmult, x->rddiv, rect_rd_stats.rate + s0,
+ rect_rd_stats.dist);
if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
- !(skippable_rect_tx))
- rd_rect_tx =
- AOMMIN(rd_rect_tx, RDCOST(x->rdmult, x->rddiv, s1, sse_rect_tx));
+ !(rect_rd_stats.skip))
+ rd_rect_tx = AOMMIN(
+ rd_rect_tx, RDCOST(x->rdmult, x->rddiv, s1, rect_rd_stats.sse));
}
} else {
rd_rect_tx = INT64_MAX;
}
if (rd_rect_tx < rd) {
- *rate = rate_rect_tx;
- *dist = dist_rect_tx;
- *sse = sse_rect_tx;
- *skippable = skippable_rect_tx;
- if (!xd->lossless[mbmi->segment_id]) x->blk_skip[0][0] = *skippable;
+ *rd_stats = rect_rd_stats;
+ if (!xd->lossless[mbmi->segment_id]) x->blk_skip[0][0] = rd_stats->skip;
mbmi->tx_size = tx_size;
mbmi->inter_tx_size[0][0] = mbmi->tx_size;
} else {
@@ -3315,7 +3312,7 @@
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
- if (*rate == INT_MAX) return INT64_MAX;
+ if (rd_stats->rate == INT_MAX) return INT64_MAX;
#if CONFIG_EXT_TX
if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter) > 1 &&
@@ -3323,41 +3320,41 @@
int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bsize, is_inter);
if (is_inter) {
if (ext_tx_set > 0)
- *rate +=
+ rd_stats->rate +=
cpi->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->tx_size]]
[mbmi->tx_type];
} else {
if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
- *rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size][mbmi->mode]
- [mbmi->tx_type];
+ rd_stats->rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->mode][mbmi->tx_type];
}
}
#else // CONFIG_EXT_TX
if (mbmi->tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
if (is_inter)
- *rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ rd_stats->rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
else
- *rate +=
+ rd_stats->rate +=
cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
#endif // CONFIG_EXT_TX
- if (*skippable)
- rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+ if (rd_stats->skip)
+ rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
else
- rd = RDCOST(x->rdmult, x->rddiv, *rate + s0, *dist);
+ rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);
- if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(*skippable))
- rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ !(rd_stats->skip))
+ rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
return rd;
}
-static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int64_t *distortion, int *skippable,
- int64_t *sse, BLOCK_SIZE bsize,
+static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
+ RD_STATS *rd_stats, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3383,16 +3380,17 @@
prune = prune_tx_types(cpi, bsize, x, xd, 0);
#endif
- *distortion = INT64_MAX;
- *rate = INT_MAX;
- *skippable = 0;
- *sse = INT64_MAX;
+ rd_stats->dist = INT64_MAX;
+ rd_stats->rate = INT_MAX;
+ rd_stats->skip = 0;
+ rd_stats->sse = INT64_MAX;
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
- int this_rate = 0;
- int this_skip = 1;
- int64_t this_dist = 0;
- int64_t this_sse = 0;
+ RD_STATS this_rd_stats;
+ this_rd_stats.rate = 0;
+ this_rd_stats.skip = 1;
+ this_rd_stats.dist = 0;
+ this_rd_stats.sse = 0;
#if CONFIG_EXT_TX
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
@@ -3415,15 +3413,12 @@
tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
continue;
- rd = select_tx_size_fix_type(cpi, x, &this_rate, &this_dist, &this_skip,
- &this_sse, bsize, ref_best_rd, tx_type);
+ rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
+ tx_type);
if (rd < best_rd) {
best_rd = rd;
- *distortion = this_dist;
- *rate = this_rate;
- *skippable = this_skip;
- *sse = this_sse;
+ *rd_stats = this_rd_stats;
best_tx_type = mbmi->tx_type;
best_tx = mbmi->tx_size;
memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
@@ -3444,8 +3439,7 @@
static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
int blk_col, int plane, int block, TX_SIZE tx_size,
BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
- ENTROPY_CONTEXT *left_ctx, int *rate, int64_t *dist,
- int64_t *bsse, int *skip) {
+ ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
@@ -3473,7 +3467,7 @@
ENTROPY_CONTEXT *tl = left_ctx + blk_row;
coeff_ctx = get_entropy_context(tx_size, ta, tl);
av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
- plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+ plane_bsize, coeff_ctx, rd_stats);
for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
ta[i] = !(p->eobs[block] == 0);
@@ -3492,16 +3486,16 @@
int offsetc = (i & 0x01) * bsl;
tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
block + i * step, sub_txs, plane_bsize, above_ctx, left_ctx,
- rate, dist, bsse, skip);
+ rd_stats);
}
}
}
// Return value 0: early termination triggered, no valid rd cost available;
// 1: rd cost values are valid.
-static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int *rate,
- int64_t *distortion, int *skippable, int64_t *sse,
- BLOCK_SIZE bsize, int64_t ref_best_rd) {
+static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
+ RD_STATS *rd_stats, BLOCK_SIZE bsize,
+ int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int plane;
@@ -3510,14 +3504,15 @@
if (ref_best_rd < 0) is_cost_valid = 0;
- *rate = 0;
- *distortion = 0;
- *sse = 0;
- *skippable = 1;
+ rd_stats->rate = 0;
+ rd_stats->dist = 0;
+ rd_stats->sse = 0;
+ rd_stats->skip = 1;
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (is_rect_tx(mbmi->tx_size)) {
- return super_block_uvrd(cpi, x, rate, distortion, skippable, sse, bsize,
+ return super_block_uvrd(cpi, x, &rd_stats->rate, &rd_stats->dist,
+ &rd_stats->skip, &rd_stats->sse, bsize,
ref_best_rd);
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -3538,33 +3533,37 @@
int idx, idy;
int block = 0;
const int step = bh * bw;
- int pnrate = 0, pnskip = 1;
- int64_t pndist = 0, pnsse = 0;
ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
+ RD_STATS pn_rd_stats;
+ pn_rd_stats.rate = 0;
+ pn_rd_stats.skip = 1;
+ pn_rd_stats.dist = 0;
+ pn_rd_stats.sse = 0;
av1_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bw) {
tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
- ta, tl, &pnrate, &pndist, &pnsse, &pnskip);
+ ta, tl, &pn_rd_stats);
block += step;
}
}
- if (pnrate == INT_MAX) {
+ if (pn_rd_stats.rate == INT_MAX) {
is_cost_valid = 0;
break;
}
- *rate += pnrate;
- *distortion += pndist;
- *sse += pnsse;
- *skippable &= pnskip;
+ rd_stats->rate += pn_rd_stats.rate;
+ rd_stats->dist += pn_rd_stats.dist;
+ rd_stats->sse += pn_rd_stats.sse;
+ rd_stats->skip &= pn_rd_stats.skip;
- this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
- RDCOST(x->rdmult, x->rddiv, 0, *sse));
+ this_rd =
+ AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
+ RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
if (this_rd > ref_best_rd) {
is_cost_valid = 0;
@@ -3574,10 +3573,10 @@
if (!is_cost_valid) {
// reset cost value
- *rate = INT_MAX;
- *distortion = INT64_MAX;
- *sse = INT64_MAX;
- *skippable = 0;
+ rd_stats->rate = INT_MAX;
+ rd_stats->dist = INT64_MAX;
+ rd_stats->sse = INT64_MAX;
+ rd_stats->skip = 0;
}
return is_cost_valid;
@@ -7493,13 +7492,21 @@
int skippable_y, skippable_uv;
int64_t sseuv = INT64_MAX;
int64_t rdcosty = INT64_MAX;
+ int is_cost_valid_uv = 0;
+#if CONFIG_VAR_TX
+ RD_STATS rd_stats_uv;
+#endif
// Y cost and distortion
av1_subtract_plane(x, bsize, 0);
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
- select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
- bsize, ref_best_rd);
+ RD_STATS rd_stats_y;
+ select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, ref_best_rd);
+ *rate_y = rd_stats_y.rate;
+ distortion_y = rd_stats_y.dist;
+ skippable_y = rd_stats_y.skip;
+ *psse = rd_stats_y.sse;
} else {
int idx, idy;
super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
@@ -7537,20 +7544,25 @@
rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
#if CONFIG_VAR_TX
- if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
- &sseuv, bsize, ref_best_rd - rdcosty))
+ is_cost_valid_uv =
+ inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, ref_best_rd - rdcosty);
+ *rate_uv = rd_stats_uv.rate;
+ distortion_uv = rd_stats_uv.dist;
+ skippable_uv = rd_stats_uv.skip;
+ sseuv = rd_stats_uv.sse;
#else
- if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
- &sseuv, bsize, ref_best_rd - rdcosty))
+ is_cost_valid_uv =
+ super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
+ &sseuv, bsize, ref_best_rd - rdcosty);
#endif // CONFIG_VAR_TX
- {
+ if (!is_cost_valid_uv) {
*rate2 = INT_MAX;
*distortion = INT64_MAX;
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
continue;
#else
- restore_dst_buf(xd, orig_dst, orig_dst_stride);
- return INT64_MAX;
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
@@ -9201,6 +9213,9 @@
}
if (is_inter_mode(mbmi->mode)) {
+#if CONFIG_VAR_TX
+ RD_STATS rd_stats_uv;
+#endif
av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#if CONFIG_MOTION_VAR
if (mbmi->motion_mode == OBMC_CAUSAL)
@@ -9210,8 +9225,12 @@
av1_subtract_plane(x, bsize, 0);
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
- select_tx_type_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y, bsize,
- INT64_MAX);
+ RD_STATS rd_stats_y;
+ select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+ rate_y = rd_stats_y.rate;
+ dist_y = rd_stats_y.dist;
+ sse_y = rd_stats_y.sse;
+ skip_y = rd_stats_y.skip;
} else {
int idx, idy;
super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y, bsize,
@@ -9223,8 +9242,11 @@
sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
}
- inter_block_uvrd(cpi, x, &rate_uv, &dist_uv, &skip_uv, &sse_uv, bsize,
- INT64_MAX);
+ inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+ rate_uv = rd_stats_uv.rate;
+ dist_uv = rd_stats_uv.dist;
+ skip_uv = rd_stats_uv.skip;
+ sse_uv = rd_stats_uv.sse;
#else
super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y, bsize,
INT64_MAX);
@@ -9424,7 +9446,10 @@
#endif // CONFIG_REF_MV
#if CONFIG_GLOBAL_MOTION
zeromv[0].as_int = cm->global_motion[refs[0]].motion_params.wmmat[0].as_int;
- zeromv[1].as_int = cm->global_motion[refs[1]].motion_params.wmmat[0].as_int;
+ if (comp_pred_mode) {
+ zeromv[1].as_int =
+ cm->global_motion[refs[1]].motion_params.wmmat[0].as_int;
+ }
#else
zeromv[0].as_int = 0;
zeromv[1].as_int = 0;
@@ -10354,16 +10379,24 @@
if (tmp_best_rdu > 0) {
// If even the 'Y' rd value of split is higher than best so far
// then dont bother looking at UV
+ int is_cost_valid_uv;
+#if CONFIG_VAR_TX
+ RD_STATS rd_stats_uv;
+#endif
av1_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8);
#if CONFIG_VAR_TX
- if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- &uv_sse, BLOCK_8X8, tmp_best_rdu))
- continue;
+ is_cost_valid_uv =
+ inter_block_uvrd(cpi, x, &rd_stats_uv, BLOCK_8X8, tmp_best_rdu);
+ rate_uv = rd_stats_uv.rate;
+ distortion_uv = rd_stats_uv.dist;
+ uv_skippable = rd_stats_uv.skip;
+ uv_sse = rd_stats_uv.sse;
#else
- if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- &uv_sse, BLOCK_8X8, tmp_best_rdu))
- continue;
+ is_cost_valid_uv =
+ super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ &uv_sse, BLOCK_8X8, tmp_best_rdu);
#endif
+ if (!is_cost_valid_uv) continue;
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 5d9fc12..8c65770 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -26,6 +26,16 @@
struct macroblock;
struct RD_COST;
+#if CONFIG_VAR_TX
+// Bundled rate/distortion results. TODO(angiebird): Merge RD_COST and RD_STATS
+typedef struct RD_STATS {
+ int rate; // Rate term passed to RDCOST(); INT_MAX marks an invalid cost.
+ int64_t dist; // Distortion term passed to RDCOST(); INT64_MAX when invalid.
+ int64_t sse; // Distortion used by RDCOST() when costing the skip case.
+ int skip; // Nonzero if skippable; AND-ed together when stats are summed.
+} RD_STATS;
+#endif
+
int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int block, int coeff_ctx, TX_SIZE tx_size,
const int16_t *scan, const int16_t *nb,
@@ -77,8 +87,7 @@
#if CONFIG_VAR_TX
void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int blk_row, int blk_col, int plane, int block,
- int plane_bsize, int coeff_ctx, int *rate, int64_t *dist,
- int64_t *bsse, int *skip);
+ int plane_bsize, int coeff_ctx, RD_STATS *rd_stats);
#endif
void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,