JNT_COMP: Refactor code
The refactoring serves two purposes:
1. Separate the code paths for jnt_comp and the original compound average
computation. It provides a function interface for jnt_comp while leaving
the original compound average computation unchanged. In the near future,
SIMD functions can be added for jnt_comp through this interface; the new
computation is modeled in the sketch below.
2. The previous implementation used a hack on second_pred, which could
cause a segmentation fault when the test clip is small, as reported in
Issue 944. This refactoring removes the hack and makes it possible to
address the segmentation fault in the future.
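
For reference, the weighted average that the new interface computes can be
modeled as the standalone C sketch below (mirroring aom_jnt_comp_avg_pred_c
from this patch; the local JNT_COMP_PARAMS typedef duplicates the one added
to av1/common/blockd.h so the sketch compiles on its own):

#include <stdint.h>

typedef struct jnt_comp_params {
  int fwd_offset;
  int bck_offset;
} JNT_COMP_PARAMS;

/* Distance-weighted average of pred and ref: each sample is
 * (pred * bck_offset + ref * fwd_offset) / (bck_offset + fwd_offset),
 * rounded to nearest and clamped to 8 bits. */
static void jnt_comp_avg_model(uint8_t *comp_pred, const uint8_t *pred,
                               int width, int height, const uint8_t *ref,
                               int ref_stride,
                               const JNT_COMP_PARAMS *jcp_param) {
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;
  const double sum = bck_offset + fwd_offset;
  for (int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
      tmp = (int)(0.5 + tmp / sum);
      if (tmp > 255) tmp = 255;
      comp_pred[j] = (uint8_t)tmp;
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}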
Change-Id: Idd2cb99f6c77dae03d32ccfa1f9cbed1d7eed067
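
Every encoder call site touched here follows one dispatch pattern:
av1_jnt_comp_weight_assign() stores the weights in xd->jcp_param, with
fwd_offset/bck_offset of -1 as the sentinel for "no joint weighting". A
minimal sketch of that pattern, assuming the headers as modified by this
patch (compound_sad is a hypothetical helper name, not part of the patch):

#include "aom_dsp/variance.h"   /* aom_variance_fn_ptr_t with jsdaf/sdaf */
#include "av1/common/blockd.h"  /* MACROBLOCKD, JNT_COMP_PARAMS */

/* Hypothetical helper illustrating the encoder-side dispatch: use the
 * jointly weighted SAD when valid weights are present, otherwise fall
 * back to the ordinary compound average SAD. */
static unsigned int compound_sad(const aom_variance_fn_ptr_t *fn_ptr,
                                 const MACROBLOCKD *xd, const uint8_t *src,
                                 int src_stride, const uint8_t *ref,
                                 int ref_stride,
                                 const uint8_t *second_pred) {
#if CONFIG_JNT_COMP
  if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
    return fn_ptr->jsdaf(src, src_stride, ref, ref_stride, second_pred,
                         &xd->jcp_param);
#endif  /* CONFIG_JNT_COMP */
  return fn_ptr->sdaf(src, src_stride, ref, ref_stride, second_pred);
}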
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 0e78eea..0b7fbca 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -7,6 +7,7 @@
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"
+#include "av1/common/blockd.h"
EOF
}
@@ -829,6 +830,9 @@
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/unsigned int/, "aom_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
+ }
}
specialize qw/aom_sad128x128 avx2 sse2/;
@@ -1100,9 +1104,14 @@
#
add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
specialize qw/aom_upsampled_pred sse2/;
+
add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
specialize qw/aom_comp_avg_upsampled_pred sse2/;
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/void aom_jnt_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
+ }
+
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
specialize qw/aom_highbd_upsampled_pred sse2/;
@@ -1133,6 +1142,9 @@
add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/uint32_t/, "aom_jnt_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
+ }
}
specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
@@ -1309,7 +1321,6 @@
add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
-
#
# Specialty Subpixel
#
@@ -1326,6 +1337,9 @@
# Comp Avg
#
add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/void aom_jnt_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
+ }
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int aom_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/aom_highbd_12_variance64x64 sse2/;
diff --git a/aom_dsp/sad.c b/aom_dsp/sad.c
index 77587d5..871d13e 100644
--- a/aom_dsp/sad.c
+++ b/aom_dsp/sad.c
@@ -33,6 +33,28 @@
return sad;
}
+#if CONFIG_JNT_COMP
+#define sadMxN(m, n) \
+ unsigned int aom_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return sad(src, src_stride, ref, ref_stride, m, n); \
+ } \
+ unsigned int aom_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride, \
+ const uint8_t *second_pred) { \
+ uint8_t comp_pred[m * n]; \
+ aom_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
+ return sad(src, src_stride, comp_pred, m, m, n); \
+ } \
+ unsigned int aom_jnt_sad##m##x##n##_avg_c( \
+ const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+ const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
+ uint8_t comp_pred[m * n]; \
+ aom_jnt_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride, \
+ jcp_param); \
+ return sad(src, src_stride, comp_pred, m, m, n); \
+ }
+#else // CONFIG_JNT_COMP
#define sadMxN(m, n) \
unsigned int aom_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride) { \
@@ -45,6 +67,7 @@
aom_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \
return sad(src, src_stride, comp_pred, m, m, n); \
}
+#endif // CONFIG_JNT_COMP
// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index c07f46d..16ad001 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -180,6 +180,43 @@
return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \
}
+#if CONFIG_JNT_COMP
+#define SUBPIX_AVG_VAR(W, H) \
+ uint32_t aom_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *a, int a_stride, int xoffset, int yoffset, \
+ const uint8_t *b, int b_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
+ \
+ return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
+ } \
+ uint32_t aom_jnt_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *a, int a_stride, int xoffset, int yoffset, \
+ const uint8_t *b, int b_stride, uint32_t *sse, \
+ const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ aom_jnt_comp_avg_pred(temp3, second_pred, W, H, temp2, W, jcp_param); \
+ \
+ return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
+ }
+#else // CONFIG_JNT_COMP
#define SUBPIX_AVG_VAR(W, H) \
uint32_t aom_sub_pixel_avg_variance##W##x##H##_c( \
const uint8_t *a, int a_stride, int xoffset, int yoffset, \
@@ -198,6 +235,7 @@
\
return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
}
+#endif // CONFIG_JNT_COMP
/* Identical to the variance call except it takes an additional parameter, sum,
* and returns that value using pass-by-reference instead of returning
@@ -275,23 +313,11 @@
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride) {
int i, j;
-#if CONFIG_JNT_COMP
- int bck_offset = pred[4096];
- int fwd_offset = pred[4097];
- double sum = bck_offset + fwd_offset;
-#endif // CONFIG_JNT_COMP
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
-#if CONFIG_JNT_COMP
- int tmp = pred[j] * fwd_offset + ref[j] * bck_offset;
- tmp = (int)(0.5 + tmp / sum);
- if (tmp > 255) tmp = 255;
- comp_pred[j] = (uint8_t)tmp;
-#else
const int tmp = pred[j] + ref[j];
comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
-#endif // CONFIG_JNT_COMP
}
comp_pred += width;
pred += width;
@@ -352,36 +378,66 @@
int subpel_y_q3, const uint8_t *ref,
int ref_stride) {
int i, j;
-#if CONFIG_JNT_COMP
- int bck_offset = pred[4096];
- int fwd_offset = pred[4097];
- double sum = bck_offset + fwd_offset;
-#endif // CONFIG_JNT_COMP
-#if CONFIG_JNT_COMP
- aom_upsampled_pred_c(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
- ref_stride);
-#else
aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
ref_stride);
-#endif // CONFIG_JNT_COMP
-
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
-#if CONFIG_JNT_COMP
- int tmp = pred[j] * fwd_offset + comp_pred[j] * bck_offset;
- tmp = (int)(0.5 + tmp / sum);
- if (tmp > 255) tmp = 255;
- comp_pred[j] = (uint8_t)tmp;
-#else
comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1);
-#endif // CONFIG_JNT_COMP
}
comp_pred += width;
pred += width;
}
}
+#if CONFIG_JNT_COMP
+void aom_jnt_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride,
+ const JNT_COMP_PARAMS *jcp_param) {
+ int i, j;
+ const int fwd_offset = jcp_param->fwd_offset;
+ const int bck_offset = jcp_param->bck_offset;
+ double sum = bck_offset + fwd_offset;
+
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
+ tmp = (int)(0.5 + tmp / sum);
+ if (tmp > 255) tmp = 255;
+ comp_pred[j] = (uint8_t)tmp;
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
+
+void aom_jnt_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+ int width, int height, int subpel_x_q3,
+ int subpel_y_q3, const uint8_t *ref,
+ int ref_stride,
+ const JNT_COMP_PARAMS *jcp_param) {
+ int i, j;
+ const int fwd_offset = jcp_param->fwd_offset;
+ const int bck_offset = jcp_param->bck_offset;
+ double sum = bck_offset + fwd_offset;
+
+ aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
+ ref_stride);
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
+ tmp = (int)(0.5 + tmp / sum);
+ if (tmp > 255) tmp = 255;
+ comp_pred[j] = (uint8_t)tmp;
+ }
+ comp_pred += width;
+ pred += width;
+ }
+}
+#endif // CONFIG_JNT_COMP
+
#if CONFIG_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, int w, int h,
diff --git a/aom_dsp/variance.h b/aom_dsp/variance.h
index d12cd91..d4a1e83 100644
--- a/aom_dsp/variance.h
+++ b/aom_dsp/variance.h
@@ -54,6 +54,18 @@
const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
int b_stride, unsigned int *sse, const uint8_t *second_pred);
+#if CONFIG_JNT_COMP
+typedef unsigned int (*aom_jnt_sad_avg_fn_t)(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ const uint8_t *second_pred,
+ const JNT_COMP_PARAMS *jcp_param);
+
+typedef unsigned int (*aom_jnt_subp_avg_variance_fn_t)(
+ const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
+ int b_stride, unsigned int *sse, const uint8_t *second_pred,
+ const JNT_COMP_PARAMS *jcp_param);
+#endif // CONFIG_JNT_COMP
+
#if CONFIG_AV1
typedef unsigned int (*aom_masked_sad_fn_t)(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
@@ -95,6 +107,10 @@
aom_obmc_sad_fn_t osdf;
aom_obmc_variance_fn_t ovf;
aom_obmc_subpixvariance_fn_t osvf;
+#if CONFIG_JNT_COMP
+ aom_jnt_sad_avg_fn_t jsdaf;
+ aom_jnt_subp_avg_variance_fn_t jsvaf;
+#endif // CONFIG_JNT_COMP
} aom_variance_fn_ptr_t;
#endif // CONFIG_AV1
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 37236b6..cdca887 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -625,6 +625,13 @@
} CFL_CTX;
#endif // CONFIG_CFL
+#if CONFIG_JNT_COMP
+typedef struct jnt_comp_params {
+ int fwd_offset;
+ int bck_offset;
+} JNT_COMP_PARAMS;
+#endif // CONFIG_JNT_COMP
+
typedef struct macroblockd {
struct macroblockd_plane plane[MAX_MB_PLANE];
uint8_t bmode_blocks_wl;
@@ -745,6 +752,10 @@
#if CONFIG_CFL
CFL_CTX *cfl;
#endif
+
+#if CONFIG_JNT_COMP
+ JNT_COMP_PARAMS jcp_param;
+#endif
} MACROBLOCKD;
static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index e6aff66..dca9cc4 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -929,10 +929,11 @@
} SubpelParams;
#if CONFIG_JNT_COMP
-static void jnt_comp_weight_assign(const AV1_COMMON *cm,
- const MB_MODE_INFO *mbmi,
- ConvolveParams *conv_params,
- int is_compound) {
+void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
+ int order_idx, int *fwd_offset, int *bck_offset,
+ int is_compound) {
+ assert(fwd_offset != NULL && bck_offset != NULL);
+
if (is_compound) {
int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
@@ -947,8 +948,8 @@
fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
}
- conv_params->bck_offset = abs(cur_frame_index - bck_frame_index);
- conv_params->fwd_offset = abs(fwd_frame_index - cur_frame_index);
+ *bck_offset = abs(cur_frame_index - bck_frame_index);
+ *fwd_offset = abs(fwd_frame_index - cur_frame_index);
const double fwd = abs(fwd_frame_index - cur_frame_index);
const double bck = abs(cur_frame_index - bck_frame_index);
@@ -967,22 +968,21 @@
for (quant_dist_idx = 0; quant_dist_idx < 4; ++quant_dist_idx) {
if (ratio < quant_dist_category[quant_dist_idx]) break;
}
- conv_params->fwd_offset =
- quant_dist_lookup_table[0][quant_dist_idx][order];
- conv_params->bck_offset =
- quant_dist_lookup_table[0][quant_dist_idx][1 - order];
+ *fwd_offset = quant_dist_lookup_table[order_idx][quant_dist_idx][order];
+ *bck_offset =
+ quant_dist_lookup_table[order_idx][quant_dist_idx][1 - order];
} else {
- conv_params->fwd_offset = (DIST_PRECISION >> 1);
- conv_params->bck_offset = (DIST_PRECISION >> 1);
+ *fwd_offset = (DIST_PRECISION >> 1);
+ *bck_offset = (DIST_PRECISION >> 1);
}
if (mbmi->compound_idx) {
- conv_params->fwd_offset = -1;
- conv_params->bck_offset = -1;
+ *fwd_offset = -1;
+ *bck_offset = -1;
}
} else {
- conv_params->bck_offset = -1;
- conv_params->fwd_offset = -1;
+ *bck_offset = -1;
+ *fwd_offset = -1;
}
}
#endif // CONFIG_JNT_COMP
@@ -1288,7 +1288,8 @@
ConvolveParams conv_params =
get_conv_params_no_round(ref, ref, plane, tmp_dst, MAX_SB_SIZE);
#if CONFIG_JNT_COMP
- jnt_comp_weight_assign(cm, &mi->mbmi, &conv_params, is_compound);
+ av1_jnt_comp_weight_assign(cm, &mi->mbmi, 0, &conv_params.fwd_offset,
+ &conv_params.bck_offset, is_compound);
#endif // CONFIG_JNT_COMP
#else
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 441fe79..272e4f6 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -560,6 +560,12 @@
uint8_t *ext_dst1[3],
int ext_dst_stride1[3]);
+#if CONFIG_JNT_COMP
+void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
+ int order_idx, int *fwd_offset, int *bck_offset,
+ int is_compound);
+#endif // CONFIG_JNT_COMP
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 1990e70..5d6ab41 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2758,6 +2758,19 @@
av1_set_speed_features_framesize_independent(cpi);
av1_set_speed_features_framesize_dependent(cpi);
+#if CONFIG_JNT_COMP
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF, JSDAF, JSVAF) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].sdaf = SDAF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svaf = SVAF; \
+ cpi->fn_ptr[BT].sdx3f = SDX3F; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF; \
+ cpi->fn_ptr[BT].jsdaf = JSDAF; \
+ cpi->fn_ptr[BT].jsvaf = JSVAF;
+#else // CONFIG_JNT_COMP
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
@@ -2767,7 +2780,142 @@
cpi->fn_ptr[BT].sdx3f = SDX3F; \
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
+#endif // CONFIG_JNT_COMP
+#if CONFIG_JNT_COMP
+#if CONFIG_EXT_PARTITION_TYPES
+ BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
+ aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_sad4x16x4d, aom_jnt_sad4x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance4x16_c)
+
+ BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4,
+ aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_sad16x4x4d, aom_jnt_sad16x4_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x4_c)
+
+ BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32,
+ aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_sad8x32x4d, aom_jnt_sad8x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x32_c)
+
+ BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8,
+ aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_sad32x8x4d, aom_jnt_sad32x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x8_c)
+
+ BFP(BLOCK_16X64, aom_sad16x64, aom_sad16x64_avg, aom_variance16x64,
+ aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64, NULL, NULL,
+ aom_sad16x64x4d, aom_jnt_sad16x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x64_c)
+
+ BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16,
+ aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16, NULL, NULL,
+ aom_sad64x16x4d, aom_jnt_sad64x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x16_c)
+
+#if CONFIG_EXT_PARTITION
+ BFP(BLOCK_32X128, aom_sad32x128, aom_sad32x128_avg, aom_variance32x128,
+ aom_sub_pixel_variance32x128, aom_sub_pixel_avg_variance32x128, NULL,
+ NULL, aom_sad32x128x4d, aom_jnt_sad32x128_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x128_c)
+
+ BFP(BLOCK_128X32, aom_sad128x32, aom_sad128x32_avg, aom_variance128x32,
+ aom_sub_pixel_variance128x32, aom_sub_pixel_avg_variance128x32, NULL,
+ NULL, aom_sad128x32x4d, aom_jnt_sad128x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance128x32_c)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_PARTITION
+ BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
+ aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128,
+ aom_sad128x128x3, aom_sad128x128x8, aom_sad128x128x4d,
+ aom_jnt_sad128x128_avg_c, aom_jnt_sub_pixel_avg_variance128x128_c)
+
+ BFP(BLOCK_128X64, aom_sad128x64, aom_sad128x64_avg, aom_variance128x64,
+ aom_sub_pixel_variance128x64, aom_sub_pixel_avg_variance128x64, NULL,
+ NULL, aom_sad128x64x4d, aom_jnt_sad128x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance128x64_c)
+
+ BFP(BLOCK_64X128, aom_sad64x128, aom_sad64x128_avg, aom_variance64x128,
+ aom_sub_pixel_variance64x128, aom_sub_pixel_avg_variance64x128, NULL,
+ NULL, aom_sad64x128x4d, aom_jnt_sad64x128_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x128_c)
+#endif // CONFIG_EXT_PARTITION
+
+ BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16,
+ aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, NULL, NULL,
+ aom_sad32x16x4d, aom_jnt_sad32x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x16_c)
+
+ BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg, aom_variance16x32,
+ aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32, NULL, NULL,
+ aom_sad16x32x4d, aom_jnt_sad16x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x32_c)
+
+ BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg, aom_variance64x32,
+ aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32, NULL, NULL,
+ aom_sad64x32x4d, aom_jnt_sad64x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x32_c)
+
+ BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg, aom_variance32x64,
+ aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64, NULL, NULL,
+ aom_sad32x64x4d, aom_jnt_sad32x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x64_c)
+
+ BFP(BLOCK_32X32, aom_sad32x32, aom_sad32x32_avg, aom_variance32x32,
+ aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32,
+ aom_sad32x32x3, aom_sad32x32x8, aom_sad32x32x4d, aom_jnt_sad32x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x32_c)
+
+ BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg, aom_variance64x64,
+ aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64,
+ aom_sad64x64x3, aom_sad64x64x8, aom_sad64x64x4d, aom_jnt_sad64x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x64_c)
+
+ BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg, aom_variance16x16,
+ aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16,
+ aom_sad16x16x3, aom_sad16x16x8, aom_sad16x16x4d, aom_jnt_sad16x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x16_c)
+
+ BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg, aom_variance16x8,
+ aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8, aom_sad16x8x3,
+ aom_sad16x8x8, aom_sad16x8x4d, aom_jnt_sad16x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x8_c)
+
+ BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg, aom_variance8x16,
+ aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16, aom_sad8x16x3,
+ aom_sad8x16x8, aom_sad8x16x4d, aom_jnt_sad8x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x16_c)
+
+ BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg, aom_variance8x8,
+ aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x3,
+ aom_sad8x8x8, aom_sad8x8x4d, aom_jnt_sad8x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x8_c)
+
+ BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg, aom_variance8x4,
+ aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, NULL,
+ aom_sad8x4x8, aom_sad8x4x4d, aom_jnt_sad8x4_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x4_c)
+
+ BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg, aom_variance4x8,
+ aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, NULL,
+ aom_sad4x8x8, aom_sad4x8x4d, aom_jnt_sad4x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance4x8_c)
+
+ BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4,
+ aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3,
+ aom_sad4x4x8, aom_sad4x4x4d, aom_jnt_sad4x4_avg_c,
+ aom_jnt_sub_pixel_avg_variance4x4_c)
+
+ BFP(BLOCK_2X2, NULL, NULL, aom_variance2x2, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL)
+ BFP(BLOCK_2X4, NULL, NULL, aom_variance2x4, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL)
+ BFP(BLOCK_4X2, NULL, NULL, aom_variance4x2, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL)
+#else // CONFIG_JNT_COMP
#if CONFIG_EXT_PARTITION_TYPES
BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, NULL, NULL,
@@ -2818,59 +2966,6 @@
NULL, aom_sad64x128x4d)
#endif // CONFIG_EXT_PARTITION
-#if CONFIG_JNT_COMP
- BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg_c, aom_variance32x16,
- aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, NULL, NULL,
- aom_sad32x16x4d)
-
- BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg_c, aom_variance16x32,
- aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32, NULL, NULL,
- aom_sad16x32x4d)
-
- BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg_c, aom_variance64x32,
- aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32, NULL, NULL,
- aom_sad64x32x4d)
-
- BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg_c, aom_variance32x64,
- aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64, NULL, NULL,
- aom_sad32x64x4d)
-
- BFP(BLOCK_32X32, aom_sad32x32, aom_sad32x32_avg_c, aom_variance32x32,
- aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32,
- aom_sad32x32x3, aom_sad32x32x8, aom_sad32x32x4d)
-
- BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg_c, aom_variance64x64,
- aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64,
- aom_sad64x64x3, aom_sad64x64x8, aom_sad64x64x4d)
-
- BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg_c, aom_variance16x16,
- aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16,
- aom_sad16x16x3, aom_sad16x16x8, aom_sad16x16x4d)
-
- BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg_c, aom_variance16x8,
- aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8, aom_sad16x8x3,
- aom_sad16x8x8, aom_sad16x8x4d)
-
- BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg_c, aom_variance8x16,
- aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16, aom_sad8x16x3,
- aom_sad8x16x8, aom_sad8x16x4d)
-
- BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg_c, aom_variance8x8,
- aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x3,
- aom_sad8x8x8, aom_sad8x8x4d)
-
- BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg_c, aom_variance8x4,
- aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, NULL,
- aom_sad8x4x8, aom_sad8x4x4d)
-
- BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg_c, aom_variance4x8,
- aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, NULL,
- aom_sad4x8x8, aom_sad4x8x4d)
-
- BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg_c, aom_variance4x4,
- aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3,
- aom_sad4x4x8, aom_sad4x4x4d)
-#else
BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16,
aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, NULL, NULL,
aom_sad32x16x4d)
@@ -2922,11 +3017,11 @@
BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4,
aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3,
aom_sad4x4x8, aom_sad4x4x4d)
-#endif // CONFIG_JNT_COMP
BFP(BLOCK_2X2, NULL, NULL, aom_variance2x2, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_2X4, NULL, NULL, aom_variance2x4, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_4X2, NULL, NULL, aom_variance4x2, NULL, NULL, NULL, NULL, NULL)
+#endif // CONFIG_JNT_COMP
#define OBFP(BT, OSDF, OVF, OSVF) \
cpi->fn_ptr[BT].osdf = OSDF; \
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index f4d132e..63f8105 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -176,6 +176,39 @@
}
/* checks if (r, c) has better score than previous best */
+#if CONFIG_JNT_COMP
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ if (second_pred == NULL) { \
+ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse); \
+ } else if (mask) { \
+ thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, second_pred, mask, \
+ mask_stride, invert_mask, &sse); \
+ } else { \
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1) \
+ thismse = vfp->jsvaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse, second_pred, \
+ &xd->jcp_param); \
+ else \
+ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse, second_pred); \
+ } \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else // CONFIG_JNT_COMP
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
MV this_mv = { r, c }; \
@@ -201,6 +234,7 @@
} else { \
v = INT_MAX; \
}
+#endif // CONFIG_JNT_COMP
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
@@ -345,15 +379,18 @@
vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
} else {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
- if (mask)
+ if (mask) {
aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
mask, mask_stride, invert_mask);
- else
+ } else {
#if CONFIG_JNT_COMP
- aom_comp_avg_pred_c(comp_pred, second_pred, w, h, y + offset, y_stride);
-#else
- aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ aom_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
+ y_stride, &xd->jcp_param);
+ else
#endif // CONFIG_JNT_COMP
+ aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ }
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
}
} else {
@@ -365,15 +402,18 @@
(void)xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
- if (mask)
+ if (mask) {
aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
mask, mask_stride, invert_mask);
- else
+ } else {
#if CONFIG_JNT_COMP
- aom_comp_avg_pred_c(comp_pred, second_pred, w, h, y + offset, y_stride);
-#else
- aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ aom_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
+ y_stride, &xd->jcp_param);
+ else
#endif // CONFIG_JNT_COMP
+ aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ }
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} else {
besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
@@ -666,18 +706,21 @@
(void)xd;
#endif // CONFIG_HIGHBITDEPTH
if (second_pred != NULL) {
- if (mask)
+ if (mask) {
aom_comp_mask_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
subpel_y_q3, y, y_stride, mask,
mask_stride, invert_mask);
- else
+ } else {
#if CONFIG_JNT_COMP
- aom_comp_avg_upsampled_pred_c(pred, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride);
-#else
- aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride);
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ aom_jnt_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
+ subpel_y_q3, y, y_stride,
+ &xd->jcp_param);
+ else
#endif // CONFIG_JNT_COMP
+ aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
+ subpel_y_q3, y, y_stride);
+ }
} else {
aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride);
}
@@ -771,16 +814,25 @@
mask_stride, invert_mask, w, h, &sse);
} else {
const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
- if (second_pred == NULL)
+ if (second_pred == NULL) {
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
- else if (mask)
+ } else if (mask) {
thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, second_pred, mask,
mask_stride, invert_mask, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 &&
+ xd->jcp_param.bck_offset != -1)
+ thismse =
+ vfp->jsvaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse, second_pred, &xd->jcp_param);
+ else
+#endif // CONFIG_JNT_COMP
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
}
cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
@@ -814,16 +866,24 @@
} else {
const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
- if (second_pred == NULL)
+ if (second_pred == NULL) {
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
src_stride, &sse);
- else if (mask)
+ } else if (mask) {
thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, second_pred, mask,
mask_stride, invert_mask, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ thismse =
+ vfp->jsvaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse, second_pred, &xd->jcp_param);
+ else
+#endif // CONFIG_JNT_COMP
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
}
cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
@@ -1397,11 +1457,21 @@
const MV mv = { best_mv->row * 8, best_mv->col * 8 };
unsigned int unused;
- return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
- what->buf, what->stride, &unused, second_pred) +
- (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit)
- : 0);
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ return vfp->jsvaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
+ what->buf, what->stride, &unused, second_pred,
+ &xd->jcp_param) +
+ (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
+ x->errorperbit)
+ : 0);
+ else
+#endif // CONFIG_JNT_COMP
+ return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
+ what->buf, what->stride, &unused, second_pred) +
+ (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
+ x->errorperbit)
+ : 0);
}
int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
@@ -2405,16 +2475,25 @@
clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
- if (mask)
+ if (mask) {
best_sad = fn_ptr->msdf(what->buf, what->stride,
get_buf_from_mv(in_what, best_mv), in_what->stride,
second_pred, mask, mask_stride, invert_mask) +
mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
- else
- best_sad =
- fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
- in_what->stride, second_pred) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ best_sad = fn_ptr->jsdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, best_mv),
+ in_what->stride, second_pred, &xd->jcp_param) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+ else
+#endif // CONFIG_JNT_COMP
+ best_sad = fn_ptr->sdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, best_mv),
+ in_what->stride, second_pred) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+ }
for (i = 0; i < search_range; ++i) {
int best_site = -1;
@@ -2425,14 +2504,22 @@
if (is_mv_in(&x->mv_limits, &mv)) {
unsigned int sad;
- if (mask)
+ if (mask) {
sad = fn_ptr->msdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride,
second_pred, mask, mask_stride, invert_mask);
- else
- sad = fn_ptr->sdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride,
- second_pred);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ sad = fn_ptr->jsdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride,
+ second_pred, &xd->jcp_param);
+ else
+#endif // CONFIG_JNT_COMP
+ sad = fn_ptr->sdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride,
+ second_pred);
+ }
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 36c3aed..92249a1 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5789,56 +5789,6 @@
return 1;
}
-#if CONFIG_JNT_COMP
-static void jnt_comp_weight_assign(const AV1_COMMON *cm,
- const MB_MODE_INFO *mbmi, int order_idx,
- uint8_t *second_pred) {
- if (mbmi->compound_idx) {
- second_pred[4096] = -1;
- second_pred[4097] = -1;
- } else {
- int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
- int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
- int bck_frame_index = 0, fwd_frame_index = 0;
- int cur_frame_index = cm->cur_frame->cur_frame_offset;
-
- if (bck_idx >= 0) {
- bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset;
- }
-
- if (fwd_idx >= 0) {
- fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
- }
-
- const double fwd = abs(fwd_frame_index - cur_frame_index);
- const double bck = abs(cur_frame_index - bck_frame_index);
- int order;
- double ratio;
-
- if (COMPOUND_WEIGHT_MODE == DIST) {
- if (fwd > bck) {
- ratio = (bck != 0) ? fwd / bck : 5.0;
- order = 0;
- } else {
- ratio = (fwd != 0) ? bck / fwd : 5.0;
- order = 1;
- }
- int quant_dist_idx;
- for (quant_dist_idx = 0; quant_dist_idx < 4; ++quant_dist_idx) {
- if (ratio < quant_dist_category[quant_dist_idx]) break;
- }
- second_pred[4096] =
- quant_dist_lookup_table[order_idx][quant_dist_idx][order];
- second_pred[4097] =
- quant_dist_lookup_table[order_idx][quant_dist_idx][1 - order];
- } else {
- second_pred[4096] = (DIST_PRECISION >> 1);
- second_pred[4097] = (DIST_PRECISION >> 1);
- }
- }
-}
-#endif // CONFIG_JNT_COMP
-
static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int_mv *frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
@@ -5901,13 +5851,8 @@
// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
-#if CONFIG_JNT_COMP
- DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE + 2]);
- uint8_t *second_pred;
-#else
DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
uint8_t *second_pred;
-#endif // CONFIG_JNT_COMP
#else // CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
@@ -6046,7 +5991,8 @@
#if CONFIG_JNT_COMP
const int order_idx = id != 0;
- jnt_comp_weight_assign(cm, mbmi, order_idx, second_pred);
+ av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset,
+ &xd->jcp_param.bck_offset, 1);
#endif // CONFIG_JNT_COMP
// Do compound motion search on the current reference frame.
@@ -6761,7 +6707,8 @@
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_JNT_COMP
- jnt_comp_weight_assign(cm, mbmi, 0, second_pred);
+ av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset,
+ &xd->jcp_param.bck_offset, 1);
#endif // CONFIG_JNT_COMP
if (scaled_ref_frame) {
@@ -6930,11 +6877,7 @@
// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
-#if CONFIG_JNT_COMP
- DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE + 2]);
-#else
DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
-#endif // CONFIG_JNT_COMP
uint8_t *second_pred;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);