JNT_COMP: Refactor code
The refactoring serves two purposes:
1. Separate the code paths for jnt_comp and the original compound average
computation. It provides a function interface for jnt_comp while leaving
the original compound average computation unchanged. In the near future,
SIMD functions can be added for jnt_comp through this interface; the new
computation is modeled in the sketch below.
2. The previous implementation used a hack on second_pred, which could
cause a segmentation fault when the test clip is small, as reported in
Issue 944. This refactoring removes the hack and makes it possible to
address the segmentation fault in the future.
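
For reference, the weighted average that the new interface computes can be
modeled as the standalone C sketch below (mirroring aom_jnt_comp_avg_pred_c
from this patch; the local JNT_COMP_PARAMS typedef duplicates the one added
to av1/common/blockd.h so the sketch compiles on its own):

#include <stdint.h>

typedef struct jnt_comp_params {
  int fwd_offset;
  int bck_offset;
} JNT_COMP_PARAMS;

/* Distance-weighted average of pred and ref: each sample is
 * (pred * bck_offset + ref * fwd_offset) / (bck_offset + fwd_offset),
 * rounded to nearest and clamped to 8 bits. */
static void jnt_comp_avg_model(uint8_t *comp_pred, const uint8_t *pred,
                               int width, int height, const uint8_t *ref,
                               int ref_stride,
                               const JNT_COMP_PARAMS *jcp_param) {
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;
  const double sum = bck_offset + fwd_offset;
  for (int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
      tmp = (int)(0.5 + tmp / sum);
      if (tmp > 255) tmp = 255;
      comp_pred[j] = (uint8_t)tmp;
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}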
Change-Id: Idd2cb99f6c77dae03d32ccfa1f9cbed1d7eed067
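
Every encoder call site touched here follows one dispatch pattern:
av1_jnt_comp_weight_assign() stores the weights in xd->jcp_param, with
fwd_offset/bck_offset of -1 as the sentinel for "no joint weighting". A
minimal sketch of that pattern, assuming the headers as modified by this
patch (compound_sad is a hypothetical helper name, not part of the patch):

#include "aom_dsp/variance.h"   /* aom_variance_fn_ptr_t with jsdaf/sdaf */
#include "av1/common/blockd.h"  /* MACROBLOCKD, JNT_COMP_PARAMS */

/* Hypothetical helper illustrating the encoder-side dispatch: use the
 * jointly weighted SAD when valid weights are present, otherwise fall
 * back to the ordinary compound average SAD. */
static unsigned int compound_sad(const aom_variance_fn_ptr_t *fn_ptr,
                                 const MACROBLOCKD *xd, const uint8_t *src,
                                 int src_stride, const uint8_t *ref,
                                 int ref_stride,
                                 const uint8_t *second_pred) {
#if CONFIG_JNT_COMP
  if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
    return fn_ptr->jsdaf(src, src_stride, ref, ref_stride, second_pred,
                         &xd->jcp_param);
#endif  /* CONFIG_JNT_COMP */
  return fn_ptr->sdaf(src, src_stride, ref, ref_stride, second_pred);
}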
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 0e78eea..0b7fbca 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -7,6 +7,7 @@
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"
+#include "av1/common/blockd.h"
EOF
}
@@ -829,6 +830,9 @@
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/unsigned int/, "aom_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
+ }
}
specialize qw/aom_sad128x128 avx2 sse2/;
@@ -1100,9 +1104,14 @@
#
add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
specialize qw/aom_upsampled_pred sse2/;
+
add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
specialize qw/aom_comp_avg_upsampled_pred sse2/;
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/void aom_jnt_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
+ }
+
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
specialize qw/aom_highbd_upsampled_pred sse2/;
@@ -1133,6 +1142,9 @@
add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/uint32_t/, "aom_jnt_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
+ }
}
specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
@@ -1309,7 +1321,6 @@
add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
-
#
# Specialty Subpixel
#
@@ -1326,6 +1337,9 @@
# Comp Avg
#
add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+ if (aom_config("CONFIG_JNT_COMP") eq "yes") {
+ add_proto qw/void aom_jnt_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
+ }
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int aom_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/aom_highbd_12_variance64x64 sse2/;
diff --git a/aom_dsp/sad.c b/aom_dsp/sad.c
index 77587d5..871d13e 100644
--- a/aom_dsp/sad.c
+++ b/aom_dsp/sad.c
@@ -33,6 +33,28 @@
return sad;
}
+#if CONFIG_JNT_COMP
+#define sadMxN(m, n) \
+ unsigned int aom_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return sad(src, src_stride, ref, ref_stride, m, n); \
+ } \
+ unsigned int aom_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride, \
+ const uint8_t *second_pred) { \
+ uint8_t comp_pred[m * n]; \
+ aom_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
+ return sad(src, src_stride, comp_pred, m, m, n); \
+ } \
+ unsigned int aom_jnt_sad##m##x##n##_avg_c( \
+ const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+ const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
+ uint8_t comp_pred[m * n]; \
+ aom_jnt_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride, \
+ jcp_param); \
+ return sad(src, src_stride, comp_pred, m, m, n); \
+ }
+#else // CONFIG_JNT_COMP
#define sadMxN(m, n) \
unsigned int aom_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride) { \
@@ -45,6 +67,7 @@
aom_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \
return sad(src, src_stride, comp_pred, m, m, n); \
}
+#endif // CONFIG_JNT_COMP
// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index c07f46d..16ad001 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -180,6 +180,43 @@
return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse); \
}
+#if CONFIG_JNT_COMP
+#define SUBPIX_AVG_VAR(W, H) \
+ uint32_t aom_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *a, int a_stride, int xoffset, int yoffset, \
+ const uint8_t *b, int b_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
+ \
+ return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
+ } \
+ uint32_t aom_jnt_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *a, int a_stride, int xoffset, int yoffset, \
+ const uint8_t *b, int b_stride, uint32_t *sse, \
+ const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ aom_jnt_comp_avg_pred(temp3, second_pred, W, H, temp2, W, jcp_param); \
+ \
+ return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
+ }
+#else // CONFIG_JNT_COMP
#define SUBPIX_AVG_VAR(W, H) \
uint32_t aom_sub_pixel_avg_variance##W##x##H##_c( \
const uint8_t *a, int a_stride, int xoffset, int yoffset, \
@@ -198,6 +235,7 @@
\
return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse); \
}
+#endif // CONFIG_JNT_COMP
/* Identical to the variance call except it takes an additional parameter, sum,
* and returns that value using pass-by-reference instead of returning
@@ -275,23 +313,11 @@
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride) {
int i, j;
-#if CONFIG_JNT_COMP
- int bck_offset = pred[4096];
- int fwd_offset = pred[4097];
- double sum = bck_offset + fwd_offset;
-#endif // CONFIG_JNT_COMP
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
-#if CONFIG_JNT_COMP
- int tmp = pred[j] * fwd_offset + ref[j] * bck_offset;
- tmp = (int)(0.5 + tmp / sum);
- if (tmp > 255) tmp = 255;
- comp_pred[j] = (uint8_t)tmp;
-#else
const int tmp = pred[j] + ref[j];
comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
-#endif // CONFIG_JNT_COMP
}
comp_pred += width;
pred += width;
@@ -352,36 +378,66 @@
int subpel_y_q3, const uint8_t *ref,
int ref_stride) {
int i, j;
-#if CONFIG_JNT_COMP
- int bck_offset = pred[4096];
- int fwd_offset = pred[4097];
- double sum = bck_offset + fwd_offset;
-#endif // CONFIG_JNT_COMP
-#if CONFIG_JNT_COMP
- aom_upsampled_pred_c(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
- ref_stride);
-#else
aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
ref_stride);
-#endif // CONFIG_JNT_COMP
-
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
-#if CONFIG_JNT_COMP
- int tmp = pred[j] * fwd_offset + comp_pred[j] * bck_offset;
- tmp = (int)(0.5 + tmp / sum);
- if (tmp > 255) tmp = 255;
- comp_pred[j] = (uint8_t)tmp;
-#else
comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1);
-#endif // CONFIG_JNT_COMP
}
comp_pred += width;
pred += width;
}
}
+#if CONFIG_JNT_COMP
+void aom_jnt_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride,
+ const JNT_COMP_PARAMS *jcp_param) {
+ int i, j;
+ const int fwd_offset = jcp_param->fwd_offset;
+ const int bck_offset = jcp_param->bck_offset;
+ double sum = bck_offset + fwd_offset;
+
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
+ tmp = (int)(0.5 + tmp / sum);
+ if (tmp > 255) tmp = 255;
+ comp_pred[j] = (uint8_t)tmp;
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
+
+void aom_jnt_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+ int width, int height, int subpel_x_q3,
+ int subpel_y_q3, const uint8_t *ref,
+ int ref_stride,
+ const JNT_COMP_PARAMS *jcp_param) {
+ int i, j;
+ const int fwd_offset = jcp_param->fwd_offset;
+ const int bck_offset = jcp_param->bck_offset;
+ double sum = bck_offset + fwd_offset;
+
+ aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
+ ref_stride);
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
+ tmp = (int)(0.5 + tmp / sum);
+ if (tmp > 255) tmp = 255;
+ comp_pred[j] = (uint8_t)tmp;
+ }
+ comp_pred += width;
+ pred += width;
+ }
+}
+#endif // CONFIG_JNT_COMP
+
#if CONFIG_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, int w, int h,
diff --git a/aom_dsp/variance.h b/aom_dsp/variance.h
index d12cd91..d4a1e83 100644
--- a/aom_dsp/variance.h
+++ b/aom_dsp/variance.h
@@ -54,6 +54,18 @@
const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
int b_stride, unsigned int *sse, const uint8_t *second_pred);
+#if CONFIG_JNT_COMP
+typedef unsigned int (*aom_jnt_sad_avg_fn_t)(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ const uint8_t *second_pred,
+ const JNT_COMP_PARAMS *jcp_param);
+
+typedef unsigned int (*aom_jnt_subp_avg_variance_fn_t)(
+ const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
+ int b_stride, unsigned int *sse, const uint8_t *second_pred,
+ const JNT_COMP_PARAMS *jcp_param);
+#endif // CONFIG_JNT_COMP
+
#if CONFIG_AV1
typedef unsigned int (*aom_masked_sad_fn_t)(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
@@ -95,6 +107,10 @@
aom_obmc_sad_fn_t osdf;
aom_obmc_variance_fn_t ovf;
aom_obmc_subpixvariance_fn_t osvf;
+#if CONFIG_JNT_COMP
+ aom_jnt_sad_avg_fn_t jsdaf;
+ aom_jnt_subp_avg_variance_fn_t jsvaf;
+#endif // CONFIG_JNT_COMP
} aom_variance_fn_ptr_t;
#endif // CONFIG_AV1
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 37236b6..cdca887 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -625,6 +625,13 @@
} CFL_CTX;
#endif // CONFIG_CFL
+#if CONFIG_JNT_COMP
+typedef struct jnt_comp_params {
+ int fwd_offset;
+ int bck_offset;
+} JNT_COMP_PARAMS;
+#endif // CONFIG_JNT_COMP
+
typedef struct macroblockd {
struct macroblockd_plane plane[MAX_MB_PLANE];
uint8_t bmode_blocks_wl;
@@ -745,6 +752,10 @@
#if CONFIG_CFL
CFL_CTX *cfl;
#endif
+
+#if CONFIG_JNT_COMP
+ JNT_COMP_PARAMS jcp_param;
+#endif
} MACROBLOCKD;
static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index e6aff66..dca9cc4 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -929,10 +929,11 @@
} SubpelParams;
#if CONFIG_JNT_COMP
-static void jnt_comp_weight_assign(const AV1_COMMON *cm,
- const MB_MODE_INFO *mbmi,
- ConvolveParams *conv_params,
- int is_compound) {
+void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
+ int order_idx, int *fwd_offset, int *bck_offset,
+ int is_compound) {
+ assert(fwd_offset != NULL && bck_offset != NULL);
+
if (is_compound) {
int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
@@ -947,8 +948,8 @@
fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
}
- conv_params->bck_offset = abs(cur_frame_index - bck_frame_index);
- conv_params->fwd_offset = abs(fwd_frame_index - cur_frame_index);
+ *bck_offset = abs(cur_frame_index - bck_frame_index);
+ *fwd_offset = abs(fwd_frame_index - cur_frame_index);
const double fwd = abs(fwd_frame_index - cur_frame_index);
const double bck = abs(cur_frame_index - bck_frame_index);
@@ -967,22 +968,21 @@
for (quant_dist_idx = 0; quant_dist_idx < 4; ++quant_dist_idx) {
if (ratio < quant_dist_category[quant_dist_idx]) break;
}
- conv_params->fwd_offset =
- quant_dist_lookup_table[0][quant_dist_idx][order];
- conv_params->bck_offset =
- quant_dist_lookup_table[0][quant_dist_idx][1 - order];
+ *fwd_offset = quant_dist_lookup_table[order_idx][quant_dist_idx][order];
+ *bck_offset =
+ quant_dist_lookup_table[order_idx][quant_dist_idx][1 - order];
} else {
- conv_params->fwd_offset = (DIST_PRECISION >> 1);
- conv_params->bck_offset = (DIST_PRECISION >> 1);
+ *fwd_offset = (DIST_PRECISION >> 1);
+ *bck_offset = (DIST_PRECISION >> 1);
}
if (mbmi->compound_idx) {
- conv_params->fwd_offset = -1;
- conv_params->bck_offset = -1;
+ *fwd_offset = -1;
+ *bck_offset = -1;
}
} else {
- conv_params->bck_offset = -1;
- conv_params->fwd_offset = -1;
+ *bck_offset = -1;
+ *fwd_offset = -1;
}
}
#endif // CONFIG_JNT_COMP
@@ -1288,7 +1288,8 @@
ConvolveParams conv_params =
get_conv_params_no_round(ref, ref, plane, tmp_dst, MAX_SB_SIZE);
#if CONFIG_JNT_COMP
- jnt_comp_weight_assign(cm, &mi->mbmi, &conv_params, is_compound);
+ av1_jnt_comp_weight_assign(cm, &mi->mbmi, 0, &conv_params.fwd_offset,
+ &conv_params.bck_offset, is_compound);
#endif // CONFIG_JNT_COMP
#else
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 441fe79..272e4f6 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -560,6 +560,12 @@
uint8_t *ext_dst1[3],
int ext_dst_stride1[3]);
+#if CONFIG_JNT_COMP
+void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
+ int order_idx, int *fwd_offset, int *bck_offset,
+ int is_compound);
+#endif // CONFIG_JNT_COMP
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 1990e70..5d6ab41 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2758,6 +2758,19 @@
av1_set_speed_features_framesize_independent(cpi);
av1_set_speed_features_framesize_dependent(cpi);
+#if CONFIG_JNT_COMP
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF, JSDAF, JSVAF) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].sdaf = SDAF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svaf = SVAF; \
+ cpi->fn_ptr[BT].sdx3f = SDX3F; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF; \
+ cpi->fn_ptr[BT].jsdaf = JSDAF; \
+ cpi->fn_ptr[BT].jsvaf = JSVAF;
+#else // CONFIG_JNT_COMP
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
@@ -2767,7 +2780,142 @@
cpi->fn_ptr[BT].sdx3f = SDX3F; \
cpi->fn_ptr[BT].sdx8f = SDX8F; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
+#endif // CONFIG_JNT_COMP
+#if CONFIG_JNT_COMP
+#if CONFIG_EXT_PARTITION_TYPES
+ BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
+ aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, NULL, NULL,
+ aom_sad4x16x4d, aom_jnt_sad4x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance4x16_c)
+
+ BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4,
+ aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, NULL, NULL,
+ aom_sad16x4x4d, aom_jnt_sad16x4_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x4_c)
+
+ BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32,
+ aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, NULL, NULL,
+ aom_sad8x32x4d, aom_jnt_sad8x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x32_c)
+
+ BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8,
+ aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, NULL, NULL,
+ aom_sad32x8x4d, aom_jnt_sad32x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x8_c)
+
+ BFP(BLOCK_16X64, aom_sad16x64, aom_sad16x64_avg, aom_variance16x64,
+ aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64, NULL, NULL,
+ aom_sad16x64x4d, aom_jnt_sad16x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x64_c)
+
+ BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16,
+ aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16, NULL, NULL,
+ aom_sad64x16x4d, aom_jnt_sad64x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x16_c)
+
+#if CONFIG_EXT_PARTITION
+ BFP(BLOCK_32X128, aom_sad32x128, aom_sad32x128_avg, aom_variance32x128,
+ aom_sub_pixel_variance32x128, aom_sub_pixel_avg_variance32x128, NULL,
+ NULL, aom_sad32x128x4d, aom_jnt_sad32x128_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x128_c)
+
+ BFP(BLOCK_128X32, aom_sad128x32, aom_sad128x32_avg, aom_variance128x32,
+ aom_sub_pixel_variance128x32, aom_sub_pixel_avg_variance128x32, NULL,
+ NULL, aom_sad128x32x4d, aom_jnt_sad128x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance128x32_c)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_PARTITION
+ BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
+ aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128,
+ aom_sad128x128x3, aom_sad128x128x8, aom_sad128x128x4d,
+ aom_jnt_sad128x128_avg_c, aom_jnt_sub_pixel_avg_variance128x128_c)
+
+ BFP(BLOCK_128X64, aom_sad128x64, aom_sad128x64_avg, aom_variance128x64,
+ aom_sub_pixel_variance128x64, aom_sub_pixel_avg_variance128x64, NULL,
+ NULL, aom_sad128x64x4d, aom_jnt_sad128x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance128x64_c)
+
+ BFP(BLOCK_64X128, aom_sad64x128, aom_sad64x128_avg, aom_variance64x128,
+ aom_sub_pixel_variance64x128, aom_sub_pixel_avg_variance64x128, NULL,
+ NULL, aom_sad64x128x4d, aom_jnt_sad64x128_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x128_c)
+#endif // CONFIG_EXT_PARTITION
+
+ BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16,
+ aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, NULL, NULL,
+ aom_sad32x16x4d, aom_jnt_sad32x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x16_c)
+
+ BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg, aom_variance16x32,
+ aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32, NULL, NULL,
+ aom_sad16x32x4d, aom_jnt_sad16x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x32_c)
+
+ BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg, aom_variance64x32,
+ aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32, NULL, NULL,
+ aom_sad64x32x4d, aom_jnt_sad64x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x32_c)
+
+ BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg, aom_variance32x64,
+ aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64, NULL, NULL,
+ aom_sad32x64x4d, aom_jnt_sad32x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x64_c)
+
+ BFP(BLOCK_32X32, aom_sad32x32, aom_sad32x32_avg, aom_variance32x32,
+ aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32,
+ aom_sad32x32x3, aom_sad32x32x8, aom_sad32x32x4d, aom_jnt_sad32x32_avg_c,
+ aom_jnt_sub_pixel_avg_variance32x32_c)
+
+ BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg, aom_variance64x64,
+ aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64,
+ aom_sad64x64x3, aom_sad64x64x8, aom_sad64x64x4d, aom_jnt_sad64x64_avg_c,
+ aom_jnt_sub_pixel_avg_variance64x64_c)
+
+ BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg, aom_variance16x16,
+ aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16,
+ aom_sad16x16x3, aom_sad16x16x8, aom_sad16x16x4d, aom_jnt_sad16x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x16_c)
+
+ BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg, aom_variance16x8,
+ aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8, aom_sad16x8x3,
+ aom_sad16x8x8, aom_sad16x8x4d, aom_jnt_sad16x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance16x8_c)
+
+ BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg, aom_variance8x16,
+ aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16, aom_sad8x16x3,
+ aom_sad8x16x8, aom_sad8x16x4d, aom_jnt_sad8x16_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x16_c)
+
+ BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg, aom_variance8x8,
+ aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x3,
+ aom_sad8x8x8, aom_sad8x8x4d, aom_jnt_sad8x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x8_c)
+
+ BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg, aom_variance8x4,
+ aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, NULL,
+ aom_sad8x4x8, aom_sad8x4x4d, aom_jnt_sad8x4_avg_c,
+ aom_jnt_sub_pixel_avg_variance8x4_c)
+
+ BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg, aom_variance4x8,
+ aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, NULL,
+ aom_sad4x8x8, aom_sad4x8x4d, aom_jnt_sad4x8_avg_c,
+ aom_jnt_sub_pixel_avg_variance4x8_c)
+
+ BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4,
+ aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3,
+ aom_sad4x4x8, aom_sad4x4x4d, aom_jnt_sad4x4_avg_c,
+ aom_jnt_sub_pixel_avg_variance4x4_c)
+
+ BFP(BLOCK_2X2, NULL, NULL, aom_variance2x2, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL)
+ BFP(BLOCK_2X4, NULL, NULL, aom_variance2x4, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL)
+ BFP(BLOCK_4X2, NULL, NULL, aom_variance4x2, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL)
+#else // CONFIG_JNT_COMP
#if CONFIG_EXT_PARTITION_TYPES
BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16,
aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, NULL, NULL,
@@ -2818,59 +2966,6 @@
NULL, aom_sad64x128x4d)
#endif // CONFIG_EXT_PARTITION
-#if CONFIG_JNT_COMP
- BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg_c, aom_variance32x16,
- aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, NULL, NULL,
- aom_sad32x16x4d)
-
- BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg_c, aom_variance16x32,
- aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32, NULL, NULL,
- aom_sad16x32x4d)
-
- BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg_c, aom_variance64x32,
- aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32, NULL, NULL,
- aom_sad64x32x4d)
-
- BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg_c, aom_variance32x64,
- aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64, NULL, NULL,
- aom_sad32x64x4d)
-
- BFP(BLOCK_32X32, aom_sad32x32, aom_sad32x32_avg_c, aom_variance32x32,
- aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32,
- aom_sad32x32x3, aom_sad32x32x8, aom_sad32x32x4d)
-
- BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg_c, aom_variance64x64,
- aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64,
- aom_sad64x64x3, aom_sad64x64x8, aom_sad64x64x4d)
-
- BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg_c, aom_variance16x16,
- aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16,
- aom_sad16x16x3, aom_sad16x16x8, aom_sad16x16x4d)
-
- BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg_c, aom_variance16x8,
- aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8, aom_sad16x8x3,
- aom_sad16x8x8, aom_sad16x8x4d)
-
- BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg_c, aom_variance8x16,
- aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16, aom_sad8x16x3,
- aom_sad8x16x8, aom_sad8x16x4d)
-
- BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg_c, aom_variance8x8,
- aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x3,
- aom_sad8x8x8, aom_sad8x8x4d)
-
- BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg_c, aom_variance8x4,
- aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, NULL,
- aom_sad8x4x8, aom_sad8x4x4d)
-
- BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg_c, aom_variance4x8,
- aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, NULL,
- aom_sad4x8x8, aom_sad4x8x4d)
-
- BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg_c, aom_variance4x4,
- aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3,
- aom_sad4x4x8, aom_sad4x4x4d)
-#else
BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16,
aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, NULL, NULL,
aom_sad32x16x4d)
@@ -2922,11 +3017,11 @@
BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4,
aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3,
aom_sad4x4x8, aom_sad4x4x4d)
-#endif // CONFIG_JNT_COMP
BFP(BLOCK_2X2, NULL, NULL, aom_variance2x2, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_2X4, NULL, NULL, aom_variance2x4, NULL, NULL, NULL, NULL, NULL)
BFP(BLOCK_4X2, NULL, NULL, aom_variance4x2, NULL, NULL, NULL, NULL, NULL)
+#endif // CONFIG_JNT_COMP
#define OBFP(BT, OSDF, OVF, OSVF) \
cpi->fn_ptr[BT].osdf = OSDF; \
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index f4d132e..63f8105 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -176,6 +176,39 @@
}
/* checks if (r, c) has better score than previous best */
+#if CONFIG_JNT_COMP
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ if (second_pred == NULL) { \
+ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse); \
+ } else if (mask) { \
+ thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, second_pred, mask, \
+ mask_stride, invert_mask, &sse); \
+ } else { \
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1) \
+ thismse = vfp->jsvaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse, second_pred, \
+ &xd->jcp_param); \
+ else \
+ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse, second_pred); \
+ } \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else // CONFIG_JNT_COMP
#define CHECK_BETTER(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
MV this_mv = { r, c }; \
@@ -201,6 +234,7 @@
} else { \
v = INT_MAX; \
}
+#endif // CONFIG_JNT_COMP
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
@@ -345,15 +379,18 @@
vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
} else {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
- if (mask)
+ if (mask) {
aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
mask, mask_stride, invert_mask);
- else
+ } else {
#if CONFIG_JNT_COMP
- aom_comp_avg_pred_c(comp_pred, second_pred, w, h, y + offset, y_stride);
-#else
- aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ aom_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
+ y_stride, &xd->jcp_param);
+ else
#endif // CONFIG_JNT_COMP
+ aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ }
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
}
} else {
@@ -365,15 +402,18 @@
(void)xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
- if (mask)
+ if (mask) {
aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
mask, mask_stride, invert_mask);
- else
+ } else {
#if CONFIG_JNT_COMP
- aom_comp_avg_pred_c(comp_pred, second_pred, w, h, y + offset, y_stride);
-#else
- aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ aom_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset,
+ y_stride, &xd->jcp_param);
+ else
#endif // CONFIG_JNT_COMP
+ aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ }
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} else {
besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
@@ -666,18 +706,21 @@
(void)xd;
#endif // CONFIG_HIGHBITDEPTH
if (second_pred != NULL) {
- if (mask)
+ if (mask) {
aom_comp_mask_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
subpel_y_q3, y, y_stride, mask,
mask_stride, invert_mask);
- else
+ } else {
#if CONFIG_JNT_COMP
- aom_comp_avg_upsampled_pred_c(pred, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride);
-#else
- aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
- subpel_y_q3, y, y_stride);
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ aom_jnt_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
+ subpel_y_q3, y, y_stride,
+ &xd->jcp_param);
+ else
#endif // CONFIG_JNT_COMP
+ aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
+ subpel_y_q3, y, y_stride);
+ }
} else {
aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride);
}
@@ -771,16 +814,25 @@
mask_stride, invert_mask, w, h, &sse);
} else {
const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
- if (second_pred == NULL)
+ if (second_pred == NULL) {
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
- else if (mask)
+ } else if (mask) {
thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, second_pred, mask,
mask_stride, invert_mask, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 &&
+ xd->jcp_param.bck_offset != -1)
+ thismse =
+ vfp->jsvaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse, second_pred, &xd->jcp_param);
+ else
+#endif // CONFIG_JNT_COMP
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
}
cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
@@ -814,16 +866,24 @@
} else {
const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
- if (second_pred == NULL)
+ if (second_pred == NULL) {
thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
src_stride, &sse);
- else if (mask)
+ } else if (mask) {
thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, second_pred, mask,
mask_stride, invert_mask, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ thismse =
+ vfp->jsvaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse, second_pred, &xd->jcp_param);
+ else
+#endif // CONFIG_JNT_COMP
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
}
cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
@@ -1397,11 +1457,21 @@
const MV mv = { best_mv->row * 8, best_mv->col * 8 };
unsigned int unused;
- return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
- what->buf, what->stride, &unused, second_pred) +
- (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit)
- : 0);
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ return vfp->jsvaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
+ what->buf, what->stride, &unused, second_pred,
+ &xd->jcp_param) +
+ (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
+ x->errorperbit)
+ : 0);
+ else
+#endif // CONFIG_JNT_COMP
+ return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
+ what->buf, what->stride, &unused, second_pred) +
+ (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
+ x->errorperbit)
+ : 0);
}
int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
@@ -2405,16 +2475,25 @@
clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
- if (mask)
+ if (mask) {
best_sad = fn_ptr->msdf(what->buf, what->stride,
get_buf_from_mv(in_what, best_mv), in_what->stride,
second_pred, mask, mask_stride, invert_mask) +
mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
- else
- best_sad =
- fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
- in_what->stride, second_pred) +
- mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ best_sad = fn_ptr->jsdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, best_mv),
+ in_what->stride, second_pred, &xd->jcp_param) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+ else
+#endif // CONFIG_JNT_COMP
+ best_sad = fn_ptr->sdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, best_mv),
+ in_what->stride, second_pred) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
+ }
for (i = 0; i < search_range; ++i) {
int best_site = -1;
@@ -2425,14 +2504,22 @@
if (is_mv_in(&x->mv_limits, &mv)) {
unsigned int sad;
- if (mask)
+ if (mask) {
sad = fn_ptr->msdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride,
second_pred, mask, mask_stride, invert_mask);
- else
- sad = fn_ptr->sdaf(what->buf, what->stride,
- get_buf_from_mv(in_what, &mv), in_what->stride,
- second_pred);
+ } else {
+#if CONFIG_JNT_COMP
+ if (xd->jcp_param.fwd_offset != -1 && xd->jcp_param.bck_offset != -1)
+ sad = fn_ptr->jsdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride,
+ second_pred, &xd->jcp_param);
+ else
+#endif // CONFIG_JNT_COMP
+ sad = fn_ptr->sdaf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride,
+ second_pred);
+ }
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 36c3aed..92249a1 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5789,56 +5789,6 @@
return 1;
}
-#if CONFIG_JNT_COMP
-static void jnt_comp_weight_assign(const AV1_COMMON *cm,
- const MB_MODE_INFO *mbmi, int order_idx,
- uint8_t *second_pred) {
- if (mbmi->compound_idx) {
- second_pred[4096] = -1;
- second_pred[4097] = -1;
- } else {
- int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
- int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
- int bck_frame_index = 0, fwd_frame_index = 0;
- int cur_frame_index = cm->cur_frame->cur_frame_offset;
-
- if (bck_idx >= 0) {
- bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset;
- }
-
- if (fwd_idx >= 0) {
- fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
- }
-
- const double fwd = abs(fwd_frame_index - cur_frame_index);
- const double bck = abs(cur_frame_index - bck_frame_index);
- int order;
- double ratio;
-
- if (COMPOUND_WEIGHT_MODE == DIST) {
- if (fwd > bck) {
- ratio = (bck != 0) ? fwd / bck : 5.0;
- order = 0;
- } else {
- ratio = (fwd != 0) ? bck / fwd : 5.0;
- order = 1;
- }
- int quant_dist_idx;
- for (quant_dist_idx = 0; quant_dist_idx < 4; ++quant_dist_idx) {
- if (ratio < quant_dist_category[quant_dist_idx]) break;
- }
- second_pred[4096] =
- quant_dist_lookup_table[order_idx][quant_dist_idx][order];
- second_pred[4097] =
- quant_dist_lookup_table[order_idx][quant_dist_idx][1 - order];
- } else {
- second_pred[4096] = (DIST_PRECISION >> 1);
- second_pred[4097] = (DIST_PRECISION >> 1);
- }
- }
-}
-#endif // CONFIG_JNT_COMP
-
static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int_mv *frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
@@ -5901,13 +5851,8 @@
// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
-#if CONFIG_JNT_COMP
- DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE + 2]);
- uint8_t *second_pred;
-#else
DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
uint8_t *second_pred;
-#endif // CONFIG_JNT_COMP
#else // CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
@@ -6046,7 +5991,8 @@
#if CONFIG_JNT_COMP
const int order_idx = id != 0;
- jnt_comp_weight_assign(cm, mbmi, order_idx, second_pred);
+ av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset,
+ &xd->jcp_param.bck_offset, 1);
#endif // CONFIG_JNT_COMP
// Do compound motion search on the current reference frame.
@@ -6761,7 +6707,8 @@
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_JNT_COMP
- jnt_comp_weight_assign(cm, mbmi, 0, second_pred);
+ av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset,
+ &xd->jcp_param.bck_offset, 1);
#endif // CONFIG_JNT_COMP
if (scaled_ref_frame) {
@@ -6930,11 +6877,7 @@
// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
-#if CONFIG_JNT_COMP
- DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE + 2]);
-#else
DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
-#endif // CONFIG_JNT_COMP
uint8_t *second_pred;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);