JNT_COMP: fix issues with convolve_round

Fix a sudden PSNR drop on a few frames when convolve_round is
turned on.

C functions are added.

Corresponding SIMD functions and unit tests will be updated in a
future CL.

Change-Id: I0126ea4d54c98951e5b1efeaecd5468fdc18724a
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 15fc6ed..81f06a4 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -534,10 +534,27 @@
                   (1 << (offset_bits_horiz + FILTER_BITS -
                          conv_params->round_0 - conv_params->round_1)) -
                   (1 << (offset_bits_vert - conv_params->round_1));
+#if CONFIG_JNT_COMP
+            if (conv_params->fwd_offset != -1 &&
+                conv_params->bck_offset != -1) {
+              if (conv_params->do_average) {
+                *p += sum * conv_params->bck_offset;
+                *p = ROUND_POWER_OF_TWO(*p, DIST_PRECISION_BITS - 1);
+              } else {
+                *p = sum * conv_params->fwd_offset;
+              }
+            } else {
+              if (conv_params->do_average)
+                *p += sum;
+              else
+                *p = sum;
+            }
+#else
             if (conv_params->do_average)
               *p += sum;
             else
               *p = sum;
+#endif  // CONFIG_JNT_COMP
           } else {
 #else
           {
@@ -615,6 +632,10 @@
   uint16_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
 
   ConvolveParams conv_params = get_conv_params(0, 0, 0);
+#if CONFIG_JNT_COMP
+  conv_params.fwd_offset = -1;
+  conv_params.bck_offset = -1;
+#endif
   for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
     for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
       // avoid warping extra 8x8 blocks in the padded region of the frame
@@ -847,10 +868,27 @@
                   (1 << (offset_bits_horiz + FILTER_BITS -
                          conv_params->round_0 - conv_params->round_1)) -
                   (1 << (offset_bits_vert - conv_params->round_1));
+#if CONFIG_JNT_COMP
+            if (conv_params->fwd_offset != -1 &&
+                conv_params->bck_offset != -1) {
+              if (conv_params->do_average) {
+                *p += sum * conv_params->bck_offset;
+                *p = ROUND_POWER_OF_TWO(*p, DIST_PRECISION_BITS - 1);
+              } else {
+                *p = sum * conv_params->fwd_offset;
+              }
+            } else {
+              if (conv_params->do_average)
+                *p += sum;
+              else
+                *p = sum;
+            }
+#else
             if (conv_params->do_average)
               *p += sum;
             else
               *p = sum;
+#endif  // CONFIG_JNT_COMP
           } else {
 #else
           {
@@ -922,6 +960,10 @@
   int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
   uint8_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
   ConvolveParams conv_params = get_conv_params(0, 0, 0);
+#if CONFIG_JNT_COMP
+  conv_params.fwd_offset = -1;
+  conv_params.bck_offset = -1;
+#endif
 
   for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
     for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {