Early termination for warp error computation This terminates the computation for the warp error once the frame error exceeds the best frame error found so far to avoid unneccessary computation. Change-Id: I094a0b3e13f8b91610e051cb91d20a815879dd80

commit: 81f6ecd1217d44c7dbcb33d5392d154ef72c097a [log] [tgz]
author: Sarah Parker <sarahparker@google.com> Fri May 26 16:10:11 2017 -0700
committer: Sarah Parker <sarahparker@google.com> Mon Jun 05 17:19:12 2017 +0000
tree: 84f804719bb50259b0c9378622c2ea64214d7c7f
parent: 28c779beb3f878ceabf68f1a41ba7d9088e99d66 [diff]
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index fc83268..6158b15 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c

@@ -18,6 +18,8 @@
 #include "./av1_rtcd.h"
 #include "av1/common/warped_motion.h"
 
+#define WARP_ERROR_BLOCK 32
+
 /* clang-format off */
 static const int error_measure_lut[512] = {
   // pow 0.7
@@ -1080,14 +1082,13 @@
 }
 
 static int64_t highbd_frame_error(const uint16_t *const ref, int stride,
-                                  const uint16_t *const dst, int p_col,
-                                  int p_row, int p_width, int p_height,
-                                  int p_stride, int bd) {
+                                  const uint16_t *const dst, int p_width,
+                                  int p_height, int p_stride, int bd) {
   int64_t sum_error = 0;
   for (int i = 0; i < p_height; ++i) {
     for (int j = 0; j < p_width; ++j) {
-      sum_error += highbd_error_measure(
-          dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride], bd);
+      sum_error +=
+          highbd_error_measure(dst[j + i * p_stride] - ref[j + i * stride], bd);
     }
   }
   return sum_error;
@@ -1097,19 +1098,30 @@
     WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height,
     int stride, const uint8_t *const dst8, int p_col, int p_row, int p_width,
     int p_height, int p_stride, int subsampling_x, int subsampling_y,
-    int x_scale, int y_scale, int bd) {
+    int x_scale, int y_scale, int bd, int64_t best_error) {
   int64_t gm_sumerr = 0;
-  uint16_t *tmp = aom_malloc(p_width * p_height * sizeof(*tmp));
-  if (!tmp) return INT64_MAX;
+  int warp_w, warp_h;
+  int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
+  int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
+  uint16_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
 
-  highbd_warp_plane(wm, ref8, width, height, stride, CONVERT_TO_BYTEPTR(tmp),
-                    p_col, p_row, p_width, p_height, p_width, subsampling_x,
-                    subsampling_y, x_scale, y_scale, bd, 0);
+  for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
+    for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
+      // avoid warping extra 8x8 blocks in the padded region of the frame
+      // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
+      warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
+      warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
+      highbd_warp_plane(wm, ref8, width, height, stride,
+                        CONVERT_TO_BYTEPTR(tmp), j, i, warp_w, warp_h,
+                        WARP_ERROR_BLOCK, subsampling_x, subsampling_y, x_scale,
+                        y_scale, bd, 0);
 
-  gm_sumerr = highbd_frame_error(tmp, p_width, CONVERT_TO_SHORTPTR(dst8), p_col,
-                                 p_row, p_width, p_height, p_stride, bd);
-
-  aom_free(tmp);
+      gm_sumerr += highbd_frame_error(
+          tmp, WARP_ERROR_BLOCK, CONVERT_TO_SHORTPTR(dst8) + j + i * p_stride,
+          warp_w, warp_h, p_stride, bd);
+      if (gm_sumerr > best_error) return gm_sumerr;
+    }
+  }
   return gm_sumerr;
 }
 #endif  // CONFIG_HIGHBITDEPTH
@@ -1368,13 +1380,13 @@
 }
 
 static int64_t frame_error(const uint8_t *const ref, int stride,
-                           const uint8_t *const dst, int p_col, int p_row,
-                           int p_width, int p_height, int p_stride) {
+                           const uint8_t *const dst, int p_width, int p_height,
+                           int p_stride) {
   int64_t sum_error = 0;
   for (int i = 0; i < p_height; ++i) {
     for (int j = 0; j < p_width; ++j) {
-      sum_error += (int64_t)error_measure(
-          dst[j + i * p_stride] - ref[(j + p_col) + (i + p_row) * stride]);
+      sum_error +=
+          (int64_t)error_measure(dst[j + i * p_stride] - ref[j + i * stride]);
     }
   }
   return sum_error;
@@ -1385,19 +1397,28 @@
                           const uint8_t *const dst, int p_col, int p_row,
                           int p_width, int p_height, int p_stride,
                           int subsampling_x, int subsampling_y, int x_scale,
-                          int y_scale) {
+                          int y_scale, int64_t best_error) {
   int64_t gm_sumerr = 0;
-  uint8_t *tmp = aom_malloc(p_width * p_height);
-  if (!tmp) return INT64_MAX;
+  int warp_w, warp_h;
+  int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
+  int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
+  uint8_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
 
-  warp_plane(wm, ref, width, height, stride, tmp, p_col, p_row, p_width,
-             p_height, p_width, subsampling_x, subsampling_y, x_scale, y_scale,
-             0);
+  for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
+    for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
+      // avoid warping extra 8x8 blocks in the padded region of the frame
+      // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
+      warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
+      warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
+      warp_plane(wm, ref, width, height, stride, tmp, j, i, warp_w, warp_h,
+                 WARP_ERROR_BLOCK, subsampling_x, subsampling_y, x_scale,
+                 y_scale, 0);
 
-  gm_sumerr =
-      frame_error(tmp, p_width, dst, p_col, p_row, p_width, p_height, p_stride);
-
-  aom_free(tmp);
+      gm_sumerr += frame_error(tmp, WARP_ERROR_BLOCK, dst + j + i * p_stride,
+                               warp_w, warp_h, p_stride);
+      if (gm_sumerr > best_error) return gm_sumerr;
+    }
+  }
   return gm_sumerr;
 }
 
@@ -1405,17 +1426,16 @@
 #if CONFIG_HIGHBITDEPTH
     int use_hbd, int bd,
 #endif  // CONFIG_HIGHBITDEPTH
-    const uint8_t *ref, int stride, uint8_t *dst, int p_col, int p_row,
-    int p_width, int p_height, int p_stride) {
+    const uint8_t *ref, int stride, uint8_t *dst, int p_width, int p_height,
+    int p_stride) {
 #if CONFIG_HIGHBITDEPTH
   if (use_hbd) {
     return highbd_frame_error(CONVERT_TO_SHORTPTR(ref), stride,
-                              CONVERT_TO_SHORTPTR(dst), p_col, p_row, p_width,
-                              p_height, p_stride, bd);
+                              CONVERT_TO_SHORTPTR(dst), p_width, p_height,
+                              p_stride, bd);
   }
 #endif  // CONFIG_HIGHBITDEPTH
-  return frame_error(ref, stride, dst, p_col, p_row, p_width, p_height,
-                     p_stride);
+  return frame_error(ref, stride, dst, p_width, p_height, p_stride);
 }
 
 int64_t av1_warp_error(WarpedMotionParams *wm,
@@ -1425,18 +1445,19 @@
                        const uint8_t *ref, int width, int height, int stride,
                        uint8_t *dst, int p_col, int p_row, int p_width,
                        int p_height, int p_stride, int subsampling_x,
-                       int subsampling_y, int x_scale, int y_scale) {
+                       int subsampling_y, int x_scale, int y_scale,
+                       int64_t best_error) {
   if (wm->wmtype <= AFFINE)
     if (!get_shear_params(wm)) return 1;
 #if CONFIG_HIGHBITDEPTH
   if (use_hbd)
     return highbd_warp_error(wm, ref, width, height, stride, dst, p_col, p_row,
                              p_width, p_height, p_stride, subsampling_x,
-                             subsampling_y, x_scale, y_scale, bd);
+                             subsampling_y, x_scale, y_scale, bd, best_error);
 #endif  // CONFIG_HIGHBITDEPTH
   return warp_error(wm, ref, width, height, stride, dst, p_col, p_row, p_width,
                     p_height, p_stride, subsampling_x, subsampling_y, x_scale,
-                    y_scale);
+                    y_scale, best_error);
 }
 
 void av1_warp_plane(WarpedMotionParams *wm,

diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index 78abced..d0b61aa 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h

@@ -82,7 +82,8 @@
                        const uint8_t *ref, int width, int height, int stride,
                        uint8_t *dst, int p_col, int p_row, int p_width,
                        int p_height, int p_stride, int subsampling_x,
-                       int subsampling_y, int x_scale, int y_scale);
+                       int subsampling_y, int x_scale, int y_scale,
+                       int64_t best_error);
 
 // Returns the error between the frame described by 'ref' and the frame
 // described by 'dst'.
@@ -90,8 +91,8 @@
 #if CONFIG_HIGHBITDEPTH
     int use_hbd, int bd,
 #endif  // CONFIG_HIGHBITDEPTH
-    const uint8_t *ref, int stride, uint8_t *dst, int p_col, int p_row,
-    int p_width, int p_height, int p_stride);
+    const uint8_t *ref, int stride, uint8_t *dst, int p_width, int p_height,
+    int p_stride);
 
 void av1_warp_plane(WarpedMotionParams *wm,
 #if CONFIG_HIGHBITDEPTH

diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 36d09c0..b6e0f5d 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c

@@ -4716,8 +4716,8 @@
             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
 #endif  // CONFIG_HIGHBITDEPTH
             ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
-            cpi->source->y_buffer, 0, 0, cpi->source->y_width,
-            cpi->source->y_height, cpi->source->y_stride);
+            cpi->source->y_buffer, cpi->source->y_width, cpi->source->y_height,
+            cpi->source->y_stride);
 
         if (ref_frame_error == 0) continue;
 
@@ -4752,7 +4752,8 @@
                   ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
                   ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
                   cpi->source->y_buffer, cpi->source->y_width,
-                  cpi->source->y_height, cpi->source->y_stride, 3);
+                  cpi->source->y_height, cpi->source->y_stride, 3,
+                  best_warp_error);
               if (warp_error < best_warp_error) {
                 best_warp_error = warp_error;
                 // Save the wm_params modified by refine_integerized_param()

diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index 74cbc8a..da1e403 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c

@@ -131,8 +131,8 @@
 #endif  // CONFIG_HIGHBITDEPTH
                                  uint8_t *ref, int r_width, int r_height,
                                  int r_stride, uint8_t *dst, int d_width,
-                                 int d_height, int d_stride,
-                                 int n_refinements) {
+                                 int d_height, int d_stride, int n_refinements,
+                                 int64_t best_frame_error) {
   static const int max_trans_model_params[TRANS_TYPES] = {
     0, 2, 4, 6, 8, 8, 8
   };
@@ -154,7 +154,8 @@
                               ref, r_width, r_height, r_stride,
                               dst + border * d_stride + border, border, border,
                               d_width - 2 * border, d_height - 2 * border,
-                              d_stride, 0, 0, 16, 16);
+                              d_stride, 0, 0, 16, 16, best_frame_error);
+  best_error = AOMMIN(best_error, best_frame_error);
   step = 1 << (n_refinements + 1);
   for (i = 0; i < n_refinements; i++, step >>= 1) {
     for (p = 0; p < n_params; ++p) {
@@ -174,7 +175,7 @@
 #endif  // CONFIG_HIGHBITDEPTH
           ref, r_width, r_height, r_stride, dst + border * d_stride + border,
           border, border, d_width - 2 * border, d_height - 2 * border, d_stride,
-          0, 0, 16, 16);
+          0, 0, 16, 16, best_error);
       if (step_error < best_error) {
         best_error = step_error;
         best_param = *param;
@@ -190,7 +191,7 @@
 #endif  // CONFIG_HIGHBITDEPTH
           ref, r_width, r_height, r_stride, dst + border * d_stride + border,
           border, border, d_width - 2 * border, d_height - 2 * border, d_stride,
-          0, 0, 16, 16);
+          0, 0, 16, 16, best_error);
       if (step_error < best_error) {
         best_error = step_error;
         best_param = *param;
@@ -209,7 +210,7 @@
 #endif  // CONFIG_HIGHBITDEPTH
             ref, r_width, r_height, r_stride, dst + border * d_stride + border,
             border, border, d_width - 2 * border, d_height - 2 * border,
-            d_stride, 0, 0, 16, 16);
+            d_stride, 0, 0, 16, 16, best_error);
         if (step_error < best_error) {
           best_error = step_error;
           best_param = *param;

diff --git a/av1/encoder/global_motion.h b/av1/encoder/global_motion.h
index 38509df..7fca532 100644
--- a/av1/encoder/global_motion.h
+++ b/av1/encoder/global_motion.h

@@ -36,7 +36,8 @@
 #endif  // CONFIG_HIGHBITDEPTH
                                  uint8_t *ref, int r_width, int r_height,
                                  int r_stride, uint8_t *dst, int d_width,
-                                 int d_height, int d_stride, int n_refinements);
+                                 int d_height, int d_stride, int n_refinements,
+                                 int64_t best_frame_error);
 
 /*
   Computes "num_motions" candidate global motion parameters between two frames.
commit	81f6ecd1217d44c7dbcb33d5392d154ef72c097a	[log] [tgz]
author	Sarah Parker <sarahparker@google.com>	Fri May 26 16:10:11 2017 -0700
committer	Sarah Parker <sarahparker@google.com>	Mon Jun 05 17:19:12 2017 +0000
tree	84f804719bb50259b0c9378622c2ea64214d7c7f
parent	28c779beb3f878ceabf68f1a41ba7d9088e99d66 [diff]