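od_filter_dering_direction() no longer returns the total change

The 4x4 and 8x8 variants (C and SIMD) now return void; the accumulated
absolute change (total_abs) is no longer computed. The RTCD prototypes,
the od_filter_dering_direction_func typedef and the dering unit test are
updated to match, so the test now compares only the filtered output.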

Change-Id: Ibe19f4422324680b0d3132eab79bdc035911d79d
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index d110e97..310e604 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -626,8 +626,8 @@
   add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
   add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
   add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
-  add_proto qw/int od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
-  add_proto qw/int od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
+  add_proto qw/void od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
+  add_proto qw/void od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
 
   add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
   add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
diff --git a/av1/common/od_dering.c b/av1/common/od_dering.c
index 58029c1..0c9546e 100644
--- a/av1/common/od_dering.c
+++ b/av1/common/od_dering.c
@@ -113,14 +113,13 @@
 }
 
 /* Smooth in the direction detected. */
-int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
-                                     const uint16_t *in, int threshold,
-                                     int dir) {
+void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
+                                      const uint16_t *in, int threshold,
+                                      int dir) {
   int i;
   int j;
   int k;
   static const int taps[3] = { 3, 2, 1 };
-  int total_abs = 0;
   for (i = 0; i < 8; i++) {
     for (j = 0; j < 8; j++) {
       int16_t sum;
@@ -139,23 +138,20 @@
         if (abs(p1) < threshold) sum += taps[k] * p1;
       }
       sum = (sum + 8) >> 4;
-      total_abs += abs(sum);
       yy = xx + sum;
       y[i * ystride + j] = yy;
     }
   }
-  return (total_abs + 8) >> 4;
 }
 
 /* Smooth in the direction detected. */
-int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
-                                     const uint16_t *in, int threshold,
-                                     int dir) {
+void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
+                                      const uint16_t *in, int threshold,
+                                      int dir) {
   int i;
   int j;
   int k;
   static const int taps[2] = { 4, 1 };
-  int total_abs = 0;
   for (i = 0; i < 4; i++) {
     for (j = 0; j < 4; j++) {
       int16_t sum;
@@ -174,12 +170,10 @@
         if (abs(p1) < threshold) sum += taps[k] * p1;
       }
       sum = (sum + 8) >> 4;
-      total_abs += abs(sum);
       yy = xx + sum;
       y[i * ystride + j] = yy;
     }
   }
-  return (total_abs + 2) >> 2;
 }
 
 /* This table approximates x^0.16 with the index being log2(x). It is clamped
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h
index 475a753..523f419 100644
--- a/av1/common/od_dering.h
+++ b/av1/common/od_dering.h
@@ -39,9 +39,9 @@
   unsigned char bx;
 } dering_list;
 
-typedef int (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
-                                               const uint16_t *in,
-                                               int threshold, int dir);
+typedef void (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
+                                                const uint16_t *in,
+                                                int threshold, int dir);
 void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
                                 dering_list *dlist, int dering_count,
                                 BLOCK_SIZE bsize);
diff --git a/av1/common/od_dering_simd.h b/av1/common/od_dering_simd.h
index 3435d7c..266cb3c 100644
--- a/av1/common/od_dering_simd.h
+++ b/av1/common/od_dering_simd.h
@@ -214,9 +214,9 @@
   return v128_cmplt_s16(v128_abs_s16(in), threshold);
 }
 
-int SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
-                                              const uint16_t *in, int threshold,
-                                              int dir) {
+void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
+                                               const uint16_t *in,
+                                               int threshold, int dir) {
   int i;
   v128 sum;
   v128 p;
@@ -225,11 +225,9 @@
   v128 res;
   v128 tmp;
   v128 thresh;
-  v128 total_abs;
   int off1, off2;
   off1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
   off2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
-  total_abs = v128_zero();
   thresh = v128_dup_16(threshold);
   for (i = 0; i < 4; i += 2) {
     sum = v128_zero();
@@ -275,17 +273,15 @@
     /*res = row + ((sum + 8) >> 4)*/
     res = v128_add_16(sum, v128_dup_16(8));
     res = v128_shr_n_s16(res, 4);
-    total_abs = v128_add_16(total_abs, v128_abs_s16(res));
     res = v128_add_16(row, res);
     v64_store_aligned(&y[i * ystride], v128_low_v64(res));
     v64_store_aligned(&y[(i + 1) * ystride], v128_high_v64(res));
   }
-  return (int)((v128_dotp_s16(total_abs, v128_dup_16(1)) + 2) >> 2);
 }
 
-int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
-                                              const uint16_t *in, int threshold,
-                                              int dir) {
+void SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
+                                               const uint16_t *in,
+                                               int threshold, int dir) {
   int i;
   v128 sum;
   v128 p0, p1;
@@ -293,12 +289,10 @@
   v128 row;
   v128 res;
   v128 thresh;
-  v128 total_abs;
   int off1, off2, off3;
   off1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
   off2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
   off3 = OD_DIRECTION_OFFSETS_TABLE[dir][2];
-  total_abs = v128_zero();
   thresh = v128_dup_16(threshold);
   for (i = 0; i < 8; i++) {
     sum = v128_zero();
@@ -353,11 +347,9 @@
     /*res = row + ((sum + 8) >> 4)*/
     res = v128_add_16(sum, v128_dup_16(8));
     res = v128_shr_n_s16(res, 4);
-    total_abs = v128_add_16(total_abs, v128_abs_s16(res));
     res = v128_add_16(row, res);
     v128_store_unaligned(&y[i * ystride], res);
   }
-  return (int)((v128_dotp_s16(total_abs, v128_dup_16(1)) + 8) >> 4);
 }
 
 void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
diff --git a/test/dering_test.cc b/test/dering_test.cc
index 781b7ce..9b225ef 100644
--- a/test/dering_test.cc
+++ b/test/dering_test.cc
@@ -27,8 +27,8 @@
 
 namespace {
 
-typedef int (*dering_dir_t)(uint16_t *y, int ystride, const uint16_t *in,
-                            int threshold, int dir);
+typedef void (*dering_dir_t)(uint16_t *y, int ystride, const uint16_t *in,
+                             int threshold, int dir);
 
 typedef std::tr1::tuple<dering_dir_t, dering_dir_t, int> dering_dir_param_t;
 
@@ -52,10 +52,11 @@
 typedef CDEFDeringDirTest CDEFDeringSpeedTest;
 
 void test_dering(int bsize, int iterations,
-                 int (*dering)(uint16_t *y, int ystride, const uint16_t *in,
-                               int threshold, int dir),
-                 int (*ref_dering)(uint16_t *y, int ystride, const uint16_t *in,
-                                   int threshold, int dir)) {
+                 void (*dering)(uint16_t *y, int ystride, const uint16_t *in,
+                                int threshold, int dir),
+                 void (*ref_dering)(uint16_t *y, int ystride,
+                                    const uint16_t *in, int threshold,
+                                    int dir)) {
   const int size = 8;
   const int ysize = size + 2 * OD_FILT_VBORDER;
   ACMRandom rnd(ACMRandom::DeterministicSeed());
@@ -69,7 +70,6 @@
   int boundary, depth, bits, level, count, errdepth = 0, errthreshold = 0,
                                            errboundary = 0;
   unsigned int pos = 0;
-  int ref_res = 0, res = 0;
 
   for (boundary = 0; boundary < 16; boundary++) {
     for (depth = 8; depth <= 12; depth += 2) {
@@ -105,18 +105,16 @@
             for (dir = 0; dir < 8; dir++) {
               for (threshold = 0; threshold < 64 << (depth - 8) && !error;
                    threshold += !error << (depth - 8)) {
-                ref_res = ref_dering(
-                    ref_d, size,
-                    s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
-                    threshold, dir);
+                ref_dering(ref_d, size, s + OD_FILT_HBORDER +
+                                            OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+                           threshold, dir);
                 // If dering and ref_dering are the same, we're just testing
                 // speed
                 if (dering != ref_dering)
-                  ASM_REGISTER_STATE_CHECK(
-                      res =
-                          dering(d, size, s + OD_FILT_HBORDER +
-                                              OD_FILT_VBORDER * OD_FILT_BSTRIDE,
-                                 threshold, dir));
+                  ASM_REGISTER_STATE_CHECK(dering(
+                      d, size,
+                      s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
+                      threshold, dir));
                 if (ref_dering != dering) {
                   for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error; pos++) {
                     error = ref_d[pos] != d[pos];
@@ -124,7 +122,6 @@
                     errthreshold = threshold;
                     errboundary = boundary;
                   }
-                  error |= res != ref_res;
                 }
               }
             }
@@ -140,7 +137,6 @@
                       << "First error at " << pos % size << "," << pos / size
                       << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
                       << ") " << std::endl
-                      << "return: " << res << " : " << ref_res << std::endl
                       << "threshold: " << errthreshold << std::endl
                       << "depth: " << errdepth << std::endl
                       << "size: " << bsize << std::endl
@@ -149,12 +145,12 @@
 }
 
 void test_dering_speed(int bsize, int iterations,
-                       int (*dering)(uint16_t *y, int ystride,
-                                     const uint16_t *in, int threshold,
-                                     int dir),
-                       int (*ref_dering)(uint16_t *y, int ystride,
-                                         const uint16_t *in, int threshold,
-                                         int dir)) {
+                       void (*dering)(uint16_t *y, int ystride,
+                                      const uint16_t *in, int threshold,
+                                      int dir),
+                       void (*ref_dering)(uint16_t *y, int ystride,
+                                          const uint16_t *in, int threshold,
+                                          int dir)) {
   aom_usec_timer ref_timer;
   aom_usec_timer timer;