intrapred_neon: convert some for loops to do/while

no change in assembly in optimized builds where the functions are
inlined (gcc & clang)

Bug: b/231616924
Change-Id: I5442a7a689d32124d18b1eb7b984806c4628f416

commit: 845f9c090b451eb7a7609f4fad01e2d8692209c7 [log] [tgz]
author: James Zern <jzern@google.com> Wed May 11 15:14:44 2022 -0700
committer: James Zern <jzern@google.com> Mon May 16 23:45:23 2022 +0000
tree: 838cbc01ea8fb7a14c17e9add27bdbc0b18a5a1a
parent: 62672783861dcac50135445568a4218d0a9e3a3a [diff]
diff --git a/aom_dsp/arm/intrapred_neon.c b/aom_dsp/arm/intrapred_neon.c
index 4c5d22b..8e6dc12 100644
--- a/aom_dsp/arm/intrapred_neon.c
+++ b/aom_dsp/arm/intrapred_neon.c

@@ -2672,7 +2672,9 @@
   const uint8x8_t scaled_weights_x = negate_s8(weights_x_v);
   const uint16x8_t weighted_tr = vmull_u8(scaled_weights_x, top_right_v);
 
-  for (int y = 0; y < height; ++y) {
+  assert(height > 0);
+  int y = 0;
+  do {
     const uint8x8_t left_v = vdup_n_u8(left_column[y]);
     const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);
     const uint8x8_t scaled_weights_y = negate_s8(weights_y_v);
@@ -2687,7 +2689,7 @@
 
     vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(result), 0);
     dst += stride;
-  }
+  } while (++y != height);
 }
 
 static INLINE uint8x8_t calculate_pred(const uint16x8_t weighted_top_bl,
@@ -2723,7 +2725,9 @@
   const uint8x8_t scaled_weights_x = negate_s8(weights_x_v);
   const uint16x8_t weighted_tr = vmull_u8(scaled_weights_x, top_right_v);
 
-  for (int y = 0; y < height; ++y) {
+  assert(height > 0);
+  int y = 0;
+  do {
     const uint8x8_t left_v = vdup_n_u8(left_column[y]);
     const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);
     const uint8x8_t scaled_weights_y = negate_s8(weights_y_v);
@@ -2733,7 +2737,7 @@
 
     vst1_u8(dst, result);
     dst += stride;
-  }
+  } while (++y != height);
 }
 
 #define SMOOTH_NXM(W, H)                                                       \
@@ -2903,7 +2907,9 @@
                                                                       \
     const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left);           \
                                                                       \
-    for (int y = 0; y < height; ++y) {                                \
+    assert(height > 0);                                               \
+    int y = 0;                                                        \
+    do {                                                              \
       const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);          \
       const uint8x8_t scaled_weights_y = negate_s8(weights_y_v);      \
                                                                       \
@@ -2919,7 +2925,7 @@
         vst1_u8(dst, pred);                                           \
       }                                                               \
       dst += stride;                                                  \
-    }                                                                 \
+    } while (++y != height);                                          \
   }
 
 SMOOTH_V_PREDICTOR(4)
@@ -2978,7 +2984,9 @@
                                                                          \
     const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left);              \
                                                                          \
-    for (int y = 0; y < height; ++y) {                                   \
+    assert(height > 0);                                                  \
+    int y = 0;                                                           \
+    do {                                                                 \
       const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);             \
       const uint8x8_t scaled_weights_y = negate_s8(weights_y_v);         \
       const uint16x8_t weighted_bl =                                     \
@@ -3005,7 +3013,7 @@
       }                                                                  \
                                                                          \
       dst += stride;                                                     \
-    }                                                                    \
+    } while (++y != height);                                             \
   }
 
 SMOOTH_V_PREDICTOR(16)
@@ -3052,7 +3060,9 @@
     const uint8x8_t scaled_weights_x = negate_s8(weights_x);                \
     const uint16x8_t weighted_tr = vmull_u8(scaled_weights_x, top_right_v); \
                                                                             \
-    for (int y = 0; y < height; ++y) {                                      \
+    assert(height > 0);                                                     \
+    int y = 0;                                                              \
+    do {                                                                    \
       const uint8x8_t left_v = vdup_n_u8(left_column[y]);                   \
       const uint16x8_t weighted_left_tr =                                   \
           vmlal_u8(weighted_tr, weights_x, left_v);                         \
@@ -3065,7 +3075,7 @@
         vst1_u8(dst, pred);                                                 \
       }                                                                     \
       dst += stride;                                                        \
-    }                                                                       \
+    } while (++y != height);                                                \
   }
 
 SMOOTH_H_PREDICTOR(4)
@@ -3137,7 +3147,9 @@
       }                                                                    \
     }                                                                      \
                                                                            \
-    for (int y = 0; y < height; ++y) {                                     \
+    assert(height > 0);                                                    \
+    int y = 0;                                                             \
+    do {                                                                   \
       const uint8x8_t left_v = vdup_n_u8(left_column[y]);                  \
                                                                            \
       const uint8x16_t pred_0 = calculate_horizontal_weights_and_pred(     \
@@ -3160,7 +3172,7 @@
         }                                                                  \
       }                                                                    \
       dst += stride;                                                       \
-    }                                                                      \
+    } while (++y != height);                                               \
   }
 
 SMOOTH_H_PREDICTOR(16)
@@ -3207,7 +3219,9 @@
     top = vld1_u8(top_row);
   }
 
-  for (int y = 0; y < height; ++y) {
+  assert(height > 0);
+  int y = 0;
+  do {
     const uint8x8_t left = vdup_n_u8(left_column[y]);
 
     const uint8x8_t left_dist = vabd_u8(top, top_left);
@@ -3241,7 +3255,7 @@
       vst1_u8(dest, result);
     }
     dest += stride;
-  }
+  } while (++y != height);
 }
 
 #define PAETH_NXM(W, H)                                                     \
@@ -3326,7 +3340,9 @@
     }
   }
 
-  for (int y = 0; y < height; ++y) {
+  assert(height > 0);
+  int y = 0;
+  do {
     const uint8x16_t left = vdupq_n_u8(left_column[y]);
 
     const uint8x16_t top_dist = vabdq_u8(left, top_left);
@@ -3380,7 +3396,7 @@
     }
 
     dest += stride;
-  }
+  } while (++y != height);
 }
 
 #define PAETH_NXM_WIDE(W, H)                                                \
commit	845f9c090b451eb7a7609f4fad01e2d8692209c7	[log] [tgz]
author	James Zern <jzern@google.com>	Wed May 11 15:14:44 2022 -0700
committer	James Zern <jzern@google.com>	Mon May 16 23:45:23 2022 +0000
tree	838cbc01ea8fb7a14c17e9add27bdbc0b18a5a1a
parent	62672783861dcac50135445568a4218d0a9e3a3a [diff]