aom_simd_inline.h: add SIMD_CLAMP

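and use it in various intrinsics; this keeps the formatting stable with
newer versions of clang-format and makes some of the operations a bit
more readable.

Note: in v64_intrinsics_c.h the pack of s16 lanes to signed 8-bit now
stores through the s8 lanes with a lower clamp of -128, where it
previously stored through the u8 lanes with a lower clamp of 128 cast
to uint8_t.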

Bug: aomedia:3136
Bug: b/229626362
Change-Id: I69d1b8cac5cd54a8d368c84f163303d4f8f12ac3
diff --git a/aom_dsp/aom_simd_inline.h b/aom_dsp/aom_simd_inline.h
index eb333f6..b4b1b35 100644
--- a/aom_dsp/aom_simd_inline.h
+++ b/aom_dsp/aom_simd_inline.h
@@ -18,4 +18,7 @@
 #define SIMD_INLINE static AOM_FORCE_INLINE
 #endif
 
+#define SIMD_CLAMP(value, min, max) \
+  ((value) > (max) ? (max) : (value) < (min) ? (min) : (value))
+
 #endif  // AOM_AOM_DSP_AOM_SIMD_INLINE_H_
diff --git a/aom_dsp/simd/v64_intrinsics_c.h b/aom_dsp/simd/v64_intrinsics_c.h
index b84f243..bfd6fe0 100644
--- a/aom_dsp/simd/v64_intrinsics_c.h
+++ b/aom_dsp/simd/v64_intrinsics_c.h
@@ -186,11 +186,7 @@
   c_v64 t;
   int c;
   for (c = 0; c < 8; c++)
-    t.u8[c] = (int16_t)a.u8[c] + (int16_t)b.u8[c] > 255
-                  ? 255
-                  : (int16_t)a.u8[c] + (int16_t)b.u8[c] < 0
-                        ? 0
-                        : (int16_t)a.u8[c] + (int16_t)b.u8[c];
+    t.u8[c] = SIMD_CLAMP((int16_t)a.u8[c] + (int16_t)b.u8[c], 0, 255);
   return t;
 }
 
@@ -198,11 +194,7 @@
   c_v64 t;
   int c;
   for (c = 0; c < 8; c++)
-    t.s8[c] = (int16_t)a.s8[c] + (int16_t)b.s8[c] > 127
-                  ? 127
-                  : (int16_t)a.s8[c] + (int16_t)b.s8[c] < -128
-                        ? -128
-                        : (int16_t)a.s8[c] + (int16_t)b.s8[c];
+    t.s8[c] = SIMD_CLAMP((int16_t)a.s8[c] + (int16_t)b.s8[c], -128, 127);
   return t;
 }
 
@@ -210,11 +202,7 @@
   c_v64 t;
   int c;
   for (c = 0; c < 4; c++)
-    t.s16[c] = (int32_t)a.s16[c] + (int32_t)b.s16[c] > 32767
-                   ? 32767
-                   : (int32_t)a.s16[c] + (int32_t)b.s16[c] < -32768
-                         ? -32768
-                         : (int32_t)a.s16[c] + (int32_t)b.s16[c];
+    t.s16[c] = SIMD_CLAMP((int32_t)a.s16[c] + (int32_t)b.s16[c], -32768, 32767);
   return t;
 }
 
@@ -244,7 +232,7 @@
   int c;
   for (c = 0; c < 8; c++) {
     int16_t d = (int16_t)a.s8[c] - (int16_t)b.s8[c];
-    t.s8[c] = d > 127 ? 127 : (d < -128 ? -128 : d);
+    t.s8[c] = SIMD_CLAMP(d, -128, 127);
   }
   return t;
 }
@@ -260,11 +248,7 @@
   c_v64 t;
   int c;
   for (c = 0; c < 4; c++)
-    t.s16[c] = (int32_t)a.s16[c] - (int32_t)b.s16[c] < -32768
-                   ? -32768
-                   : (int32_t)a.s16[c] - (int32_t)b.s16[c] > 32767
-                         ? 32767
-                         : (int32_t)a.s16[c] - (int32_t)b.s16[c];
+    t.s16[c] = SIMD_CLAMP((int32_t)a.s16[c] - (int32_t)b.s16[c], -32768, 32767);
   return t;
 }
 
@@ -481,10 +465,10 @@
     a = b;
     b = u;
   }
-  t.s16[3] = a.s32[1] > 32767 ? 32767 : a.s32[1] < -32768 ? -32768 : a.s32[1];
-  t.s16[2] = a.s32[0] > 32767 ? 32767 : a.s32[0] < -32768 ? -32768 : a.s32[0];
-  t.s16[1] = b.s32[1] > 32767 ? 32767 : b.s32[1] < -32768 ? -32768 : b.s32[1];
-  t.s16[0] = b.s32[0] > 32767 ? 32767 : b.s32[0] < -32768 ? -32768 : b.s32[0];
+  t.s16[3] = SIMD_CLAMP(a.s32[1], -32768, 32767);
+  t.s16[2] = SIMD_CLAMP(a.s32[0], -32768, 32767);
+  t.s16[1] = SIMD_CLAMP(b.s32[1], -32768, 32767);
+  t.s16[0] = SIMD_CLAMP(b.s32[0], -32768, 32767);
   return t;
 }
 
@@ -495,10 +479,10 @@
     a = b;
     b = u;
   }
-  t.u16[3] = a.s32[1] > 65535 ? 65535 : a.s32[1] < 0 ? 0 : a.s32[1];
-  t.u16[2] = a.s32[0] > 65535 ? 65535 : a.s32[0] < 0 ? 0 : a.s32[0];
-  t.u16[1] = b.s32[1] > 65535 ? 65535 : b.s32[1] < 0 ? 0 : b.s32[1];
-  t.u16[0] = b.s32[0] > 65535 ? 65535 : b.s32[0] < 0 ? 0 : b.s32[0];
+  t.u16[3] = SIMD_CLAMP(a.s32[1], 0, 65535);
+  t.u16[2] = SIMD_CLAMP(a.s32[0], 0, 65535);
+  t.u16[1] = SIMD_CLAMP(b.s32[1], 0, 65535);
+  t.u16[0] = SIMD_CLAMP(b.s32[0], 0, 65535);
   return t;
 }
 
@@ -509,14 +493,14 @@
     a = b;
     b = u;
   }
-  t.u8[7] = a.s16[3] > 255 ? 255 : a.s16[3] < 0 ? 0 : a.s16[3];
-  t.u8[6] = a.s16[2] > 255 ? 255 : a.s16[2] < 0 ? 0 : a.s16[2];
-  t.u8[5] = a.s16[1] > 255 ? 255 : a.s16[1] < 0 ? 0 : a.s16[1];
-  t.u8[4] = a.s16[0] > 255 ? 255 : a.s16[0] < 0 ? 0 : a.s16[0];
-  t.u8[3] = b.s16[3] > 255 ? 255 : b.s16[3] < 0 ? 0 : b.s16[3];
-  t.u8[2] = b.s16[2] > 255 ? 255 : b.s16[2] < 0 ? 0 : b.s16[2];
-  t.u8[1] = b.s16[1] > 255 ? 255 : b.s16[1] < 0 ? 0 : b.s16[1];
-  t.u8[0] = b.s16[0] > 255 ? 255 : b.s16[0] < 0 ? 0 : b.s16[0];
+  t.u8[7] = SIMD_CLAMP(a.s16[3], 0, 255);
+  t.u8[6] = SIMD_CLAMP(a.s16[2], 0, 255);
+  t.u8[5] = SIMD_CLAMP(a.s16[1], 0, 255);
+  t.u8[4] = SIMD_CLAMP(a.s16[0], 0, 255);
+  t.u8[3] = SIMD_CLAMP(b.s16[3], 0, 255);
+  t.u8[2] = SIMD_CLAMP(b.s16[2], 0, 255);
+  t.u8[1] = SIMD_CLAMP(b.s16[1], 0, 255);
+  t.u8[0] = SIMD_CLAMP(b.s16[0], 0, 255);
   return t;
 }
 
@@ -527,14 +511,14 @@
     a = b;
     b = u;
   }
-  t.u8[7] = (uint8_t)(a.s16[3] > 127 ? 127 : a.s16[3] < -128 ? 128 : a.s16[3]);
-  t.u8[6] = (uint8_t)(a.s16[2] > 127 ? 127 : a.s16[2] < -128 ? 128 : a.s16[2]);
-  t.u8[5] = (uint8_t)(a.s16[1] > 127 ? 127 : a.s16[1] < -128 ? 128 : a.s16[1]);
-  t.u8[4] = (uint8_t)(a.s16[0] > 127 ? 127 : a.s16[0] < -128 ? 128 : a.s16[0]);
-  t.u8[3] = (uint8_t)(b.s16[3] > 127 ? 127 : b.s16[3] < -128 ? 128 : b.s16[3]);
-  t.u8[2] = (uint8_t)(b.s16[2] > 127 ? 127 : b.s16[2] < -128 ? 128 : b.s16[2]);
-  t.u8[1] = (uint8_t)(b.s16[1] > 127 ? 127 : b.s16[1] < -128 ? 128 : b.s16[1]);
-  t.u8[0] = (uint8_t)(b.s16[0] > 127 ? 127 : b.s16[0] < -128 ? 128 : b.s16[0]);
+  t.s8[7] = SIMD_CLAMP(a.s16[3], -128, 127);
+  t.s8[6] = SIMD_CLAMP(a.s16[2], -128, 127);
+  t.s8[5] = SIMD_CLAMP(a.s16[1], -128, 127);
+  t.s8[4] = SIMD_CLAMP(a.s16[0], -128, 127);
+  t.s8[3] = SIMD_CLAMP(b.s16[3], -128, 127);
+  t.s8[2] = SIMD_CLAMP(b.s16[2], -128, 127);
+  t.s8[1] = SIMD_CLAMP(b.s16[1], -128, 127);
+  t.s8[0] = SIMD_CLAMP(b.s16[0], -128, 127);
   return t;
 }
 
@@ -702,13 +686,13 @@
   c_v64 t;
   int32_t u;
   u = a.u8[0] * b.s8[0] + a.u8[1] * b.s8[1];
-  t.s16[0] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+  t.s16[0] = SIMD_CLAMP(u, -32768, 32767);
   u = a.u8[2] * b.s8[2] + a.u8[3] * b.s8[3];
-  t.s16[1] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+  t.s16[1] = SIMD_CLAMP(u, -32768, 32767);
   u = a.u8[4] * b.s8[4] + a.u8[5] * b.s8[5];
-  t.s16[2] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+  t.s16[2] = SIMD_CLAMP(u, -32768, 32767);
   u = a.u8[6] * b.s8[6] + a.u8[7] * b.s8[7];
-  t.s16[3] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+  t.s16[3] = SIMD_CLAMP(u, -32768, 32767);
   return t;
 }
 
diff --git a/aom_dsp/simd/v64_intrinsics_x86.h b/aom_dsp/simd/v64_intrinsics_x86.h
index f1500b8..ec27a6b 100644
--- a/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/aom_dsp/simd/v64_intrinsics_x86.h
@@ -178,14 +178,11 @@
   __m128i t = _mm_unpacklo_epi64(b, a);
   return _mm_packus_epi32(t, t);
 #else
-  int32_t ah = v64_high_s32(a);
-  int32_t al = v64_low_s32(a);
-  int32_t bh = v64_high_s32(b);
-  int32_t bl = v64_low_s32(b);
-  return v64_from_16(ah > 65535 ? 65535 : ah < 0 ? 0 : ah,
-                     al > 65535 ? 65535 : al < 0 ? 0 : al,
-                     bh > 65535 ? 65535 : bh < 0 ? 0 : bh,
-                     bl > 65535 ? 65535 : bl < 0 ? 0 : bl);
+  const int32_t ah = SIMD_CLAMP(v64_high_s32(a), 0, 65535);
+  const int32_t al = SIMD_CLAMP(v64_low_s32(a), 0, 65535);
+  const int32_t bh = SIMD_CLAMP(v64_high_s32(b), 0, 65535);
+  const int32_t bl = SIMD_CLAMP(v64_low_s32(b), 0, 65535);
+  return v64_from_16(ah, al, bh, bl);
 #endif
 }