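Improve high-bitdepth CLPF by using the newly added v128_ssub_u16

Replace the explicit clamp to zero (v128_max_s16 of v128_zero() and a
v128_sub_16 result) with a single v128_ssub_u16 call, which also removes
the temporary zero vector.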

Change-Id: I392b801f61b0d3bcd1cd6157ab783f76ea8c9e5e
diff --git a/av1/common/clpf_simd.h b/av1/common/clpf_simd.h
index 08d79ea..d64b952 100644
--- a/av1/common/clpf_simd.h
+++ b/av1/common/clpf_simd.h
@@ -256,10 +256,8 @@
   v128 diff = v128_sub_16(a, b);
   const v128 sign = v128_shr_n_s16(diff, 15);
   diff = v128_abs_s16(diff);
-  const v128 zero = v128_zero();
-  const v128 s = v128_max_s16(
-      zero, v128_sub_16(v128_dup_16(strength),
-                        v128_shr_u16(diff, dmp - get_msb(strength))));
+  const v128 s = v128_ssub_u16(v128_dup_16(strength),
+                               v128_shr_u16(diff, dmp - get_msb(strength)));
   return v128_sub_16(v128_xor(sign, v128_min_s16(diff, s)), sign);
 }