dr_prediction_z2_Nx8_sse4_1: quiet -Wmaybe-uninitialized warnings
Reorder some operations to avoid unneeded work when there have been no y
calculations (base_x >= min_base_x).
This change is similar to the one done for NEON:
0484e4d9f5 dr_prediction_z2_Nx8_neon: quiet -Wunintialized warnings
Quiets the following under gcc 12.2.0 targeting x86:
In function ‘_mm_mullo_epi16’,
inlined from ‘dr_prediction_z2_Nx8_sse4_1’ at aom_dsp/x86/intrapred_sse4.c:700:9,
inlined from ‘av1_dr_prediction_z2_sse4_1’ at aom_dsp/x86/intrapred_sse4.c:921:7:
/usr/lib/gcc/x86_64-linux-gnu/12/include/emmintrin.h:1162:35: warning:
‘shifty’ may be used uninitialized [-Wmaybe-uninitialized]
1162 | return (__m128i) ((__v8hu)__A * (__v8hu)__B);
| ^~~~~~~~~~~
aom_dsp/x86/intrapred_sse4.c: In function ‘av1_dr_prediction_z2_sse4_1’:
aom_dsp/x86/intrapred_sse4.c:605:34: note: ‘shifty’ was declared here
605 | __m128i b, res, res1, shift, shifty;
|
Change-Id: I4d414fea6cdd4d76c6dff0ce6f1fb25ba9c04a54
diff --git a/aom_dsp/x86/intrapred_sse4.c b/aom_dsp/x86/intrapred_sse4.c
index 3f72dc4..fb30420 100644
--- a/aom_dsp/x86/intrapred_sse4.c
+++ b/aom_dsp/x86/intrapred_sse4.c
@@ -602,7 +602,7 @@
const __m128i c1234 = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
for (int r = 0; r < N; r++) {
- __m128i b, res, res1, shift, shifty;
+ __m128i b, res, res1, shift;
__m128i resx, resy, resxy, r6, ydx;
int y = r + 1;
@@ -620,11 +620,7 @@
}
if (base_shift > 7) {
- a0_x = _mm_setzero_si128();
- a1_x = _mm_setzero_si128();
- a0_y = _mm_setzero_si128();
- a1_y = _mm_setzero_si128();
- shift = _mm_setzero_si128();
+ resx = _mm_setzero_si128();
} else {
a0_above = _mm_loadu_si128((__m128i *)(above + base_x + base_shift));
ydx = _mm_set1_epi16(y * dx);
@@ -649,9 +645,15 @@
}
a0_x = _mm_cvtepu8_epi16(a0_above);
a1_x = _mm_cvtepu8_epi16(a1_above);
- a0_y = _mm_setzero_si128();
- a1_y = _mm_setzero_si128();
- shifty = shift;
+
+ diff = _mm_sub_epi16(a1_x, a0_x); // a[x+1] - a[x]
+ a32 = _mm_slli_epi16(a0_x, 5); // a[x] * 32
+ a32 = _mm_add_epi16(a32, a16); // a[x] * 32 + 16
+
+ b = _mm_mullo_epi16(diff, shift);
+ res = _mm_add_epi16(a32, b);
+ res = _mm_srli_epi16(res, 5);
+ resx = _mm_packus_epi16(res, res);
}
// y calc
@@ -678,34 +680,27 @@
left[base_y_c[6]], left[base_y_c[7]]);
if (upsample_left) {
- shifty = _mm_srli_epi16(
+ shift = _mm_srli_epi16(
_mm_and_si128(_mm_slli_epi16(y_c, upsample_left), c3f), 1);
} else {
- shifty = _mm_srli_epi16(_mm_and_si128(y_c, c3f), 1);
+ shift = _mm_srli_epi16(_mm_and_si128(y_c, c3f), 1);
}
+
+ diff = _mm_sub_epi16(a1_y, a0_y); // a[x+1] - a[x]
+ a32 = _mm_slli_epi16(a0_y, 5); // a[x] * 32
+ a32 = _mm_add_epi16(a32, a16); // a[x] * 32 + 16
+
+ b = _mm_mullo_epi16(diff, shift);
+ res1 = _mm_add_epi16(a32, b);
+ res1 = _mm_srli_epi16(res1, 5);
+
+ resy = _mm_packus_epi16(res1, res1);
+ resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)Mask[0][base_min_diff]);
+ _mm_storel_epi64((__m128i *)dst, resxy);
+ } else {
+ _mm_storel_epi64((__m128i *)dst, resx);
}
- diff = _mm_sub_epi16(a1_x, a0_x); // a[x+1] - a[x]
- a32 = _mm_slli_epi16(a0_x, 5); // a[x] * 32
- a32 = _mm_add_epi16(a32, a16); // a[x] * 32 + 16
-
- b = _mm_mullo_epi16(diff, shift);
- res = _mm_add_epi16(a32, b);
- res = _mm_srli_epi16(res, 5);
-
- diff = _mm_sub_epi16(a1_y, a0_y); // a[x+1] - a[x]
- a32 = _mm_slli_epi16(a0_y, 5); // a[x] * 32
- a32 = _mm_add_epi16(a32, a16); // a[x] * 32 + 16
-
- b = _mm_mullo_epi16(diff, shifty);
- res1 = _mm_add_epi16(a32, b);
- res1 = _mm_srli_epi16(res1, 5);
-
- resx = _mm_packus_epi16(res, res);
- resy = _mm_packus_epi16(res1, res1);
-
- resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)Mask[0][base_min_diff]);
- _mm_storel_epi64((__m128i *)(dst), resxy);
dst += stride;
}
}