dr_prediction_z2_Nx8_sse4_1: quiet -Wmaybe-uninitialized warnings

Reorder some operations to avoid unneeded work when the y calculation is
skipped (base_x >= min_base_x).

This change is similar to the one done for NEON:
0484e4d9f5 dr_prediction_z2_Nx8_neon: quiet -Wunintialized warnings

Quiets the following under gcc 12.2.0 targeting x86:
In function ‘_mm_mullo_epi16’,
    inlined from ‘dr_prediction_z2_Nx8_sse4_1’ at aom_dsp/x86/intrapred_sse4.c:700:9,
    inlined from ‘av1_dr_prediction_z2_sse4_1’ at aom_dsp/x86/intrapred_sse4.c:921:7:
/usr/lib/gcc/x86_64-linux-gnu/12/include/emmintrin.h:1162:35: warning:
‘shifty’ may be used uninitialized [-Wmaybe-uninitialized]
 1162 |   return (__m128i) ((__v8hu)__A * (__v8hu)__B);
      |                                   ^~~~~~~~~~~
aom_dsp/x86/intrapred_sse4.c: In function ‘av1_dr_prediction_z2_sse4_1’:
aom_dsp/x86/intrapred_sse4.c:605:34: note: ‘shifty’ was declared here
  605 |     __m128i b, res, res1, shift, shifty;
      |

Change-Id: I4d414fea6cdd4d76c6dff0ce6f1fb25ba9c04a54
diff --git a/aom_dsp/x86/intrapred_sse4.c b/aom_dsp/x86/intrapred_sse4.c
index 3f72dc4..fb30420 100644
--- a/aom_dsp/x86/intrapred_sse4.c
+++ b/aom_dsp/x86/intrapred_sse4.c
@@ -602,7 +602,7 @@
   const __m128i c1234 = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
 
   for (int r = 0; r < N; r++) {
-    __m128i b, res, res1, shift, shifty;
+    __m128i b, res, res1, shift;
     __m128i resx, resy, resxy, r6, ydx;
 
     int y = r + 1;
@@ -620,11 +620,7 @@
     }
 
     if (base_shift > 7) {
-      a0_x = _mm_setzero_si128();
-      a1_x = _mm_setzero_si128();
-      a0_y = _mm_setzero_si128();
-      a1_y = _mm_setzero_si128();
-      shift = _mm_setzero_si128();
+      resx = _mm_setzero_si128();
     } else {
       a0_above = _mm_loadu_si128((__m128i *)(above + base_x + base_shift));
       ydx = _mm_set1_epi16(y * dx);
@@ -649,9 +645,15 @@
       }
       a0_x = _mm_cvtepu8_epi16(a0_above);
       a1_x = _mm_cvtepu8_epi16(a1_above);
-      a0_y = _mm_setzero_si128();
-      a1_y = _mm_setzero_si128();
-      shifty = shift;
+
+      diff = _mm_sub_epi16(a1_x, a0_x);  // a[x+1] - a[x]
+      a32 = _mm_slli_epi16(a0_x, 5);     // a[x] * 32
+      a32 = _mm_add_epi16(a32, a16);     // a[x] * 32 + 16
+
+      b = _mm_mullo_epi16(diff, shift);
+      res = _mm_add_epi16(a32, b);
+      res = _mm_srli_epi16(res, 5);
+      resx = _mm_packus_epi16(res, res);
     }
 
     // y calc
@@ -678,34 +680,27 @@
                             left[base_y_c[6]], left[base_y_c[7]]);
 
       if (upsample_left) {
-        shifty = _mm_srli_epi16(
+        shift = _mm_srli_epi16(
             _mm_and_si128(_mm_slli_epi16(y_c, upsample_left), c3f), 1);
       } else {
-        shifty = _mm_srli_epi16(_mm_and_si128(y_c, c3f), 1);
+        shift = _mm_srli_epi16(_mm_and_si128(y_c, c3f), 1);
       }
+
+      diff = _mm_sub_epi16(a1_y, a0_y);  // a[x+1] - a[x]
+      a32 = _mm_slli_epi16(a0_y, 5);     // a[x] * 32
+      a32 = _mm_add_epi16(a32, a16);     // a[x] * 32 + 16
+
+      b = _mm_mullo_epi16(diff, shift);
+      res1 = _mm_add_epi16(a32, b);
+      res1 = _mm_srli_epi16(res1, 5);
+
+      resy = _mm_packus_epi16(res1, res1);
+      resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)Mask[0][base_min_diff]);
+      _mm_storel_epi64((__m128i *)dst, resxy);
+    } else {
+      _mm_storel_epi64((__m128i *)dst, resx);
     }
 
-    diff = _mm_sub_epi16(a1_x, a0_x);  // a[x+1] - a[x]
-    a32 = _mm_slli_epi16(a0_x, 5);     // a[x] * 32
-    a32 = _mm_add_epi16(a32, a16);     // a[x] * 32 + 16
-
-    b = _mm_mullo_epi16(diff, shift);
-    res = _mm_add_epi16(a32, b);
-    res = _mm_srli_epi16(res, 5);
-
-    diff = _mm_sub_epi16(a1_y, a0_y);  // a[x+1] - a[x]
-    a32 = _mm_slli_epi16(a0_y, 5);     // a[x] * 32
-    a32 = _mm_add_epi16(a32, a16);     // a[x] * 32 + 16
-
-    b = _mm_mullo_epi16(diff, shifty);
-    res1 = _mm_add_epi16(a32, b);
-    res1 = _mm_srli_epi16(res1, 5);
-
-    resx = _mm_packus_epi16(res, res);
-    resy = _mm_packus_epi16(res1, res1);
-
-    resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)Mask[0][base_min_diff]);
-    _mm_storel_epi64((__m128i *)(dst), resxy);
     dst += stride;
   }
 }