Add scaling to fix b/278065963.

(Minor changes are possible in potential overflow conditions.)

STATS_CHANGED

Bug: b/278065963
Change-Id: I78b35feb1e2dc3012fb3ee341c17c0ffb41ed33c
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 972327a..a404663 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -1097,15 +1097,41 @@
         b[i - 1] = c;
       }
     }
+
+    // b/278065963: The multiplies
+    //   c / 256 * A[k * stride + j] / cd * 256
+    // and
+    //   c / 256 * b[k] / cd * 256
+    // within Gaussian elimination can cause a signed integer overflow. Rework
+    // the multiplies so that larger scaling is used without significantly
+    // impacting the overall precision.
+    //
+    // Precision guidance:
+    //   scale_threshold: Pick as high as possible.
+    // For max_abs_akj >= scale_threshold scenario:
+    //   scaler_A: Pick as low as possible. Needed for A[(i + 1) * stride + j].
+    //   scaler_c: Pick as low as possible while maintaining scaler_c >=
+    //     (1 << 7). Needed for A[(i + 1) * stride + j] and b[i + 1].
+    int64_t max_abs_akj = 0;
+    for (int j = 0; j < n; j++) {
+      const int64_t abs_akj = llabs(A[k * stride + j]);
+      if (abs_akj > max_abs_akj) max_abs_akj = abs_akj;
+    }
+    const int scale_threshold = 1 << 22;
+    const int scaler_A = max_abs_akj < scale_threshold ? 1 : (1 << 4);
+    const int scaler_c = max_abs_akj < scale_threshold ? 1 : (1 << 7);
+    const int scaler = scaler_c * scaler_A;
+
     // Forward elimination (convert A to row-echelon form)
     for (int i = k; i < n - 1; i++) {
       if (A[k * stride + k] == 0) return 0;
-      const int64_t c = A[(i + 1) * stride + k];
+      const int64_t c = A[(i + 1) * stride + k] / scaler_c;
       const int64_t cd = A[k * stride + k];
       for (int j = 0; j < n; j++) {
-        A[(i + 1) * stride + j] -= c / 256 * A[k * stride + j] / cd * 256;
+        A[(i + 1) * stride + j] -=
+            A[k * stride + j] / scaler_A * c / cd * scaler;
       }
-      b[i + 1] -= c / 256 * b[k] / cd * 256;
+      b[i + 1] -= c * b[k] / cd * scaler_c;
     }
   }
   // Back-substitution