Reduce precision of matrices/vectors for warp estimate
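Reduces precision of the least-squares matrix A and vectors Bx, By by up to 2 bits (2 bits for 4 or more samples, 1 bit for 2-3 samples, none otherwise).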
No material change in performance.
Change-Id: I549a27da1dcb381fb329c345ee280dbd86b45bac
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 11fdd7b..c74609d 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -1425,6 +1425,25 @@
#define LS_MV_MAX 256 // max mv in 1/8-pel
#define LS_STEP 2
+// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
+// the precision needed is:
+// (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
+// (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
+// 1 [for sign] +
+// LEAST_SQUARES_SAMPLES_MAX_BITS
+// [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
+// The value is 23
+#define LS_MAT_RANGE_BITS \
+ ((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)
+
+// Number of bits by which the full-range precision is reduced
+#define LS_MAT_DOWN_BITS 2
+
+// Bit range (and clamp limits) of A, Bx and By after downshifting
+#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
+#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
+#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)
+
#define LS_SUM(a) ((a)*4 + LS_STEP * 2)
#define LS_SQUARE(a) \
(((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
@@ -1489,9 +1508,32 @@
n++;
}
}
- int64_t Px[2], Py[2];
- int64_t iDet, Det, v;
- int16_t shift;
+ int downshift;
+ if (n >= 4)
+ downshift = LS_MAT_DOWN_BITS;
+ else if (n >= 2)
+ downshift = LS_MAT_DOWN_BITS - 1;
+ else
+ downshift = LS_MAT_DOWN_BITS - 2;
+
+ // Reduce precision by downshift bits
+ A[0][0] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][0], downshift), LS_MAT_MIN,
+ LS_MAT_MAX);
+ A[0][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][1], downshift), LS_MAT_MIN,
+ LS_MAT_MAX);
+ A[1][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[1][1], downshift), LS_MAT_MIN,
+ LS_MAT_MAX);
+ Bx[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[0], downshift), LS_MAT_MIN,
+ LS_MAT_MAX);
+ Bx[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[1], downshift), LS_MAT_MIN,
+ LS_MAT_MAX);
+ By[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[0], downshift), LS_MAT_MIN,
+ LS_MAT_MAX);
+ By[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[1], downshift), LS_MAT_MIN,
+ LS_MAT_MAX);
+
+ int64_t Px[2], Py[2], Det;
+ int16_t iDet, shift;
// These divided by the Det, are the least squares solutions
Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
@@ -1509,16 +1551,17 @@
shift = 0;
}
- v = Px[0] * iDet;
+ int64_t v;
+ v = Px[0] * (int64_t)iDet;
wm->wmmat[2] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
- v = Px[1] * iDet;
+ v = Px[1] * (int64_t)iDet;
wm->wmmat[3] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
v = (dux << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[2] - suy * wm->wmmat[3];
wm->wmmat[0] = ROUND_POWER_OF_TWO_SIGNED(v, 3);
- v = Py[0] * iDet;
+ v = Py[0] * (int64_t)iDet;
wm->wmmat[4] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
- v = Py[1] * iDet;
+ v = Py[1] * (int64_t)iDet;
wm->wmmat[5] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
v = (duy << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[4] - suy * wm->wmmat[5];
wm->wmmat[1] = ROUND_POWER_OF_TWO_SIGNED(v, 3);
@@ -1631,7 +1674,7 @@
C12 = (int64_t)A[0][1] * A[0][2] - (int64_t)A[0][0] * A[1][2];
C22 = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
- // Scale by 1/16
+ // Scale by 1/64
C00 = ROUND_POWER_OF_TWO_SIGNED(C00, 6);
C01 = ROUND_POWER_OF_TWO_SIGNED(C01, 6);
C02 = ROUND_POWER_OF_TWO_SIGNED(C02, 6);