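Optimize compute_stats_highbd()

Mark compute_stats_highbd() as AOM_FORCE_INLINE and call it with the
constant window sizes WIENER_WIN (for AOM_PLANE_Y) and WIENER_WIN_CHROMA
(for the other planes) instead of the runtime wiener_win value. In the
accumulation loop, cache Y[k] and the pointer to row k of H in locals
rather than re-indexing them on every iteration.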
Change-Id: Id10367bd80f74c2646f603741dc4ce1655d44d9e
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index f41a473..201b2a5 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -589,10 +589,10 @@
return avg;
}
-static void compute_stats_highbd(int wiener_win, const uint8_t *dgd8,
- const uint8_t *src8, int h_start, int h_end,
- int v_start, int v_end, int dgd_stride,
- int src_stride, double *M, double *H) {
+static AOM_FORCE_INLINE void compute_stats_highbd(
+ int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start,
+ int h_end, int v_start, int v_end, int dgd_stride, int src_stride,
+ double *M, double *H) {
int i, j, k, l;
double Y[WIENER_WIN2];
const int wiener_win2 = wiener_win * wiener_win;
@@ -616,13 +616,15 @@
}
assert(idx == wiener_win2);
for (k = 0; k < wiener_win2; ++k) {
- M[k] += Y[k] * X;
- H[k * wiener_win2 + k] += Y[k] * Y[k];
+ double Yk = Y[k];
+ M[k] += Yk * X;
+ double *H2 = &H[k * wiener_win2];
+ H2[k] += Yk * Yk;
for (l = k + 1; l < wiener_win2; ++l) {
// H is a symmetric matrix, so we only need to fill out the upper
// triangle here. We can copy it down to the lower triangle outside
// the (i, j) loops.
- H[k * wiener_win2 + l] += Y[k] * Y[l];
+ H2[l] += Yk * Y[l];
}
}
}
@@ -997,14 +999,23 @@
double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN];
const AV1_COMMON *const cm = rsc->cm;
- if (cm->use_highbitdepth)
- compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
- limits->h_start, limits->h_end, limits->v_start,
- limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
- else
+ if (cm->use_highbitdepth) {
+ if (rsc->plane == AOM_PLANE_Y) {
+ compute_stats_highbd(WIENER_WIN, rsc->dgd_buffer, rsc->src_buffer,
+ limits->h_start, limits->h_end, limits->v_start,
+ limits->v_end, rsc->dgd_stride, rsc->src_stride, M,
+ H);
+ } else {
+ compute_stats_highbd(WIENER_WIN_CHROMA, rsc->dgd_buffer, rsc->src_buffer,
+ limits->h_start, limits->h_end, limits->v_start,
+ limits->v_end, rsc->dgd_stride, rsc->src_stride, M,
+ H);
+ }
+ } else {
compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer, limits->h_start,
limits->h_end, limits->v_start, limits->v_end,
rsc->dgd_stride, rsc->src_stride, M, H);
+ }
const MACROBLOCK *const x = rsc->x;
const int64_t bits_none = x->wiener_restore_cost[0];