Replace per-element copy in copy_sb16_16() with memcpy
memcpy is expected to be SSE/AVX-optimized already, which addresses the
"SSE-optimize this" FIXME on the function.
Observed a 0.3% encoding-time reduction, measured on two-pass encoding
of 720p video at speed 5.
Change-Id: I7520984b65de32c4b899336d82c1e3c444ab6153
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index beada59..d3ae348 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -165,17 +165,13 @@
return best_tot_mse;
}
-/* FIXME: SSE-optimize this. */
static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
int src_voffset, int src_hoffset, int sstride,
int vsize, int hsize) {
- int r, c;
+ int r;
const uint16_t *base = &src[src_voffset * sstride + src_hoffset];
- for (r = 0; r < vsize; r++) {
- for (c = 0; c < hsize; c++) {
- dst[r * dstride + c] = base[r * sstride + c];
- }
- }
+ for (r = 0; r < vsize; r++)
+ memcpy(dst + r * dstride, base + r * sstride, hsize * sizeof(*base));
}
#if CONFIG_DIST_8X8