blob: f2f0c903935d4ffd53d2423b59e3f0924a782cf0 [file] [log] [blame]
/*
* Copyright (c) 2023, Alliance for Open Media. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef AOM_AOM_DSP_ARM_DOT_SVE_H_
#define AOM_AOM_DSP_ARM_DOT_SVE_H_
#include <arm_neon_sve_bridge.h>
#include "config/aom_dsp_rtcd.h"
#include "config/aom_config.h"
// Dot product instructions operating on 16-bit input elements are exclusive to
// the SVE instruction set. However, we can access these instructions from a
// predominantly Neon context by making use of the Neon-SVE bridge intrinsics
// to reinterpret Neon vectors as SVE vectors - with the high part of the SVE
// vector (if it's longer than 128 bits) being "don't care".
// While sub-optimal on machines that have SVE vector length > 128-bit - as the
// remainder of the vector is unused - this approach is still beneficial when
// compared to a Neon-only solution.
// Signed 16-bit dot product with 64-bit accumulation, for 128-bit Neon
// vectors, implemented with the SVE svdot_s64 intrinsic.
//
//   acc  - running 64-bit accumulators (two lanes).
//   x, y - the two vectors of eight signed 16-bit elements to multiply.
//
// Returns the updated accumulators. Per the Arm ACLE definition of svdot,
// each 64-bit lane receives the sum of the products of the four 16-bit
// element pairs that share its 64-bit segment, added to the matching lane
// of acc.
//
// Each Neon operand is placed in the low 128 bits of an otherwise undefined
// SVE vector; only the low 128 bits of the SVE result are read back, so the
// undefined upper part never reaches the caller.
static INLINE int64x2_t aom_sdotq_s16(int64x2_t acc, int16x8_t x, int16x8_t y) {
  // Widen each Neon argument into an SVE vector (upper bits are "don't care").
  const svint64_t acc_sve = svset_neonq_s64(svundef_s64(), acc);
  const svint16_t x_sve = svset_neonq_s16(svundef_s16(), x);
  const svint16_t y_sve = svset_neonq_s16(svundef_s16(), y);

  // Run the SVE dot product, then narrow back to the Neon 128-bit view.
  const svint64_t sum_sve = svdot_s64(acc_sve, x_sve, y_sve);
  return svget_neonq_s64(sum_sve);
}
#endif // AOM_AOM_DSP_ARM_DOT_SVE_H_