| // Copyright 2020 Google LLC |
| // SPDX-License-Identifier: Apache-2.0 |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Demo of functions that might be called from multiple SIMD modules (either |
| // other -inl.h files, or a .cc file between begin/end_target-inl). This is |
| // optional - all SIMD code can reside in .cc files. However, this allows |
| // splitting code into different files while still inlining instead of requiring |
| // calling through function pointers. |
| |
| // Per-target include guard. This is only required when using dynamic dispatch, |
| // i.e. including foreach_target.h. For static dispatch, a normal include |
| // guard would be fine because the header is only compiled once. |
| #if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE) |
| #ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ |
| #undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ |
| #else |
| #define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ |
| #endif |
| |
| // It is fine to #include normal or *-inl headers. |
| #include "third_party/highway/hwy/highway.h" |
| |
| HWY_BEFORE_NAMESPACE(); |
| namespace skeleton { |
| namespace HWY_NAMESPACE { |
| |
| // Highway ops reside here; ADL does not find templates nor builtins. |
| namespace hn = hwy::HWY_NAMESPACE; |
| |
| // Example of a type-agnostic (caller-specified lane type) and width-agnostic |
| // (uses best available instruction set) function in a header. |
| // |
| // Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size. |
| template <class D, typename T> |
| HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array, |
| const T* HWY_RESTRICT add_array, |
| const size_t size, T* HWY_RESTRICT x_array) { |
| for (size_t i = 0; i < size; i += hn::Lanes(d)) { |
| const auto mul = hn::Load(d, mul_array + i); |
| const auto add = hn::Load(d, add_array + i); |
| auto x = hn::Load(d, x_array + i); |
| x = hn::MulAdd(mul, x, add); |
| hn::Store(x, d, x_array + i); |
| } |
| } |
| |
| // NOLINTNEXTLINE(google-readability-namespace-comments) |
| } // namespace HWY_NAMESPACE |
| } // namespace skeleton |
| HWY_AFTER_NAMESPACE(); |
| |
| #endif // include guard |