| /* |
| * Copyright (c) 2025, Alliance for Open Media. All rights reserved. |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| #ifndef AOM_AOM_DSP_REDUCE_SUM_HWY_H_ |
| #define AOM_AOM_DSP_REDUCE_SUM_HWY_H_ |
| |
| #include <type_traits> |
| #include "third_party/highway/hwy/highway.h" |
| |
| HWY_BEFORE_NAMESPACE(); |
| |
| namespace { |
| namespace HWY_NAMESPACE { |
| |
| namespace hn = hwy::HWY_NAMESPACE; |
| |
| template <size_t NumBlocks> |
| struct BlockReduceTraits; |
| |
| template <> |
| struct BlockReduceTraits<1> { |
| template <typename D> |
| HWY_ATTR HWY_INLINE static hn::VFromD<D> ReduceSum(D d, hn::VFromD<D> v) { |
| (void)d; |
| return v; |
| } |
| }; |
| |
| template <size_t NumBlocks> |
| struct BlockReduceTraits { |
| static_assert(NumBlocks > 1, |
| "Primary template BlockReduceTraits assumes NumBlocks > 1"); |
| static_assert((NumBlocks & (NumBlocks - 1)) == 0, |
| "BlockReduceTraits requires NumBlocks to be a power of 2."); |
| |
| template <typename D> |
| HWY_ATTR HWY_INLINE static hn::VFromD<hn::BlockDFromD<D>> ReduceSum( |
| D d, hn::VFromD<D> v) { |
| (void)d; |
| constexpr hn::Half<D> half_d; |
| auto v_half = hn::Add(hn::LowerHalf(half_d, v), hn::UpperHalf(half_d, v)); |
| return BlockReduceTraits<NumBlocks / 2>::ReduceSum(half_d, v_half); |
| } |
| }; |
| |
| // ReduceSum across blocks. |
| // For example, with a 4-block vector with 16 lanes of uint32_t: |
| // [a3 b3 c3 d3 a2 b2 c2 d2 a1 b1 c1 d1 a0 b0 c0 d0] |
| // returns a vector with 4 lanes: |
| // [a3+a2+a1+a0 b3+b2+b1+b0 c3+c2+c1+c0 d3+d2+d1+d0] |
| template <typename D> |
| HWY_ATTR HWY_INLINE hn::Vec<hn::BlockDFromD<D>> BlockReduceSum( |
| D int_tag, hn::VFromD<D> v) { |
| return BlockReduceTraits<int_tag.MaxBlocks()>::ReduceSum(int_tag, v); |
| } |
| |
| } // namespace HWY_NAMESPACE |
| } // namespace |
| |
| HWY_AFTER_NAMESPACE(); |
| |
| #endif // AOM_AOM_DSP_REDUCE_SUM_HWY_H_ |