| // Copyright 2020 Google LLC |
| // SPDX-License-Identifier: Apache-2.0 |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_ |
| #define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_ |
| |
| // SIMD/multicore-friendly planar image representation with row accessors. |
| |
| #include <string.h> |
| |
| #include <utility> // std::move |
| |
| #include "third_party/highway/hwy/aligned_allocator.h" |
| #include "third_party/highway/hwy/base.h" |
| |
| namespace hwy { |
| |
| // Type-independent parts of Image<> - reduces code duplication and facilitates |
| // moving member function implementations to cc file. |
| struct HWY_CONTRIB_DLLEXPORT ImageBase { |
| // Returns required alignment in bytes for externally allocated memory. |
| static size_t VectorSize(); |
| |
| // Returns distance [bytes] between the start of two consecutive rows, a |
| // multiple of VectorSize but NOT kAlias (see implementation). |
| static size_t BytesPerRow(size_t xsize, size_t sizeof_t); |
| |
| // No allocation (for output params or unused images) |
| ImageBase() |
| : xsize_(0), |
| ysize_(0), |
| bytes_per_row_(0), |
| bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {} |
| |
| // Allocates memory (this is the common case) |
| ImageBase(size_t xsize, size_t ysize, size_t sizeof_t); |
| |
| // References but does not take ownership of external memory. Useful for |
| // interoperability with other libraries. `aligned` must be aligned to a |
| // multiple of VectorSize() and `bytes_per_row` must also be a multiple of |
| // VectorSize() or preferably equal to BytesPerRow(). |
| ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned); |
| |
| // Copy construction/assignment is forbidden to avoid inadvertent copies, |
| // which can be very expensive. Use CopyImageTo() instead. |
| ImageBase(const ImageBase& other) = delete; |
| ImageBase& operator=(const ImageBase& other) = delete; |
| |
| // Move constructor (required for returning Image from function) |
| ImageBase(ImageBase&& other) noexcept = default; |
| |
| // Move assignment (required for std::vector) |
| ImageBase& operator=(ImageBase&& other) noexcept = default; |
| |
| void Swap(ImageBase& other); |
| |
| // Useful for pre-allocating image with some padding for alignment purposes |
| // and later reporting the actual valid dimensions. Caller is responsible |
| // for ensuring xsize/ysize are <= the original dimensions. |
| void ShrinkTo(const size_t xsize, const size_t ysize) { |
| xsize_ = static_cast<uint32_t>(xsize); |
| ysize_ = static_cast<uint32_t>(ysize); |
| // NOTE: we can't recompute bytes_per_row for more compact storage and |
| // better locality because that would invalidate the image contents. |
| } |
| |
| // How many pixels. |
| HWY_INLINE size_t xsize() const { return xsize_; } |
| HWY_INLINE size_t ysize() const { return ysize_; } |
| |
| // NOTE: do not use this for copying rows - the valid xsize may be much less. |
| HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; } |
| |
| // Raw access to byte contents, for interfacing with other libraries. |
| // Unsigned char instead of char to avoid surprises (sign extension). |
| HWY_INLINE uint8_t* bytes() { |
| void* p = bytes_.get(); |
| return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64)); |
| } |
| HWY_INLINE const uint8_t* bytes() const { |
| const void* p = bytes_.get(); |
| return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64)); |
| } |
| |
| protected: |
| // Returns pointer to the start of a row. |
| HWY_INLINE void* VoidRow(const size_t y) const { |
| #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN |
| if (y >= ysize_) { |
| HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_); |
| } |
| #endif |
| |
| void* row = bytes_.get() + y * bytes_per_row_; |
| return HWY_ASSUME_ALIGNED(row, 64); |
| } |
| |
| enum class Padding { |
| // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default. |
| kRoundUp, |
| // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra |
| // vector to be initialized. If done by default, this would suppress |
| // legitimate msan warnings. We therefore require users to explicitly call |
| // InitializePadding before using unaligned loads (e.g. convolution). |
| kUnaligned |
| }; |
| |
| // Initializes the minimum bytes required to suppress msan warnings from |
| // legitimate (according to Padding mode) vector loads/stores on the right |
| // border, where some lanes are uninitialized and assumed to be unused. |
| void InitializePadding(size_t sizeof_t, Padding padding); |
| |
| // (Members are non-const to enable assignment during move-assignment.) |
| uint32_t xsize_; // In valid pixels, not including any padding. |
| uint32_t ysize_; |
| size_t bytes_per_row_; // Includes padding. |
| AlignedFreeUniquePtr<uint8_t[]> bytes_; |
| }; |
| |
| // Single channel, aligned rows separated by padding. T must be POD. |
| // |
| // 'Single channel' (one 2D array per channel) simplifies vectorization |
| // (repeating the same operation on multiple adjacent components) without the |
| // complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients |
| // can easily iterate over all components in a row and Image requires no |
| // knowledge of the pixel format beyond the component type "T". |
| // |
| // 'Aligned' means each row is aligned to the L1 cache line size. This prevents |
| // false sharing between two threads operating on adjacent rows. |
| // |
| // 'Padding' is still relevant because vectors could potentially be larger than |
| // a cache line. By rounding up row sizes to the vector size, we allow |
| // reading/writing ALIGNED vectors whose first lane is a valid sample. This |
| // avoids needing a separate loop to handle remaining unaligned lanes. |
| // |
| // This image layout could also be achieved with a vector and a row accessor |
| // function, but a class wrapper with support for "deleter" allows wrapping |
| // existing memory allocated by clients without copying the pixels. It also |
| // provides convenient accessors for xsize/ysize, which shortens function |
| // argument lists. Supports move-construction so it can be stored in containers. |
| template <typename ComponentType> |
| class Image : public ImageBase { |
| public: |
| using T = ComponentType; |
| |
| Image() = default; |
| Image(const size_t xsize, const size_t ysize) |
| : ImageBase(xsize, ysize, sizeof(T)) {} |
| Image(const size_t xsize, const size_t ysize, size_t bytes_per_row, |
| void* aligned) |
| : ImageBase(xsize, ysize, bytes_per_row, aligned) {} |
| |
| void InitializePaddingForUnalignedAccesses() { |
| InitializePadding(sizeof(T), Padding::kUnaligned); |
| } |
| |
| HWY_INLINE const T* ConstRow(const size_t y) const { |
| return static_cast<const T*>(VoidRow(y)); |
| } |
| HWY_INLINE const T* ConstRow(const size_t y) { |
| return static_cast<const T*>(VoidRow(y)); |
| } |
| |
| // Returns pointer to non-const. This allows passing const Image* parameters |
| // when the callee is only supposed to fill the pixels, as opposed to |
| // allocating or resizing the image. |
| HWY_INLINE T* MutableRow(const size_t y) const { |
| return static_cast<T*>(VoidRow(y)); |
| } |
| HWY_INLINE T* MutableRow(const size_t y) { |
| return static_cast<T*>(VoidRow(y)); |
| } |
| |
| // Returns number of pixels (some of which are padding) per row. Useful for |
| // computing other rows via pointer arithmetic. WARNING: this must |
| // NOT be used to determine xsize. |
| HWY_INLINE intptr_t PixelsPerRow() const { |
| return static_cast<intptr_t>(bytes_per_row_ / sizeof(T)); |
| } |
| }; |
| |
// Shorthand for the most common instantiation: single-channel float samples.
using ImageF = Image<float>;
| |
| // A bundle of 3 same-sized images. To fill an existing Image3 using |
| // single-channel producers, we also need access to each const Image*. Const |
| // prevents breaking the same-size invariant, while still allowing pixels to be |
| // changed via MutableRow. |
| template <typename ComponentType> |
| class Image3 { |
| public: |
| using T = ComponentType; |
| using ImageT = Image<T>; |
| static constexpr size_t kNumPlanes = 3; |
| |
| Image3() : planes_{ImageT(), ImageT(), ImageT()} {} |
| |
| Image3(const size_t xsize, const size_t ysize) |
| : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize), |
| ImageT(xsize, ysize)} {} |
| |
| Image3(Image3&& other) noexcept { |
| for (size_t i = 0; i < kNumPlanes; i++) { |
| planes_[i] = std::move(other.planes_[i]); |
| } |
| } |
| |
| Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) { |
| if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) { |
| HWY_ABORT( |
| "Not same size: %d x %d, %d x %d, %d x %d\n", |
| static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()), |
| static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()), |
| static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize())); |
| } |
| planes_[0] = std::move(plane0); |
| planes_[1] = std::move(plane1); |
| planes_[2] = std::move(plane2); |
| } |
| |
| // Copy construction/assignment is forbidden to avoid inadvertent copies, |
| // which can be very expensive. Use CopyImageTo instead. |
| Image3(const Image3& other) = delete; |
| Image3& operator=(const Image3& other) = delete; |
| |
| Image3& operator=(Image3&& other) noexcept { |
| for (size_t i = 0; i < kNumPlanes; i++) { |
| planes_[i] = std::move(other.planes_[i]); |
| } |
| return *this; |
| } |
| |
| HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const { |
| return static_cast<const T*>(VoidPlaneRow(c, y)); |
| } |
| HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) { |
| return static_cast<const T*>(VoidPlaneRow(c, y)); |
| } |
| |
| HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const { |
| return static_cast<T*>(VoidPlaneRow(c, y)); |
| } |
| HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) { |
| return static_cast<T*>(VoidPlaneRow(c, y)); |
| } |
| |
| HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; } |
| |
| void Swap(Image3& other) { |
| for (size_t c = 0; c < 3; ++c) { |
| other.planes_[c].Swap(planes_[c]); |
| } |
| } |
| |
| void ShrinkTo(const size_t xsize, const size_t ysize) { |
| for (ImageT& plane : planes_) { |
| plane.ShrinkTo(xsize, ysize); |
| } |
| } |
| |
| // Sizes of all three images are guaranteed to be equal. |
| HWY_INLINE size_t xsize() const { return planes_[0].xsize(); } |
| HWY_INLINE size_t ysize() const { return planes_[0].ysize(); } |
| // Returns offset [bytes] from one row to the next row of the same plane. |
| // WARNING: this must NOT be used to determine xsize, nor for copying rows - |
| // the valid xsize may be much less. |
| HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); } |
| // Returns number of pixels (some of which are padding) per row. Useful for |
| // computing other rows via pointer arithmetic. WARNING: this must NOT be used |
| // to determine xsize. |
| HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); } |
| |
| private: |
| // Returns pointer to the start of a row. |
| HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const { |
| #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN |
| if (c >= kNumPlanes || y >= ysize()) { |
| HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c), |
| static_cast<int>(y), static_cast<int>(ysize())); |
| } |
| #endif |
| // Use the first plane's stride because the compiler might not realize they |
| // are all equal. Thus we only need a single multiplication for all planes. |
| const size_t row_offset = y * planes_[0].bytes_per_row(); |
| const void* row = planes_[c].bytes() + row_offset; |
| return static_cast<const T * HWY_RESTRICT>( |
| HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT)); |
| } |
| |
| private: |
| ImageT planes_[kNumPlanes]; |
| }; |
| |
// Shorthand for a bundle of three same-sized float planes (see Image3).
using Image3F = Image3<float>;
| |
| // Rectangular region in image(s). Factoring this out of Image instead of |
| // shifting the pointer by x0/y0 allows this to apply to multiple images with |
| // different resolutions. Can compare size via SameSize(rect1, rect2). |
| class Rect { |
| public: |
| // Most windows are xsize_max * ysize_max, except those on the borders where |
| // begin + size_max > end. |
| constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max, |
| size_t ysize_max, size_t xend, size_t yend) |
| : x0_(xbegin), |
| y0_(ybegin), |
| xsize_(ClampedSize(xbegin, xsize_max, xend)), |
| ysize_(ClampedSize(ybegin, ysize_max, yend)) {} |
| |
| // Construct with origin and known size (typically from another Rect). |
| constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize) |
| : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {} |
| |
| // Construct a rect that covers a whole image. |
| template <typename Image> |
| explicit Rect(const Image& image) |
| : Rect(0, 0, image.xsize(), image.ysize()) {} |
| |
| Rect() : Rect(0, 0, 0, 0) {} |
| |
| Rect(const Rect&) = default; |
| Rect& operator=(const Rect&) = default; |
| |
| Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max, |
| size_t ysize_max) { |
| return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_, |
| y0_ + ysize_); |
| } |
| |
| template <typename T> |
| const T* ConstRow(const Image<T>* image, size_t y) const { |
| return image->ConstRow(y + y0_) + x0_; |
| } |
| |
| template <typename T> |
| T* MutableRow(const Image<T>* image, size_t y) const { |
| return image->MutableRow(y + y0_) + x0_; |
| } |
| |
| template <typename T> |
| const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const { |
| return image.ConstPlaneRow(c, y + y0_) + x0_; |
| } |
| |
| template <typename T> |
| T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const { |
| return image->MutablePlaneRow(c, y + y0_) + x0_; |
| } |
| |
| // Returns true if this Rect fully resides in the given image. ImageT could be |
| // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical. |
| template <class ImageT> |
| bool IsInside(const ImageT& image) const { |
| return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize()); |
| } |
| |
| size_t x0() const { return x0_; } |
| size_t y0() const { return y0_; } |
| size_t xsize() const { return xsize_; } |
| size_t ysize() const { return ysize_; } |
| |
| private: |
| // Returns size_max, or whatever is left in [begin, end). |
| static constexpr size_t ClampedSize(size_t begin, size_t size_max, |
| size_t end) { |
| return (begin + size_max <= end) ? size_max |
| : (end > begin ? end - begin : 0); |
| } |
| |
| size_t x0_; |
| size_t y0_; |
| |
| size_t xsize_; |
| size_t ysize_; |
| }; |
| |
| // Works for any image-like input type(s). |
| template <class Image1, class Image2> |
| HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) { |
| return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize(); |
| } |
| |
| // Mirrors out of bounds coordinates and returns valid coordinates unchanged. |
| // We assume the radius (distance outside the image) is small compared to the |
| // image size, otherwise this might not terminate. |
| // The mirror is outside the last column (border pixel is also replicated). |
| static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x, |
| const int64_t xsize) { |
| HWY_DASSERT(xsize != 0); |
| |
| // TODO(janwas): replace with branchless version |
| while (x < 0 || x >= xsize) { |
| if (x < 0) { |
| x = -x - 1; |
| } else { |
| x = 2 * xsize - 1 - x; |
| } |
| } |
| return static_cast<size_t>(x); |
| } |
| |
| // Wrap modes for ensuring X/Y coordinates are in the valid range [0, size): |
| |
| // Mirrors (repeating the edge pixel once). Useful for convolutions. |
| struct WrapMirror { |
| HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const { |
| return Mirror(coord, static_cast<int64_t>(size)); |
| } |
| }; |
| |
| // Returns the same coordinate, for when we know "coord" is already valid (e.g. |
| // interior of an image). |
| struct WrapUnchanged { |
| HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const { |
| return static_cast<size_t>(coord); |
| } |
| }; |
| |
| // Similar to Wrap* but for row pointers (reduces Row() multiplications). |
| |
| class WrapRowMirror { |
| public: |
| template <class View> |
| WrapRowMirror(const View& image, size_t ysize) |
| : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {} |
| |
| const float* operator()(const float* const HWY_RESTRICT row, |
| const int64_t stride) const { |
| if (row < first_row_) { |
| const int64_t num_before = first_row_ - row; |
| // Mirrored; one row before => row 0, two before = row 1, ... |
| return first_row_ + num_before - stride; |
| } |
| if (row > last_row_) { |
| const int64_t num_after = row - last_row_; |
| // Mirrored; one row after => last row, two after = last - 1, ... |
| return last_row_ - num_after + stride; |
| } |
| return row; |
| } |
| |
| private: |
| const float* const HWY_RESTRICT first_row_; |
| const float* const HWY_RESTRICT last_row_; |
| }; |
| |
| struct WrapRowUnchanged { |
| HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row, |
| int64_t /*stride*/) const { |
| return row; |
| } |
| }; |
| |
| } // namespace hwy |
| |
| #endif // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_ |