//===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H #define LLVM_SUPPORT_BINARYSTREAMARRAY_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/iterator.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Error.h" #include <cassert> #include <cstdint> /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file /// provides two different array implementations. /// /// VarStreamArray - Arrays of variable length records. The user specifies /// an Extractor type that can extract a record from a given offset and /// return the number of bytes consumed by the record. /// /// FixedStreamArray - Arrays of fixed length records. This is similar in /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the /// elements of the array need not be laid out in contiguous memory. namespace llvm { /// VarStreamArrayExtractor is intended to be specialized to provide customized /// extraction logic. On input it receives a BinaryStreamRef pointing to the /// beginning of the next record, but where the length of the record is not yet /// known. Upon completion, it should return an appropriate Error instance if /// a record could not be extracted, or if one could be extracted it should /// return success and set Len to the number of bytes this record occupied in /// the underlying stream, and it should fill out the fields of the value type /// Item appropriately to represent the current record. /// /// You can specialize this template for your own custom value types to avoid /// having to specify a second template argument to VarStreamArray (documented /// below). template <typename T> struct VarStreamArrayExtractor { // Method intentionally deleted. You must provide an explicit specialization // with the following method implemented. Error operator()(BinaryStreamRef Stream, uint32_t &Len, T &Item) const = delete; }; /// VarStreamArray represents an array of variable length records backed by a /// stream. This could be a contiguous sequence of bytes in memory, it could /// be a file on disk, or it could be a PDB stream where bytes are stored as /// discontiguous blocks in a file. Usually it is desirable to treat arrays /// as contiguous blocks of memory, but doing so with large PDB files, for /// example, could mean allocating huge amounts of memory just to allow /// re-ordering of stream data to be contiguous before iterating over it. By /// abstracting this out, we need not duplicate this memory, and we can /// iterate over arrays in arbitrarily formatted streams. Elements are parsed /// lazily on iteration, so there is no upfront cost associated with building /// or copying a VarStreamArray, no matter how large it may be. /// /// You create a VarStreamArray by specifying a ValueType and an Extractor type. /// If you do not specify an Extractor type, you are expected to specialize /// VarStreamArrayExtractor<T> for your ValueType. /// /// By default an Extractor is default constructed in the class, but in some /// cases you might find it useful for an Extractor to maintain state across /// extractions. In this case you can provide your own Extractor through a /// secondary constructor. The following examples show various ways of /// creating a VarStreamArray. /// /// // Will use VarStreamArrayExtractor<MyType> as the extractor. /// VarStreamArray<MyType> MyTypeArray; /// /// // Will use a default-constructed MyExtractor as the extractor. /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; /// /// // Will use the specific instance of MyExtractor provided. /// // MyExtractor need not be default-constructible in this case. /// MyExtractor E(SomeContext); /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); /// template <typename ValueType, typename Extractor> class VarStreamArrayIterator; template <typename ValueType, typename Extractor = VarStreamArrayExtractor<ValueType>> class VarStreamArray { friend class VarStreamArrayIterator<ValueType, Extractor>; public: typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; VarStreamArray() = default; explicit VarStreamArray(const Extractor &E) : E(E) {} explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0) : Stream(Stream), Skew(Skew) {} VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0) : Stream(Stream), E(E), Skew(Skew) {} Iterator begin(bool *HadError = nullptr) const { return Iterator(*this, E, Skew, nullptr); } bool valid() const { return Stream.valid(); } uint32_t skew() const { return Skew; } Iterator end() const { return Iterator(E); } bool empty() const { return Stream.getLength() == 0; } VarStreamArray<ValueType, Extractor> substream(uint32_t Begin, uint32_t End) const { assert(Begin >= Skew); // We should never cut off the beginning of the stream since it might be // skewed, meaning the initial bytes are important. BinaryStreamRef NewStream = Stream.slice(0, End); return {NewStream, E, Begin}; } /// given an offset into the array's underlying stream, return an /// iterator to the record at that offset. This is considered unsafe /// since the behavior is undefined if \p Offset does not refer to the /// beginning of a valid record. Iterator at(uint32_t Offset) const { return Iterator(*this, E, Offset, nullptr); } const Extractor &getExtractor() const { return E; } Extractor &getExtractor() { return E; } BinaryStreamRef getUnderlyingStream() const { return Stream; } void setUnderlyingStream(BinaryStreamRef S, uint32_t Skew = 0) { Stream = S; this->Skew = Skew; } void drop_front() { Skew += begin()->length(); } private: BinaryStreamRef Stream; Extractor E; uint32_t Skew; }; template <typename ValueType, typename Extractor> class VarStreamArrayIterator : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, std::forward_iterator_tag, ValueType> { typedef VarStreamArrayIterator<ValueType, Extractor> IterType; typedef VarStreamArray<ValueType, Extractor> ArrayType; public: VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, uint32_t Offset, bool *HadError) : IterRef(Array.Stream.drop_front(Offset)), Extract(E), Array(&Array), AbsOffset(Offset), HadError(HadError) { if (IterRef.getLength() == 0) moveToEnd(); else { auto EC = Extract(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); } } } VarStreamArrayIterator() = default; explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} ~VarStreamArrayIterator() = default; bool operator==(const IterType &R) const { if (Array && R.Array) { // Both have a valid array, make sure they're same. assert(Array == R.Array); return IterRef == R.IterRef; } // Both iterators are at the end. if (!Array && !R.Array) return true; // One is not at the end and one is. return false; } const ValueType &operator*() const { assert(Array && !HasError); return ThisValue; } ValueType &operator*() { assert(Array && !HasError); return ThisValue; } IterType &operator+=(unsigned N) { for (unsigned I = 0; I < N; ++I) { // We are done with the current record, discard it so that we are // positioned at the next record. AbsOffset += ThisLen; IterRef = IterRef.drop_front(ThisLen); if (IterRef.getLength() == 0) { // There is nothing after the current record, we must make this an end // iterator. moveToEnd(); } else { // There is some data after the current record. auto EC = Extract(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); } else if (ThisLen == 0) { // An empty record? Make this an end iterator. moveToEnd(); } } } return *this; } uint32_t offset() const { return AbsOffset; } uint32_t getRecordLength() const { return ThisLen; } private: void moveToEnd() { Array = nullptr; ThisLen = 0; } void markError() { moveToEnd(); HasError = true; if (HadError != nullptr) *HadError = true; } ValueType ThisValue; BinaryStreamRef IterRef; Extractor Extract; const ArrayType *Array{nullptr}; uint32_t ThisLen{0}; uint32_t AbsOffset{0}; bool HasError{false}; bool *HadError{nullptr}; }; template <typename T> class FixedStreamArrayIterator; /// FixedStreamArray is similar to VarStreamArray, except with each record /// having a fixed-length. As with VarStreamArray, there is no upfront /// cost associated with building or copying a FixedStreamArray, as the /// memory for each element is not read from the backing stream until that /// element is iterated. template <typename T> class FixedStreamArray { friend class FixedStreamArrayIterator<T>; public: typedef FixedStreamArrayIterator<T> Iterator; FixedStreamArray() = default; explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { assert(Stream.getLength() % sizeof(T) == 0); } bool operator==(const FixedStreamArray<T> &Other) const { return Stream == Other.Stream; } bool operator!=(const FixedStreamArray<T> &Other) const { return !(*this == Other); } FixedStreamArray &operator=(const FixedStreamArray &) = default; const T &operator[](uint32_t Index) const { assert(Index < size()); uint32_t Off = Index * sizeof(T); ArrayRef<uint8_t> Data; if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { assert(false && "Unexpected failure reading from stream"); // This should never happen since we asserted that the stream length was // an exact multiple of the element size. consumeError(std::move(EC)); } assert(llvm::alignmentAdjustment(Data.data(), alignof(T)) == 0); return *reinterpret_cast<const T *>(Data.data()); } uint32_t size() const { return Stream.getLength() / sizeof(T); } bool empty() const { return size() == 0; } FixedStreamArrayIterator<T> begin() const { return FixedStreamArrayIterator<T>(*this, 0); } FixedStreamArrayIterator<T> end() const { return FixedStreamArrayIterator<T>(*this, size()); } const T &front() const { return *begin(); } const T &back() const { FixedStreamArrayIterator<T> I = end(); return *(--I); } BinaryStreamRef getUnderlyingStream() const { return Stream; } private: BinaryStreamRef Stream; }; template <typename T> class FixedStreamArrayIterator : public iterator_facade_base<FixedStreamArrayIterator<T>, std::random_access_iterator_tag, const T> { public: FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) : Array(Array), Index(Index) {} FixedStreamArrayIterator<T> & operator=(const FixedStreamArrayIterator<T> &Other) { Array = Other.Array; Index = Other.Index; return *this; } const T &operator*() const { return Array[Index]; } const T &operator*() { return Array[Index]; } bool operator==(const FixedStreamArrayIterator<T> &R) const { assert(Array == R.Array); return (Index == R.Index) && (Array == R.Array); } FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { Index += N; return *this; } FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { assert(std::ptrdiff_t(Index) >= N); Index -= N; return *this; } std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { assert(Array == R.Array); assert(Index >= R.Index); return Index - R.Index; } bool operator<(const FixedStreamArrayIterator<T> &RHS) const { assert(Array == RHS.Array); return Index < RHS.Index; } private: FixedStreamArray<T> Array; uint32_t Index; }; } // namespace llvm #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H