diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch) | |
tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b /include/llvm/Bitstream | |
parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff) |
Notes
Diffstat (limited to 'include/llvm/Bitstream')
-rw-r--r-- | include/llvm/Bitstream/BitCodes.h | 184 | ||||
-rw-r--r-- | include/llvm/Bitstream/BitstreamReader.h | 557 | ||||
-rw-r--r-- | include/llvm/Bitstream/BitstreamWriter.h | 547 |
3 files changed, 1288 insertions, 0 deletions
diff --git a/include/llvm/Bitstream/BitCodes.h b/include/llvm/Bitstream/BitCodes.h new file mode 100644 index 000000000000..adf54ba96396 --- /dev/null +++ b/include/llvm/Bitstream/BitCodes.h @@ -0,0 +1,184 @@ +//===- BitCodes.h - Enum values for the bitstream format --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines bitstream enum values. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITSTREAM_BITCODES_H +#define LLVM_BITSTREAM_BITCODES_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> + +namespace llvm { +/// Offsets of the 32-bit fields of bitstream wrapper header. +enum BitstreamWrapperHeader : unsigned { + BWH_MagicField = 0 * 4, + BWH_VersionField = 1 * 4, + BWH_OffsetField = 2 * 4, + BWH_SizeField = 3 * 4, + BWH_CPUTypeField = 4 * 4, + BWH_HeaderSize = 5 * 4 +}; + +namespace bitc { + enum StandardWidths { + BlockIDWidth = 8, // We use VBR-8 for block IDs. + CodeLenWidth = 4, // Codelen are VBR-4. + BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block. + }; + + // The standard abbrev namespace always has a way to exit a block, enter a + // nested block, define abbrevs, and define an unabbreviated record. + enum FixedAbbrevIDs { + END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode. + ENTER_SUBBLOCK = 1, + + /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists + /// of a vbr5 for # operand infos. Each operand info is emitted with a + /// single bit to indicate if it is a literal encoding. If so, the value is + /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed + /// by the info value as a vbr5 if needed. + DEFINE_ABBREV = 2, + + // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by + // a vbr6 for the # operands, followed by vbr6's for each operand. + UNABBREV_RECORD = 3, + + // This is not a code, this is a marker for the first abbrev assignment. + FIRST_APPLICATION_ABBREV = 4 + }; + + /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO + /// block, which contains metadata about other blocks in the file. + enum StandardBlockIDs { + /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example, + /// standard abbrevs that should be available to all blocks of a specified + /// ID. + BLOCKINFO_BLOCK_ID = 0, + + // Block IDs 1-7 are reserved for future expansion. + FIRST_APPLICATION_BLOCKID = 8 + }; + + /// BlockInfoCodes - The blockinfo block contains metadata about user-defined + /// blocks. + enum BlockInfoCodes { + // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd + // block, instead of the BlockInfo block. + + BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#] + BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name] + BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: + // [id, name] + }; + +} // End bitc namespace + +/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation. +/// This is actually a union of two different things: +/// 1. It could be a literal integer value ("the operand is always 17"). +/// 2. It could be an encoding specification ("this operand encoded like so"). +/// +class BitCodeAbbrevOp { + uint64_t Val; // A literal value or data for an encoding. + bool IsLiteral : 1; // Indicate whether this is a literal value or not. + unsigned Enc : 3; // The encoding to use. +public: + enum Encoding { + Fixed = 1, // A fixed width field, Val specifies number of bits. + VBR = 2, // A VBR field where Val specifies the width of each chunk. + Array = 3, // A sequence of fields, next field species elt encoding. + Char6 = 4, // A 6-bit fixed field which maps to [a-zA-Z0-9._]. + Blob = 5 // 32-bit aligned array of 8-bit characters. + }; + + explicit BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {} + explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0) + : Val(Data), IsLiteral(false), Enc(E) {} + + bool isLiteral() const { return IsLiteral; } + bool isEncoding() const { return !IsLiteral; } + + // Accessors for literals. + uint64_t getLiteralValue() const { assert(isLiteral()); return Val; } + + // Accessors for encoding info. + Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; } + uint64_t getEncodingData() const { + assert(isEncoding() && hasEncodingData()); + return Val; + } + + bool hasEncodingData() const { return hasEncodingData(getEncoding()); } + static bool hasEncodingData(Encoding E) { + switch (E) { + case Fixed: + case VBR: + return true; + case Array: + case Char6: + case Blob: + return false; + } + report_fatal_error("Invalid encoding"); + } + + /// isChar6 - Return true if this character is legal in the Char6 encoding. + static bool isChar6(char C) { + if (C >= 'a' && C <= 'z') return true; + if (C >= 'A' && C <= 'Z') return true; + if (C >= '0' && C <= '9') return true; + if (C == '.' || C == '_') return true; + return false; + } + static unsigned EncodeChar6(char C) { + if (C >= 'a' && C <= 'z') return C-'a'; + if (C >= 'A' && C <= 'Z') return C-'A'+26; + if (C >= '0' && C <= '9') return C-'0'+26+26; + if (C == '.') return 62; + if (C == '_') return 63; + llvm_unreachable("Not a value Char6 character!"); + } + + static char DecodeChar6(unsigned V) { + assert((V & ~63) == 0 && "Not a Char6 encoded character!"); + return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._" + [V]; + } + +}; + +/// BitCodeAbbrev - This class represents an abbreviation record. An +/// abbreviation allows a complex record that has redundancy to be stored in a +/// specialized format instead of the fully-general, fully-vbr, format. +class BitCodeAbbrev { + SmallVector<BitCodeAbbrevOp, 32> OperandList; + +public: + unsigned getNumOperandInfos() const { + return static_cast<unsigned>(OperandList.size()); + } + const BitCodeAbbrevOp &getOperandInfo(unsigned N) const { + return OperandList[N]; + } + + void Add(const BitCodeAbbrevOp &OpInfo) { + OperandList.push_back(OpInfo); + } +}; +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitstream/BitstreamReader.h b/include/llvm/Bitstream/BitstreamReader.h new file mode 100644 index 000000000000..ee82e7ec1ba2 --- /dev/null +++ b/include/llvm/Bitstream/BitstreamReader.h @@ -0,0 +1,557 @@ +//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamReader class. This class can be used to +// read an arbitrary bitstream, regardless of its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITSTREAM_BITSTREAMREADER_H +#define LLVM_BITSTREAM_BITSTREAMREADER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Bitstream/BitCodes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +namespace llvm { + +/// This class maintains the abbreviations read from a block info block. +class BitstreamBlockInfo { +public: + /// This contains information emitted to BLOCKINFO_BLOCK blocks. These + /// describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs; + std::string Name; + std::vector<std::pair<unsigned, std::string>> RecordNames; + }; + +private: + std::vector<BlockInfo> BlockInfoRecords; + +public: + /// If there is block info for the specified ID, return it, otherwise return + /// null. + const BlockInfo *getBlockInfo(unsigned BlockID) const { + // Common case, the most recent entry matches BlockID. + if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); + i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return nullptr; + } + + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (const BlockInfo *BI = getBlockInfo(BlockID)) + return *const_cast<BlockInfo*>(BI); + + // Otherwise, add a new record. + BlockInfoRecords.emplace_back(); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } +}; + +/// This represents a position within a bitstream. There may be multiple +/// independent cursors reading within one bitstream, each maintaining their +/// own local state. +class SimpleBitstreamCursor { + ArrayRef<uint8_t> BitcodeBytes; + size_t NextChar = 0; + +public: + /// This is the current data we have pulled from the stream but have not + /// returned to the client. This is specifically and intentionally defined to + /// follow the word size of the host machine for efficiency. We use word_t in + /// places that are aware of this to make it perfectly explicit what is going + /// on. + using word_t = size_t; + +private: + word_t CurWord = 0; + + /// This is the number of bits in CurWord that are valid. This is always from + /// [0...bits_of(size_t)-1] inclusive. + unsigned BitsInCurWord = 0; + +public: + static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8; + + SimpleBitstreamCursor() = default; + explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) + : BitcodeBytes(BitcodeBytes) {} + explicit SimpleBitstreamCursor(StringRef BitcodeBytes) + : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {} + explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {} + + bool canSkipToPos(size_t pos) const { + // pos can be skipped to if it is a valid address or one byte past the end. + return pos <= BitcodeBytes.size(); + } + + bool AtEndOfStream() { + return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar; + } + + /// Return the bit # of the bit we are reading. + uint64_t GetCurrentBitNo() const { + return NextChar*CHAR_BIT - BitsInCurWord; + } + + // Return the byte # of the current bit. + uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } + + ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; } + + /// Reset the stream to the specified bit number. + Error JumpToBit(uint64_t BitNo) { + size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); + unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); + assert(canSkipToPos(ByteNo) && "Invalid location"); + + // Move the cursor to the right word. + NextChar = ByteNo; + BitsInCurWord = 0; + + // Skip over any bits that are already consumed. + if (WordBitNo) { + if (Expected<word_t> Res = Read(WordBitNo)) + return Error::success(); + else + return Res.takeError(); + } + + return Error::success(); + } + + /// Get a pointer into the bitstream at the specified byte offset. + const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { + return BitcodeBytes.data() + ByteNo; + } + + /// Get a pointer into the bitstream at the specified bit offset. + /// + /// The bit offset must be on a byte boundary. + const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) { + assert(!(BitNo % 8) && "Expected bit on byte boundary"); + return getPointerToByte(BitNo / 8, NumBytes); + } + + Error fillCurWord() { + if (NextChar >= BitcodeBytes.size()) + return createStringError(std::errc::io_error, + "Unexpected end of file reading %u of %u bytes", + NextChar, BitcodeBytes.size()); + + // Read the next word from the stream. + const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar; + unsigned BytesRead; + if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) { + BytesRead = sizeof(word_t); + CurWord = + support::endian::read<word_t, support::little, support::unaligned>( + NextCharPtr); + } else { + // Short read. + BytesRead = BitcodeBytes.size() - NextChar; + CurWord = 0; + for (unsigned B = 0; B != BytesRead; ++B) + CurWord |= uint64_t(NextCharPtr[B]) << (B * 8); + } + NextChar += BytesRead; + BitsInCurWord = BytesRead * 8; + return Error::success(); + } + + Expected<word_t> Read(unsigned NumBits) { + static const unsigned BitsInWord = MaxChunkSize; + + assert(NumBits && NumBits <= BitsInWord && + "Cannot return zero or more than BitsInWord bits!"); + + static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; + + // If the field is fully contained by CurWord, return it quickly. + if (BitsInCurWord >= NumBits) { + word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); + + // Use a mask to avoid undefined behavior. + CurWord >>= (NumBits & Mask); + + BitsInCurWord -= NumBits; + return R; + } + + word_t R = BitsInCurWord ? CurWord : 0; + unsigned BitsLeft = NumBits - BitsInCurWord; + + if (Error fillResult = fillCurWord()) + return std::move(fillResult); + + // If we run out of data, abort. + if (BitsLeft > BitsInCurWord) + return createStringError(std::errc::io_error, + "Unexpected end of file reading %u of %u bits", + BitsInCurWord, BitsLeft); + + word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); + + // Use a mask to avoid undefined behavior. + CurWord >>= (BitsLeft & Mask); + + BitsInCurWord -= BitsLeft; + + R |= R2 << (NumBits - BitsLeft); + + return R; + } + + Expected<uint32_t> ReadVBR(unsigned NumBits) { + Expected<unsigned> MaybeRead = Read(NumBits); + if (!MaybeRead) + return MaybeRead; + uint32_t Piece = MaybeRead.get(); + + if ((Piece & (1U << (NumBits-1))) == 0) + return Piece; + + uint32_t Result = 0; + unsigned NextBit = 0; + while (true) { + Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + MaybeRead = Read(NumBits); + if (!MaybeRead) + return MaybeRead; + Piece = MaybeRead.get(); + } + } + + // Read a VBR that may have a value up to 64-bits in size. The chunk size of + // the VBR must still be <= 32 bits though. + Expected<uint64_t> ReadVBR64(unsigned NumBits) { + Expected<uint64_t> MaybeRead = Read(NumBits); + if (!MaybeRead) + return MaybeRead; + uint32_t Piece = MaybeRead.get(); + + if ((Piece & (1U << (NumBits-1))) == 0) + return uint64_t(Piece); + + uint64_t Result = 0; + unsigned NextBit = 0; + while (true) { + Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + MaybeRead = Read(NumBits); + if (!MaybeRead) + return MaybeRead; + Piece = MaybeRead.get(); + } + } + + void SkipToFourByteBoundary() { + // If word_t is 64-bits and if we've read less than 32 bits, just dump + // the bits we have up to the next 32-bit boundary. + if (sizeof(word_t) > 4 && + BitsInCurWord >= 32) { + CurWord >>= BitsInCurWord-32; + BitsInCurWord = 32; + return; + } + + BitsInCurWord = 0; + } + + /// Return the size of the stream in bytes. + size_t SizeInBytes() const { return BitcodeBytes.size(); } + + /// Skip to the end of the file. + void skipToEnd() { NextChar = BitcodeBytes.size(); } +}; + +/// When advancing through a bitstream cursor, each advance can discover a few +/// different kinds of entries: +struct BitstreamEntry { + enum { + Error, // Malformed bitcode was found. + EndBlock, // We've reached the end of the current block, (or the end of the + // file, which is treated like a series of EndBlock records. + SubBlock, // This is the start of a new subblock of a specific ID. + Record // This is a record with a specific AbbrevID. + } Kind; + + unsigned ID; + + static BitstreamEntry getError() { + BitstreamEntry E; E.Kind = Error; return E; + } + + static BitstreamEntry getEndBlock() { + BitstreamEntry E; E.Kind = EndBlock; return E; + } + + static BitstreamEntry getSubBlock(unsigned ID) { + BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; + } + + static BitstreamEntry getRecord(unsigned AbbrevID) { + BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; + } +}; + +/// This represents a position within a bitcode file, implemented on top of a +/// SimpleBitstreamCursor. +/// +/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not +/// be passed by value. +class BitstreamCursor : SimpleBitstreamCursor { + // This is the declared size of code values used for the current block, in + // bits. + unsigned CurCodeSize = 2; + + /// Abbrevs installed at in this block. + std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs; + + struct Block { + unsigned PrevCodeSize; + std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs; + + explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} + }; + + /// This tracks the codesize of parent blocks. + SmallVector<Block, 8> BlockScope; + + BitstreamBlockInfo *BlockInfo = nullptr; + +public: + static const size_t MaxChunkSize = sizeof(word_t) * 8; + + BitstreamCursor() = default; + explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes) {} + explicit BitstreamCursor(StringRef BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes) {} + explicit BitstreamCursor(MemoryBufferRef BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes) {} + + using SimpleBitstreamCursor::AtEndOfStream; + using SimpleBitstreamCursor::canSkipToPos; + using SimpleBitstreamCursor::fillCurWord; + using SimpleBitstreamCursor::getBitcodeBytes; + using SimpleBitstreamCursor::GetCurrentBitNo; + using SimpleBitstreamCursor::getCurrentByteNo; + using SimpleBitstreamCursor::getPointerToByte; + using SimpleBitstreamCursor::JumpToBit; + using SimpleBitstreamCursor::Read; + using SimpleBitstreamCursor::ReadVBR; + using SimpleBitstreamCursor::ReadVBR64; + using SimpleBitstreamCursor::SizeInBytes; + + /// Return the number of bits used to encode an abbrev #. + unsigned getAbbrevIDWidth() const { return CurCodeSize; } + + /// Flags that modify the behavior of advance(). + enum { + /// If this flag is used, the advance() method does not automatically pop + /// the block scope when the end of a block is reached. + AF_DontPopBlockAtEnd = 1, + + /// If this flag is used, abbrev entries are returned just like normal + /// records. + AF_DontAutoprocessAbbrevs = 2 + }; + + /// Advance the current bitstream, returning the next entry in the stream. + Expected<BitstreamEntry> advance(unsigned Flags = 0) { + while (true) { + if (AtEndOfStream()) + return BitstreamEntry::getError(); + + Expected<unsigned> MaybeCode = ReadCode(); + if (!MaybeCode) + return MaybeCode.takeError(); + unsigned Code = MaybeCode.get(); + + if (Code == bitc::END_BLOCK) { + // Pop the end of the block unless Flags tells us not to. + if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) + return BitstreamEntry::getError(); + return BitstreamEntry::getEndBlock(); + } + + if (Code == bitc::ENTER_SUBBLOCK) { + if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID()) + return BitstreamEntry::getSubBlock(MaybeSubBlock.get()); + else + return MaybeSubBlock.takeError(); + } + + if (Code == bitc::DEFINE_ABBREV && + !(Flags & AF_DontAutoprocessAbbrevs)) { + // We read and accumulate abbrev's, the client can't do anything with + // them anyway. + if (Error Err = ReadAbbrevRecord()) + return std::move(Err); + continue; + } + + return BitstreamEntry::getRecord(Code); + } + } + + /// This is a convenience function for clients that don't expect any + /// subblocks. This just skips over them automatically. + Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) { + while (true) { + // If we found a normal entry, return it. + Expected<BitstreamEntry> MaybeEntry = advance(Flags); + if (!MaybeEntry) + return MaybeEntry; + BitstreamEntry Entry = MaybeEntry.get(); + + if (Entry.Kind != BitstreamEntry::SubBlock) + return Entry; + + // If we found a sub-block, just skip over it and check the next entry. + if (Error Err = SkipBlock()) + return std::move(Err); + } + } + + Expected<unsigned> ReadCode() { return Read(CurCodeSize); } + + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] + + /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. + Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); } + + /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body + /// of this block. + Error SkipBlock() { + // Read and ignore the codelen value. + if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth)) + ; // Since we are skipping this block, we don't care what code widths are + // used inside of it. + else + return Res.takeError(); + + SkipToFourByteBoundary(); + Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth); + if (!MaybeNum) + return MaybeNum.takeError(); + size_t NumFourBytes = MaybeNum.get(); + + // Check that the block wasn't partially defined, and that the offset isn't + // bogus. + size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8; + if (AtEndOfStream()) + return createStringError(std::errc::illegal_byte_sequence, + "can't skip block: already at end of stream"); + if (!canSkipToPos(SkipTo / 8)) + return createStringError(std::errc::illegal_byte_sequence, + "can't skip to bit %zu from %" PRIu64, SkipTo, + GetCurrentBitNo()); + + if (Error Res = JumpToBit(SkipTo)) + return Res; + + return Error::success(); + } + + /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block. + Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); + + bool ReadBlockEnd() { + if (BlockScope.empty()) return true; + + // Block tail: + // [END_BLOCK, <align4bytes>] + SkipToFourByteBoundary(); + + popBlockScope(); + return false; + } + +private: + void popBlockScope() { + CurCodeSize = BlockScope.back().PrevCodeSize; + + CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); + BlockScope.pop_back(); + } + + //===--------------------------------------------------------------------===// + // Record Processing + //===--------------------------------------------------------------------===// + +public: + /// Return the abbreviation for the specified AbbrevId. + const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { + unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; + if (AbbrevNo >= CurAbbrevs.size()) + report_fatal_error("Invalid abbrev number"); + return CurAbbrevs[AbbrevNo].get(); + } + + /// Read the current record and discard it, returning the code for the record. + Expected<unsigned> skipRecord(unsigned AbbrevID); + + Expected<unsigned> readRecord(unsigned AbbrevID, + SmallVectorImpl<uint64_t> &Vals, + StringRef *Blob = nullptr); + + //===--------------------------------------------------------------------===// + // Abbrev Processing + //===--------------------------------------------------------------------===// + Error ReadAbbrevRecord(); + + /// Read and return a block info block from the bitstream. If an error was + /// encountered, return None. + /// + /// \param ReadBlockInfoNames Whether to read block/record name information in + /// the BlockInfo block. Only llvm-bcanalyzer uses this. + Expected<Optional<BitstreamBlockInfo>> + ReadBlockInfoBlock(bool ReadBlockInfoNames = false); + + /// Set the block info to be used by this BitstreamCursor to interpret + /// abbreviated records. + void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; } +}; + +} // end llvm namespace + +#endif // LLVM_BITSTREAM_BITSTREAMREADER_H diff --git a/include/llvm/Bitstream/BitstreamWriter.h b/include/llvm/Bitstream/BitstreamWriter.h new file mode 100644 index 000000000000..c0ead19dc71d --- /dev/null +++ b/include/llvm/Bitstream/BitstreamWriter.h @@ -0,0 +1,547 @@ +//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamWriter class. This class can be used to +// write an arbitrary bitstream, regardless of its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITSTREAM_BITSTREAMWRITER_H +#define LLVM_BITSTREAM_BITSTREAMWRITER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitstream/BitCodes.h" +#include "llvm/Support/Endian.h" +#include <vector> + +namespace llvm { + +class BitstreamWriter { + SmallVectorImpl<char> &Out; + + /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. + unsigned CurBit; + + /// CurValue - The current value. Only bits < CurBit are valid. + uint32_t CurValue; + + /// CurCodeSize - This is the declared size of code values used for the + /// current block, in bits. + unsigned CurCodeSize; + + /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently + /// selected BLOCK ID. + unsigned BlockInfoCurBID; + + /// CurAbbrevs - Abbrevs installed at in this block. + std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs; + + struct Block { + unsigned PrevCodeSize; + size_t StartSizeWord; + std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs; + Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} + }; + + /// BlockScope - This tracks the current blocks that we have entered. + std::vector<Block> BlockScope; + + /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. + /// These describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs; + }; + std::vector<BlockInfo> BlockInfoRecords; + + void WriteByte(unsigned char Value) { + Out.push_back(Value); + } + + void WriteWord(unsigned Value) { + Value = support::endian::byte_swap<uint32_t, support::little>(Value); + Out.append(reinterpret_cast<const char *>(&Value), + reinterpret_cast<const char *>(&Value + 1)); + } + + size_t GetBufferOffset() const { return Out.size(); } + + size_t GetWordIndex() const { + size_t Offset = GetBufferOffset(); + assert((Offset & 3) == 0 && "Not 32-bit aligned"); + return Offset / 4; + } + +public: + explicit BitstreamWriter(SmallVectorImpl<char> &O) + : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} + + ~BitstreamWriter() { + assert(CurBit == 0 && "Unflushed data remaining"); + assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); + } + + /// Retrieve the current position in the stream, in bits. + uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } + + /// Retrieve the number of bits currently used to encode an abbrev ID. + unsigned GetAbbrevIDWidth() const { return CurCodeSize; } + + //===--------------------------------------------------------------------===// + // Basic Primitives for emitting bits to the stream. + //===--------------------------------------------------------------------===// + + /// Backpatch a 32-bit word in the output at the given bit offset + /// with the specified value. + void BackpatchWord(uint64_t BitNo, unsigned NewWord) { + using namespace llvm::support; + unsigned ByteNo = BitNo / 8; + assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>( + &Out[ByteNo], BitNo & 7)) && + "Expected to be patching over 0-value placeholders"); + endian::writeAtBitAlignment<uint32_t, little, unaligned>( + &Out[ByteNo], NewWord, BitNo & 7); + } + + void BackpatchWord64(uint64_t BitNo, uint64_t Val) { + BackpatchWord(BitNo, (uint32_t)Val); + BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32)); + } + + void Emit(uint32_t Val, unsigned NumBits) { + assert(NumBits && NumBits <= 32 && "Invalid value size!"); + assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); + CurValue |= Val << CurBit; + if (CurBit + NumBits < 32) { + CurBit += NumBits; + return; + } + + // Add the current word. + WriteWord(CurValue); + + if (CurBit) + CurValue = Val >> (32-CurBit); + else + CurValue = 0; + CurBit = (CurBit+NumBits) & 31; + } + + void FlushToWord() { + if (CurBit) { + WriteWord(CurValue); + CurBit = 0; + CurValue = 0; + } + } + + void EmitVBR(uint32_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); + uint32_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. + while (Val >= Threshold) { + Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit(Val, NumBits); + } + + void EmitVBR64(uint64_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); + if ((uint32_t)Val == Val) + return EmitVBR((uint32_t)Val, NumBits); + + uint32_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. + while (Val >= Threshold) { + Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | + (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit((uint32_t)Val, NumBits); + } + + /// EmitCode - Emit the specified code. + void EmitCode(unsigned Val) { + Emit(Val, CurCodeSize); + } + + //===--------------------------------------------------------------------===// + // Block Manipulation + //===--------------------------------------------------------------------===// + + /// getBlockInfo - If there is block info for the specified ID, return it, + /// otherwise return null. + BlockInfo *getBlockInfo(unsigned BlockID) { + // Common case, the most recent entry matches BlockID. + if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); + i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return nullptr; + } + + void EnterSubblock(unsigned BlockID, unsigned CodeLen) { + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] + EmitCode(bitc::ENTER_SUBBLOCK); + EmitVBR(BlockID, bitc::BlockIDWidth); + EmitVBR(CodeLen, bitc::CodeLenWidth); + FlushToWord(); + + size_t BlockSizeWordIndex = GetWordIndex(); + unsigned OldCodeSize = CurCodeSize; + + // Emit a placeholder, which will be replaced when the block is popped. + Emit(0, bitc::BlockSizeWidth); + + CurCodeSize = CodeLen; + + // Push the outer block's abbrev set onto the stack, start out with an + // empty abbrev set. + BlockScope.emplace_back(OldCodeSize, BlockSizeWordIndex); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // If there is a blockinfo for this BlockID, add all the predefined abbrevs + // to the abbrev list. + if (BlockInfo *Info = getBlockInfo(BlockID)) { + CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(), + Info->Abbrevs.end()); + } + } + + void ExitBlock() { + assert(!BlockScope.empty() && "Block scope imbalance!"); + const Block &B = BlockScope.back(); + + // Block tail: + // [END_BLOCK, <align4bytes>] + EmitCode(bitc::END_BLOCK); + FlushToWord(); + + // Compute the size of the block, in words, not counting the size field. + size_t SizeInWords = GetWordIndex() - B.StartSizeWord - 1; + uint64_t BitNo = uint64_t(B.StartSizeWord) * 32; + + // Update the block size field in the header of this sub-block. + BackpatchWord(BitNo, SizeInWords); + + // Restore the inner block's code size and abbrev table. + CurCodeSize = B.PrevCodeSize; + CurAbbrevs = std::move(B.PrevAbbrevs); + BlockScope.pop_back(); + } + + //===--------------------------------------------------------------------===// + // Record Emission + //===--------------------------------------------------------------------===// + +private: + /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev + /// record. This is a no-op, since the abbrev specifies the literal to use. + template<typename uintty> + void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) { + assert(Op.isLiteral() && "Not a literal"); + // If the abbrev specifies the literal value to use, don't emit + // anything. + assert(V == Op.getLiteralValue() && + "Invalid abbrev for record!"); + } + + /// EmitAbbreviatedField - Emit a single scalar field value with the specified + /// encoding. + template<typename uintty> + void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) { + assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); + + // Encode the value as we are commanded. + switch (Op.getEncoding()) { + default: llvm_unreachable("Unknown encoding!"); + case BitCodeAbbrevOp::Fixed: + if (Op.getEncodingData()) + Emit((unsigned)V, (unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::VBR: + if (Op.getEncodingData()) + EmitVBR64(V, (unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::Char6: + Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6); + break; + } + } + + /// EmitRecordWithAbbrevImpl - This is the core implementation of the record + /// emission code. If BlobData is non-null, then it specifies an array of + /// data that should be emitted as part of the Blob or Array operand that is + /// known to exist at the end of the record. If Code is specified, then + /// it is the record code to emit before the Vals, which must not contain + /// the code. + template <typename uintty> + void EmitRecordWithAbbrevImpl(unsigned Abbrev, ArrayRef<uintty> Vals, + StringRef Blob, Optional<unsigned> Code) { + const char *BlobData = Blob.data(); + unsigned BlobLen = (unsigned) Blob.size(); + unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; + assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); + const BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo].get(); + + EmitCode(Abbrev); + + unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); + if (Code) { + assert(e && "Expected non-empty abbreviation"); + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++); + + if (Op.isLiteral()) + EmitAbbreviatedLiteral(Op, Code.getValue()); + else { + assert(Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::Blob && + "Expected literal or scalar"); + EmitAbbreviatedField(Op, Code.getValue()); + } + } + + unsigned RecordIdx = 0; + for (; i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) { + assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); + ++RecordIdx; + } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { + // Array case. + assert(i + 2 == e && "array op not second to last?"); + const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // If this record has blob data, emit it, otherwise we must have record + // entries to encode this way. + if (BlobData) { + assert(RecordIdx == Vals.size() && + "Blob data and record entries specified for array!"); + // Emit a vbr6 to indicate the number of elements present. + EmitVBR(static_cast<uint32_t>(BlobLen), 6); + + // Emit each field. + for (unsigned i = 0; i != BlobLen; ++i) + EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); + + // Know that blob data is consumed for assertion below. + BlobData = nullptr; + } else { + // Emit a vbr6 to indicate the number of elements present. + EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); + + // Emit each field. + for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) + EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); + } + } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) { + // If this record has blob data, emit it, otherwise we must have record + // entries to encode this way. + + if (BlobData) { + assert(RecordIdx == Vals.size() && + "Blob data and record entries specified for blob operand!"); + + assert(Blob.data() == BlobData && "BlobData got moved"); + assert(Blob.size() == BlobLen && "BlobLen got changed"); + emitBlob(Blob); + BlobData = nullptr; + } else { + emitBlob(Vals.slice(RecordIdx)); + } + } else { // Single scalar field. + assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedField(Op, Vals[RecordIdx]); + ++RecordIdx; + } + } + assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); + assert(BlobData == nullptr && + "Blob data specified for record that doesn't use it!"); + } + +public: + /// Emit a blob, including flushing before and tail-padding. + template <class UIntTy> + void emitBlob(ArrayRef<UIntTy> Bytes, bool ShouldEmitSize = true) { + // Emit a vbr6 to indicate the number of elements present. + if (ShouldEmitSize) + EmitVBR(static_cast<uint32_t>(Bytes.size()), 6); + + // Flush to a 32-bit alignment boundary. + FlushToWord(); + + // Emit literal bytes. + for (const auto &B : Bytes) { + assert(isUInt<8>(B) && "Value too large to emit as byte"); + WriteByte((unsigned char)B); + } + + // Align end to 32-bits. + while (GetBufferOffset() & 3) + WriteByte(0); + } + void emitBlob(StringRef Bytes, bool ShouldEmitSize = true) { + emitBlob(makeArrayRef((const uint8_t *)Bytes.data(), Bytes.size()), + ShouldEmitSize); + } + + /// EmitRecord - Emit the specified record to the stream, using an abbrev if + /// we have one to compress the output. + template <typename Container> + void EmitRecord(unsigned Code, const Container &Vals, unsigned Abbrev = 0) { + if (!Abbrev) { + // If we don't have an abbrev to use, emit this in its fully unabbreviated + // form. + auto Count = static_cast<uint32_t>(makeArrayRef(Vals).size()); + EmitCode(bitc::UNABBREV_RECORD); + EmitVBR(Code, 6); + EmitVBR(Count, 6); + for (unsigned i = 0, e = Count; i != e; ++i) + EmitVBR64(Vals[i], 6); + return; + } + + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), Code); + } + + /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. + /// Unlike EmitRecord, the code for the record should be included in Vals as + /// the first entry. + template <typename Container> + void EmitRecordWithAbbrev(unsigned Abbrev, const Container &Vals) { + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), None); + } + + /// EmitRecordWithBlob - Emit the specified record to the stream, using an + /// abbrev that includes a blob at the end. The blob data to emit is + /// specified by the pointer and length specified at the end. In contrast to + /// EmitRecord, this routine expects that the first entry in Vals is the code + /// of the record. + template <typename Container> + void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, + StringRef Blob) { + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, None); + } + template <typename Container> + void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, + const char *BlobData, unsigned BlobLen) { + return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), + StringRef(BlobData, BlobLen), None); + } + + /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records + /// that end with an array. + template <typename Container> + void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, + StringRef Array) { + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, None); + } + template <typename Container> + void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, + const char *ArrayData, unsigned ArrayLen) { + return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), + StringRef(ArrayData, ArrayLen), None); + } + + //===--------------------------------------------------------------------===// + // Abbrev Emission + //===--------------------------------------------------------------------===// + +private: + // Emit the abbreviation as a DEFINE_ABBREV record. + void EncodeAbbrev(const BitCodeAbbrev &Abbv) { + EmitCode(bitc::DEFINE_ABBREV); + EmitVBR(Abbv.getNumOperandInfos(), 5); + for (unsigned i = 0, e = static_cast<unsigned>(Abbv.getNumOperandInfos()); + i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv.getOperandInfo(i); + Emit(Op.isLiteral(), 1); + if (Op.isLiteral()) { + EmitVBR64(Op.getLiteralValue(), 8); + } else { + Emit(Op.getEncoding(), 3); + if (Op.hasEncodingData()) + EmitVBR64(Op.getEncodingData(), 5); + } + } + } +public: + + /// Emits the abbreviation \p Abbv to the stream. + unsigned EmitAbbrev(std::shared_ptr<BitCodeAbbrev> Abbv) { + EncodeAbbrev(*Abbv); + CurAbbrevs.push_back(std::move(Abbv)); + return static_cast<unsigned>(CurAbbrevs.size())-1 + + bitc::FIRST_APPLICATION_ABBREV; + } + + //===--------------------------------------------------------------------===// + // BlockInfo Block Emission + //===--------------------------------------------------------------------===// + + /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. + void EnterBlockInfoBlock() { + EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2); + BlockInfoCurBID = ~0U; + BlockInfoRecords.clear(); + } +private: + /// SwitchToBlockID - If we aren't already talking about the specified block + /// ID, emit a BLOCKINFO_CODE_SETBID record. + void SwitchToBlockID(unsigned BlockID) { + if (BlockInfoCurBID == BlockID) return; + SmallVector<unsigned, 2> V; + V.push_back(BlockID); + EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V); + BlockInfoCurBID = BlockID; + } + + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (BlockInfo *BI = getBlockInfo(BlockID)) + return *BI; + + // Otherwise, add a new record. + BlockInfoRecords.emplace_back(); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } + +public: + + /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified + /// BlockID. + unsigned EmitBlockInfoAbbrev(unsigned BlockID, std::shared_ptr<BitCodeAbbrev> Abbv) { + SwitchToBlockID(BlockID); + EncodeAbbrev(*Abbv); + + // Add the abbrev to the specified block record. + BlockInfo &Info = getOrCreateBlockInfo(BlockID); + Info.Abbrevs.push_back(std::move(Abbv)); + + return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV; + } +}; + + +} // End llvm namespace + +#endif |