diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:17:04 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:17:04 +0000 |
| commit | b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (patch) | |
| tree | 98b8f811c7aff2547cab8642daf372d6c59502fb /include/llvm/Bitcode | |
| parent | 6421cca32f69ac849537a3cff78c352195e99f1b (diff) | |
Notes
Diffstat (limited to 'include/llvm/Bitcode')
| -rw-r--r-- | include/llvm/Bitcode/BitCodes.h | 8 | ||||
| -rw-r--r-- | include/llvm/Bitcode/BitcodeReader.h (renamed from include/llvm/Bitcode/ReaderWriter.h) | 180 | ||||
| -rw-r--r-- | include/llvm/Bitcode/BitcodeWriter.h | 80 | ||||
| -rw-r--r-- | include/llvm/Bitcode/BitcodeWriterPass.h | 4 | ||||
| -rw-r--r-- | include/llvm/Bitcode/BitstreamReader.h | 241 | ||||
| -rw-r--r-- | include/llvm/Bitcode/BitstreamWriter.h | 19 | ||||
| -rw-r--r-- | include/llvm/Bitcode/LLVMBitCodes.h | 84 |
7 files changed, 310 insertions, 306 deletions
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h index 66400b697c5c..cfc7a1d7d6bd 100644 --- a/include/llvm/Bitcode/BitCodes.h +++ b/include/llvm/Bitcode/BitCodes.h @@ -25,6 +25,14 @@ #include <cassert> namespace llvm { +/// Offsets of the 32-bit fields of bitcode wrapper header. +static const unsigned BWH_MagicField = 0 * 4; +static const unsigned BWH_VersionField = 1 * 4; +static const unsigned BWH_OffsetField = 2 * 4; +static const unsigned BWH_SizeField = 3 * 4; +static const unsigned BWH_CPUTypeField = 4 * 4; +static const unsigned BWH_HeaderSize = 5 * 4; + namespace bitc { enum StandardWidths { BlockIDWidth = 8, // We use VBR-8 for block IDs. diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/BitcodeReader.h index 76a60a0b8d25..9e042b17241f 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/BitcodeReader.h @@ -1,4 +1,4 @@ -//===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===// +//===-- llvm/Bitcode/BitcodeReader.h - Bitcode reader ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,108 +7,135 @@ // //===----------------------------------------------------------------------===// // -// This header defines interfaces to read and write LLVM bitcode files/streams. +// This header defines interfaces to read LLVM bitcode files/streams. // //===----------------------------------------------------------------------===// -#ifndef LLVM_BITCODE_READERWRITER_H -#define LLVM_BITCODE_READERWRITER_H +#ifndef LLVM_BITCODE_BITCODEREADER_H +#define LLVM_BITCODE_BITCODEREADER_H +#include "llvm/Bitcode/BitCodes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" #include <memory> -#include <string> namespace llvm { - class BitstreamWriter; - class DataStreamer; class LLVMContext; class Module; - class ModulePass; - class raw_ostream; - /// Offsets of the 32-bit fields of bitcode wrapper header. - static const unsigned BWH_MagicField = 0*4; - static const unsigned BWH_VersionField = 1*4; - static const unsigned BWH_OffsetField = 2*4; - static const unsigned BWH_SizeField = 3*4; - static const unsigned BWH_CPUTypeField = 4*4; - static const unsigned BWH_HeaderSize = 5*4; + // These functions are for converting Expected/Error values to + // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: + // Remove these functions once no longer needed by the C and libLTO APIs. + + std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); + + template <typename T> + ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { + if (!Val) + return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); + return std::move(*Val); + } + + /// Represents a module in a bitcode file. + class BitcodeModule { + // This covers the identification (if present) and module blocks. + ArrayRef<uint8_t> Buffer; + StringRef ModuleIdentifier; + + // The bitstream location of the IDENTIFICATION_BLOCK. + uint64_t IdentificationBit; + + // The bitstream location of this module's MODULE_BLOCK. + uint64_t ModuleBit; + + BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, + uint64_t IdentificationBit, uint64_t ModuleBit) + : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), + IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} + + // Calls the ctor. + friend Expected<std::vector<BitcodeModule>> + getBitcodeModuleList(MemoryBufferRef Buffer); + + Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context, + bool MaterializeAll, + bool ShouldLazyLoadMetadata, + bool IsImporting); + + public: + StringRef getBuffer() const { + return StringRef((const char *)Buffer.begin(), Buffer.size()); + } + + StringRef getModuleIdentifier() const { return ModuleIdentifier; } + + /// Read the bitcode module and prepare for lazy deserialization of function + /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. + /// If IsImporting is true, this module is being parsed for ThinLTO + /// importing into another module. + Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, + bool ShouldLazyLoadMetadata, + bool IsImporting); + + /// Read the entire bitcode module and return it. + Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context); + + /// Check if the given bitcode buffer contains a summary block. + Expected<bool> hasSummary(); + + /// Parse the specified bitcode buffer, returning the module summary index. + Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); + }; + + /// Returns a list of modules in the specified bitcode buffer. + Expected<std::vector<BitcodeModule>> + getBitcodeModuleList(MemoryBufferRef Buffer); /// Read the header of the specified bitcode buffer and prepare for lazy /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, - /// lazily load metadata as well. If successful, this moves Buffer. On - /// error, this *does not* move Buffer. - ErrorOr<std::unique_ptr<Module>> - getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer, - LLVMContext &Context, - bool ShouldLazyLoadMetadata = false); - - /// Read the header of the specified stream and prepare for lazy - /// deserialization and streaming of function bodies. - ErrorOr<std::unique_ptr<Module>> - getStreamedBitcodeModule(StringRef Name, - std::unique_ptr<DataStreamer> Streamer, - LLVMContext &Context); + /// lazily load metadata as well. If IsImporting is true, this module is + /// being parsed for ThinLTO importing into another module. + Expected<std::unique_ptr<Module>> + getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, + bool ShouldLazyLoadMetadata = false, + bool IsImporting = false); + + /// Like getLazyBitcodeModule, except that the module takes ownership of + /// the memory buffer if successful. If successful, this moves Buffer. On + /// error, this *does not* move Buffer. If IsImporting is true, this module is + /// being parsed for ThinLTO importing into another module. + Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( + std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, + bool ShouldLazyLoadMetadata = false, bool IsImporting = false); /// Read the header of the specified bitcode buffer and extract just the /// triple information. If successful, this returns a string. On error, this /// returns "". - std::string getBitcodeTargetTriple(MemoryBufferRef Buffer, - LLVMContext &Context); + Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); /// Return true if \p Buffer contains a bitcode file with ObjC code (category /// or class) in it. - bool isBitcodeContainingObjCCategory(MemoryBufferRef Buffer, - LLVMContext &Context); + Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); /// Read the header of the specified bitcode buffer and extract just the /// producer string information. If successful, this returns a string. On /// error, this returns "". - std::string getBitcodeProducerString(MemoryBufferRef Buffer, - LLVMContext &Context); + Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); /// Read the specified bitcode file, returning the module. - ErrorOr<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer, - LLVMContext &Context); + Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer, + LLVMContext &Context); /// Check if the given bitcode buffer contains a summary block. - bool - hasGlobalValueSummary(MemoryBufferRef Buffer, - const DiagnosticHandlerFunction &DiagnosticHandler); + Expected<bool> hasGlobalValueSummary(MemoryBufferRef Buffer); /// Parse the specified bitcode buffer, returning the module summary index. - ErrorOr<std::unique_ptr<ModuleSummaryIndex>> - getModuleSummaryIndex(MemoryBufferRef Buffer, - const DiagnosticHandlerFunction &DiagnosticHandler); - - /// \brief Write the specified module to the specified raw output stream. - /// - /// For streams where it matters, the given stream should be in "binary" - /// mode. - /// - /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a - /// Value in \c M. These will be reconstructed exactly when \a M is - /// deserialized. - /// - /// If \c EmitSummaryIndex, emit the module's summary index (currently - /// for use in ThinLTO optimization). - void WriteBitcodeToFile(const Module *M, raw_ostream &Out, - bool ShouldPreserveUseListOrder = false, - const ModuleSummaryIndex *Index = nullptr, - bool GenerateHash = false); - - /// Write the specified module summary index to the given raw output stream, - /// where it will be written in a new bitcode block. This is used when - /// writing the combined index file for ThinLTO. When writing a subset of the - /// index for a distributed backend, provide the \p ModuleToSummariesForIndex - /// map. - void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, - std::map<std::string, GVSummaryMapTy> - *ModuleToSummariesForIndex = nullptr); + Expected<std::unique_ptr<ModuleSummaryIndex>> + getModuleSummaryIndex(MemoryBufferRef Buffer); /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. @@ -183,26 +210,11 @@ namespace llvm { } const std::error_category &BitcodeErrorCategory(); - enum class BitcodeError { InvalidBitcodeSignature = 1, CorruptedBitcode }; + enum class BitcodeError { CorruptedBitcode = 1 }; inline std::error_code make_error_code(BitcodeError E) { return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); } - class BitcodeDiagnosticInfo : public DiagnosticInfo { - const Twine &Msg; - std::error_code EC; - - public: - BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity, - const Twine &Msg); - void print(DiagnosticPrinter &DP) const override; - std::error_code getError() const { return EC; } - - static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == DK_Bitcode; - } - }; - } // End llvm namespace namespace std { diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h new file mode 100644 index 000000000000..4f72f98bbf9c --- /dev/null +++ b/include/llvm/Bitcode/BitcodeWriter.h @@ -0,0 +1,80 @@ +//===-- llvm/Bitcode/BitcodeWriter.h - Bitcode writers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to write LLVM bitcode files/streams. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_BITCODEWRITER_H +#define LLVM_BITCODE_BITCODEWRITER_H + +#include "llvm/IR/ModuleSummaryIndex.h" +#include <string> + +namespace llvm { + class BitstreamWriter; + class Module; + class raw_ostream; + + class BitcodeWriter { + SmallVectorImpl<char> &Buffer; + std::unique_ptr<BitstreamWriter> Stream; + + public: + /// Create a BitcodeWriter that writes to Buffer. + BitcodeWriter(SmallVectorImpl<char> &Buffer); + + ~BitcodeWriter(); + + /// Write the specified module to the buffer specified at construction time. + /// + /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a + /// Value in \c M. These will be reconstructed exactly when \a M is + /// deserialized. + /// + /// If \c Index is supplied, the bitcode will contain the summary index + /// (currently for use in ThinLTO optimization). + /// + /// \p GenerateHash enables hashing the Module and including the hash in the + /// bitcode (currently for use in ThinLTO incremental build). + void writeModule(const Module *M, bool ShouldPreserveUseListOrder = false, + const ModuleSummaryIndex *Index = nullptr, + bool GenerateHash = false); + }; + + /// \brief Write the specified module to the specified raw output stream. + /// + /// For streams where it matters, the given stream should be in "binary" + /// mode. + /// + /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a + /// Value in \c M. These will be reconstructed exactly when \a M is + /// deserialized. + /// + /// If \c Index is supplied, the bitcode will contain the summary index + /// (currently for use in ThinLTO optimization). + /// + /// \p GenerateHash enables hashing the Module and including the hash in the + /// bitcode (currently for use in ThinLTO incremental build). + void WriteBitcodeToFile(const Module *M, raw_ostream &Out, + bool ShouldPreserveUseListOrder = false, + const ModuleSummaryIndex *Index = nullptr, + bool GenerateHash = false); + + /// Write the specified module summary index to the given raw output stream, + /// where it will be written in a new bitcode block. This is used when + /// writing the combined index file for ThinLTO. When writing a subset of the + /// index for a distributed backend, provide the \p ModuleToSummariesForIndex + /// map. + void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out, + const std::map<std::string, GVSummaryMapTy> + *ModuleToSummariesForIndex = nullptr); +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h index 946255b878a6..9ac6fba16b96 100644 --- a/include/llvm/Bitcode/BitcodeWriterPass.h +++ b/include/llvm/Bitcode/BitcodeWriterPass.h @@ -44,7 +44,7 @@ ModulePass *createBitcodeWriterPass(raw_ostream &Str, /// /// Note that this is intended for use with the new pass manager. To construct /// a pass for the legacy pass manager, use the function above. -class BitcodeWriterPass { +class BitcodeWriterPass : public PassInfoMixin<BitcodeWriterPass> { raw_ostream &OS; bool ShouldPreserveUseListOrder; bool EmitSummaryIndex; @@ -68,8 +68,6 @@ public: /// \brief Run the bitcode writer pass, and output the module to the selected /// output stream. PreservedAnalyses run(Module &M, ModuleAnalysisManager &); - - static StringRef name() { return "BitcodeWriterPass"; } }; } diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h index b331ceea051c..4d95a6ce8a16 100644 --- a/include/llvm/Bitcode/BitstreamReader.h +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -15,21 +15,28 @@ #ifndef LLVM_BITCODE_BITSTREAMREADER_H #define LLVM_BITCODE_BITSTREAMREADER_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Bitcode/BitCodes.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/StreamingMemoryObject.h" +#include "llvm/Support/MemoryBuffer.h" +#include <algorithm> +#include <cassert> #include <climits> +#include <cstddef> +#include <cstdint> +#include <memory> #include <string> +#include <utility> #include <vector> namespace llvm { -/// This class is used to read from an LLVM bitcode stream, maintaining -/// information that is global to decoding the entire file. While a file is -/// being read, multiple cursors can be independently advanced or skipped around -/// within the file. These are represented by the BitstreamCursor class. -class BitstreamReader { +/// This class maintains the abbreviations read from a block info block. +class BitstreamBlockInfo { public: /// This contains information emitted to BLOCKINFO_BLOCK blocks. These /// describe abbreviations that all blocks of the specified ID inherit. @@ -37,64 +44,13 @@ public: unsigned BlockID; std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; std::string Name; - std::vector<std::pair<unsigned, std::string> > RecordNames; }; -private: - std::unique_ptr<MemoryObject> BitcodeBytes; +private: std::vector<BlockInfo> BlockInfoRecords; - /// This is set to true if we don't care about the block/record name - /// information in the BlockInfo block. Only llvm-bcanalyzer uses this. - bool IgnoreBlockInfoNames; - - BitstreamReader(const BitstreamReader&) = delete; - void operator=(const BitstreamReader&) = delete; public: - BitstreamReader() : IgnoreBlockInfoNames(true) { - } - - BitstreamReader(const unsigned char *Start, const unsigned char *End) - : IgnoreBlockInfoNames(true) { - init(Start, End); - } - - BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes) - : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {} - - BitstreamReader(BitstreamReader &&Other) { - *this = std::move(Other); - } - - BitstreamReader &operator=(BitstreamReader &&Other) { - BitcodeBytes = std::move(Other.BitcodeBytes); - // Explicitly swap block info, so that nothing gets destroyed twice. - std::swap(BlockInfoRecords, Other.BlockInfoRecords); - IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames; - return *this; - } - - void init(const unsigned char *Start, const unsigned char *End) { - assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); - BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); - } - - MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } - - /// This is called by clients that want block/record name information. - void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } - bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } - - //===--------------------------------------------------------------------===// - // Block Manipulation - //===--------------------------------------------------------------------===// - - /// Return true if we've already read and processed the block info block for - /// this Bitstream. We only process it for the first cursor that walks over - /// it. - bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } - /// If there is block info for the specified ID, return it, otherwise return /// null. const BlockInfo *getBlockInfo(unsigned BlockID) const { @@ -118,33 +74,21 @@ public: BlockInfoRecords.back().BlockID = BlockID; return BlockInfoRecords.back(); } - - /// Takes block info from the other bitstream reader. - /// - /// This is a "take" operation because BlockInfo records are non-trivial, and - /// indeed rather expensive. - void takeBlockInfo(BitstreamReader &&Other) { - assert(!hasBlockInfoRecords()); - BlockInfoRecords = std::move(Other.BlockInfoRecords); - } }; /// This represents a position within a bitstream. There may be multiple /// independent cursors reading within one bitstream, each maintaining their /// own local state. class SimpleBitstreamCursor { - BitstreamReader *R = nullptr; + ArrayRef<uint8_t> BitcodeBytes; size_t NextChar = 0; - // The size of the bicode. 0 if we don't know it yet. - size_t Size = 0; - +public: /// This is the current data we have pulled from the stream but have not /// returned to the client. This is specifically and intentionally defined to /// follow the word size of the host machine for efficiency. We use word_t in /// places that are aware of this to make it perfectly explicit what is going /// on. -public: typedef size_t word_t; private: @@ -158,23 +102,21 @@ public: static const size_t MaxChunkSize = sizeof(word_t) * 8; SimpleBitstreamCursor() = default; - - explicit SimpleBitstreamCursor(BitstreamReader &R) : R(&R) {} - explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {} + explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) + : BitcodeBytes(BitcodeBytes) {} + explicit SimpleBitstreamCursor(StringRef BitcodeBytes) + : BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()), + BitcodeBytes.size()) {} + explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {} bool canSkipToPos(size_t pos) const { // pos can be skipped to if it is a valid address or one byte past the end. - return pos == 0 || - R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1)); + return pos <= BitcodeBytes.size(); } bool AtEndOfStream() { - if (BitsInCurWord != 0) - return false; - if (Size != 0) - return Size <= NextChar; - fillCurWord(); - return BitsInCurWord == 0; + return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar; } /// Return the bit # of the bit we are reading. @@ -185,8 +127,7 @@ public: // Return the byte # of the current bit. uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } - BitstreamReader *getBitStreamReader() { return R; } - const BitstreamReader *getBitStreamReader() const { return R; } + ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; } /// Reset the stream to the specified bit number. void JumpToBit(uint64_t BitNo) { @@ -203,27 +144,9 @@ public: Read(WordBitNo); } - /// Reset the stream to the bit pointed at by the specified pointer. - /// - /// The pointer must be a dereferenceable pointer into the bytes in the - /// underlying memory object. - void jumpToPointer(const uint8_t *Pointer) { - auto *Pointer0 = getPointerToByte(0, 1); - assert((intptr_t)Pointer0 <= (intptr_t)Pointer && - "Expected pointer into bitstream"); - - JumpToBit(8 * (Pointer - Pointer0)); - assert((intptr_t)getPointerToByte(getCurrentByteNo(), 1) == - (intptr_t)Pointer && - "Expected to reach pointer"); - } - void jumpToPointer(const char *Pointer) { - jumpToPointer((const uint8_t *)Pointer); - } - /// Get a pointer into the bitstream at the specified byte offset. const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { - return R->getBitcodeBytes().getPointer(ByteNo, NumBytes); + return BitcodeBytes.data() + ByteNo; } /// Get a pointer into the bitstream at the specified bit offset. @@ -235,26 +158,24 @@ public: } void fillCurWord() { - if (Size != 0 && NextChar >= Size) + if (NextChar >= BitcodeBytes.size()) report_fatal_error("Unexpected end of file"); // Read the next word from the stream. - uint8_t Array[sizeof(word_t)] = {0}; - - uint64_t BytesRead = - R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); - - // If we run out of data, stop at the end of the stream. - if (BytesRead == 0) { + const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar; + unsigned BytesRead; + if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) { + BytesRead = sizeof(word_t); + CurWord = + support::endian::read<word_t, support::little, support::unaligned>( + NextCharPtr); + } else { + // Short read. + BytesRead = BitcodeBytes.size() - NextChar; CurWord = 0; - BitsInCurWord = 0; - Size = NextChar; - return; + for (unsigned B = 0; B != BytesRead; ++B) + CurWord |= uint64_t(NextCharPtr[B]) << (B * 8); } - - CurWord = - support::endian::read<word_t, support::little, support::unaligned>( - Array); NextChar += BytesRead; BitsInCurWord = BytesRead * 8; } @@ -283,9 +204,9 @@ public: fillCurWord(); - // If we run out of data, stop at the end of the stream. + // If we run out of data, abort. if (BitsLeft > BitsInCurWord) - return 0; + report_fatal_error("Unexpected end of file"); word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); @@ -306,7 +227,7 @@ public: uint32_t Result = 0; unsigned NextBit = 0; - while (1) { + while (true) { Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; if ((Piece & (1U << (NumBits-1))) == 0) @@ -326,7 +247,7 @@ public: uint64_t Result = 0; unsigned NextBit = 0; - while (1) { + while (true) { Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; if ((Piece & (1U << (NumBits-1))) == 0) @@ -351,31 +272,7 @@ public: } /// Skip to the end of the file. - void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); } - - /// Prevent the cursor from reading past a byte boundary. - /// - /// Prevent the cursor from requesting byte reads past \c Limit. This is - /// useful when working with a cursor on a StreamingMemoryObject, when it's - /// desirable to avoid invalidating the result of getPointerToByte(). - /// - /// If \c Limit is on a word boundary, AtEndOfStream() will return true if - /// the cursor position reaches or exceeds \c Limit, regardless of the true - /// number of available bytes. Otherwise, AtEndOfStream() returns true when - /// it reaches or exceeds the next word boundary. - void setArtificialByteLimit(uint64_t Limit) { - assert(getCurrentByteNo() < Limit && "Move cursor before lowering limit"); - - // Round to word boundary. - Limit = alignTo(Limit, sizeof(word_t)); - - // Only change size if the new one is lower. - if (!Size || Size > Limit) - Size = Limit; - } - - /// Return the Size, if known. - uint64_t getSizeIfKnown() const { return Size; } + void skipToEnd() { NextChar = BitcodeBytes.size(); } }; /// When advancing through a bitstream cursor, each advance can discover a few @@ -394,12 +291,15 @@ struct BitstreamEntry { static BitstreamEntry getError() { BitstreamEntry E; E.Kind = Error; return E; } + static BitstreamEntry getEndBlock() { BitstreamEntry E; E.Kind = EndBlock; return E; } + static BitstreamEntry getSubBlock(unsigned ID) { BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; } + static BitstreamEntry getRecord(unsigned AbbrevID) { BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; } @@ -421,34 +321,32 @@ class BitstreamCursor : SimpleBitstreamCursor { struct Block { unsigned PrevCodeSize; std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; + explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} }; /// This tracks the codesize of parent blocks. SmallVector<Block, 8> BlockScope; + BitstreamBlockInfo *BlockInfo = nullptr; public: static const size_t MaxChunkSize = sizeof(word_t) * 8; BitstreamCursor() = default; - - explicit BitstreamCursor(BitstreamReader &R) { init(&R); } - - void init(BitstreamReader *R) { - freeState(); - SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R)); - CurCodeSize = 2; - } - - void freeState(); + explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes) {} + explicit BitstreamCursor(StringRef BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes) {} + explicit BitstreamCursor(MemoryBufferRef BitcodeBytes) + : SimpleBitstreamCursor(BitcodeBytes) {} using SimpleBitstreamCursor::canSkipToPos; using SimpleBitstreamCursor::AtEndOfStream; + using SimpleBitstreamCursor::getBitcodeBytes; using SimpleBitstreamCursor::GetCurrentBitNo; using SimpleBitstreamCursor::getCurrentByteNo; using SimpleBitstreamCursor::getPointerToByte; - using SimpleBitstreamCursor::getBitStreamReader; using SimpleBitstreamCursor::JumpToBit; using SimpleBitstreamCursor::fillCurWord; using SimpleBitstreamCursor::Read; @@ -471,7 +369,10 @@ public: /// Advance the current bitstream, returning the next entry in the stream. BitstreamEntry advance(unsigned Flags = 0) { - while (1) { + while (true) { + if (AtEndOfStream()) + return BitstreamEntry::getError(); + unsigned Code = ReadCode(); if (Code == bitc::END_BLOCK) { // Pop the end of the block unless Flags tells us not to. @@ -498,7 +399,7 @@ public: /// This is a convenience function for clients that don't expect any /// subblocks. This just skips over them automatically. BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { - while (1) { + while (true) { // If we found a normal entry, return it. BitstreamEntry Entry = advance(Flags); if (Entry.Kind != BitstreamEntry::SubBlock) @@ -514,7 +415,6 @@ public: return Read(CurCodeSize); } - // Block header: // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] @@ -558,7 +458,6 @@ public: } private: - void popBlockScope() { CurCodeSize = BlockScope.back().PrevCodeSize; @@ -590,9 +489,19 @@ public: //===--------------------------------------------------------------------===// void ReadAbbrevRecord(); - bool ReadBlockInfoBlock(); + /// Read and return a block info block from the bitstream. If an error was + /// encountered, return None. + /// + /// \param ReadBlockInfoNames Whether to read block/record name information in + /// the BlockInfo block. Only llvm-bcanalyzer uses this. + Optional<BitstreamBlockInfo> + ReadBlockInfoBlock(bool ReadBlockInfoNames = false); + + /// Set the block info to be used by this BitstreamCursor to interpret + /// abbreviated records. + void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; } }; -} // End llvm namespace +} // end llvm namespace -#endif +#endif // LLVM_BITCODE_BITSTREAMREADER_H diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index d613f5e18954..8eb6e8aef7a2 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -112,6 +112,11 @@ public: &Out[ByteNo], NewWord, BitNo & 7); } + void BackpatchWord64(uint64_t BitNo, uint64_t Val) { + BackpatchWord(BitNo, (uint32_t)Val); + BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32)); + } + void Emit(uint32_t Val, unsigned NumBits) { assert(NumBits && NumBits <= 32 && "Invalid value size!"); assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); @@ -131,15 +136,6 @@ public: CurBit = (CurBit+NumBits) & 31; } - void Emit64(uint64_t Val, unsigned NumBits) { - if (NumBits <= 32) - Emit((uint32_t)Val, NumBits); - else { - Emit((uint32_t)Val, 32); - Emit((uint32_t)(Val >> 32), NumBits-32); - } - } - void FlushToWord() { if (CurBit) { WriteWord(CurValue); @@ -506,9 +502,10 @@ public: //===--------------------------------------------------------------------===// /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. - void EnterBlockInfoBlock(unsigned CodeWidth) { - EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); + void EnterBlockInfoBlock() { + EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2); BlockInfoCurBID = ~0U; + BlockInfoRecords.clear(); } private: /// SwitchToBlockID - If we aren't already talking about the specified block diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 52d4f01b7985..c996c38261c0 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -120,9 +120,8 @@ enum AttributeCodes { // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0 PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0, // paramidx1, attr1...] - PARAMATTR_CODE_ENTRY = 2, // ENTRY: [paramidx0, attrgrp0, - // paramidx1, attrgrp1, ...] - PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [id, attr0, att1, ...] + PARAMATTR_CODE_ENTRY = 2, // ENTRY: [attrgrp0, attrgrp1, ...] + PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [grpid, idx, attr0, attr1, ...] }; /// TYPE blocks have codes for each type primitive they use. @@ -170,11 +169,6 @@ enum OperandBundleTagCode { OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N] }; -// The type symbol table only has one code (TST_ENTRY_CODE). -enum TypeSymtabCodes { - TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N] -}; - // Value symbol table codes. enum ValueSymtabCodes { VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] @@ -194,20 +188,20 @@ enum ModulePathSymtabCodes { // and combined index cases. enum GlobalValueSummarySymtabCodes { // PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid)] FS_PERMODULE = 1, // PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, hotness)] FS_PERMODULE_PROFILE = 2, // PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] FS_PERMODULE_GLOBALVAR_INIT_REFS = 3, // COMBINED: [valueid, modid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid)] FS_COMBINED = 4, // COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, hotness)] FS_COMBINED_PROFILE = 5, // COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] FS_COMBINED_GLOBALVAR_INIT_REFS = 6, @@ -219,45 +213,50 @@ enum GlobalValueSummarySymtabCodes { FS_COMBINED_ORIGINAL_NAME = 9, // VERSION of the summary, bumped when adding flags for instance. FS_VERSION = 10, + // The list of llvm.type.test type identifiers used by the following function. + FS_TYPE_TESTS = 11, }; enum MetadataCodes { - METADATA_STRING_OLD = 1, // MDSTRING: [values] - METADATA_VALUE = 2, // VALUE: [type num, value num] - METADATA_NODE = 3, // NODE: [n x md num] - METADATA_NAME = 4, // STRING: [values] - METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num] - METADATA_KIND = 6, // [n x [id, name]] - METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?] - METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)] - METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)] - METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] - METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]] - METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num] - METADATA_SUBRANGE = 13, // [distinct, count, lo] - METADATA_ENUMERATOR = 14, // [distinct, value, name] - METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc] - METADATA_FILE = 16, // [distinct, filename, directory] - METADATA_DERIVED_TYPE = 17, // [distinct, ...] - METADATA_COMPOSITE_TYPE = 18, // [distinct, ...] - METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc] - METADATA_COMPILE_UNIT = 20, // [distinct, ...] - METADATA_SUBPROGRAM = 21, // [distinct, ...] - METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] + METADATA_STRING_OLD = 1, // MDSTRING: [values] + METADATA_VALUE = 2, // VALUE: [type num, value num] + METADATA_NODE = 3, // NODE: [n x md num] + METADATA_NAME = 4, // STRING: [values] + METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num] + METADATA_KIND = 6, // [n x [id, name]] + METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?] + METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)] + METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)] + METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] + METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]] + METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num] + METADATA_SUBRANGE = 13, // [distinct, count, lo] + METADATA_ENUMERATOR = 14, // [distinct, value, name] + METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc] + METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum] + METADATA_DERIVED_TYPE = 17, // [distinct, ...] + METADATA_COMPOSITE_TYPE = 18, // [distinct, ...] + METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc] + METADATA_COMPILE_UNIT = 20, // [distinct, ...] + METADATA_SUBPROGRAM = 21, // [distinct, ...] + METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator] - METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line] - METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...] - METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...] - METADATA_GLOBAL_VAR = 27, // [distinct, ...] - METADATA_LOCAL_VAR = 28, // [distinct, ...] - METADATA_EXPRESSION = 29, // [distinct, n x element] - METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] + METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols] + METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...] + METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...] + METADATA_GLOBAL_VAR = 27, // [distinct, ...] + METADATA_LOCAL_VAR = 28, // [distinct, ...] + METADATA_EXPRESSION = 29, // [distinct, n x element] + METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] METADATA_IMPORTED_ENTITY = 31, // [distinct, tag, scope, entity, line, name] METADATA_MODULE = 32, // [distinct, scope, name, ...] METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]] + METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr] + METADATA_INDEX_OFFSET = 38, // [offset] + METADATA_INDEX = 39, // [bitpos] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each @@ -286,8 +285,9 @@ enum ConstantsCodes { CST_CODE_CE_INBOUNDS_GEP = 20, // INBOUNDS_GEP: [n x operands] CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] CST_CODE_DATA = 22, // DATA: [n x elements] - CST_CODE_INLINEASM = 23 // INLINEASM: [sideeffect|alignstack| + CST_CODE_INLINEASM = 23, // INLINEASM: [sideeffect|alignstack| // asmdialect,asmstr,conststr] + CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands] }; /// CastOpcodes - These are values used in the bitcode files to encode which |
