Diffstat (limited to 'include/llvm/CodeGen')
108 files changed, 5185 insertions, 3539 deletions
diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h new file mode 100644 index 000000000000..13928582f2dd --- /dev/null +++ b/include/llvm/CodeGen/AccelTable.h @@ -0,0 +1,434 @@ +//==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_DWARFACCELTABLE_H +#define LLVM_CODEGEN_DWARFACCELTABLE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/DwarfStringPoolEntry.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/DJB.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <cstddef> +#include <cstdint> +#include <vector> + +/// The DWARF and Apple accelerator tables are an indirect hash table optimized +/// for null lookup rather than access to known data. The Apple accelerator +/// tables are a precursor of the newer DWARF v5 accelerator tables. Both +/// formats share common design ideas. +/// +/// The Apple accelerator tables are output into an on-disk format that looks +/// like this: +/// +/// .------------------. +/// | HEADER | +/// |------------------| +/// | BUCKETS | +/// |------------------| +/// | HASHES | +/// |------------------| +/// | OFFSETS | +/// |------------------| +/// | DATA | +/// `------------------' +/// +/// The header contains a magic number, version, type of hash function, +/// the number of buckets, total number of hashes, and room for a special struct +/// of data and the length of that struct. +/// +/// The buckets contain an index (e.g. 6) into the hashes array. The hashes +/// section contains all of the 32-bit hash values in contiguous memory, and the +/// offsets contain the offset into the data area for the particular hash. +/// +/// For a lookup example, we could hash a function name and take it modulo the +/// number of buckets, giving us our bucket. From there we take the bucket value +/// as an index into the hashes table and look at each successive hash as long +/// as the hash value is still the same modulo result (bucket value) as earlier. +/// If we have a match we look at that same entry in the offsets table and grab +/// the offset in the data for our final match. +/// +/// The DWARF v5 accelerator table consists of zero or more name indices that +/// are output into an on-disk format that looks like this: +/// +/// .------------------. +/// | HEADER | +/// |------------------| +/// | CU LIST | +/// |------------------| +/// | LOCAL TU LIST | +/// |------------------| +/// | FOREIGN TU LIST | +/// |------------------| +/// | HASH TABLE | +/// |------------------| +/// | NAME TABLE | +/// |------------------| +/// | ABBREV TABLE | +/// |------------------| +/// | ENTRY POOL | +/// `------------------' +/// +/// For the full documentation please refer to the DWARF 5 standard.
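The lookup procedure described in the comment above is compact enough to sketch directly. The following C++ is an illustration only, not part of this patch: HashTableView and lookup are hypothetical names, and empty-bucket sentinels plus the exact on-disk byte layout are omitted. It simply walks the BUCKETS, HASHES and OFFSETS sections the way the comment describes.

#include <cstdint>
#include <optional>
#include <vector>

// Hypothetical in-memory view of the BUCKETS, HASHES and OFFSETS sections.
struct HashTableView {
  std::vector<uint32_t> Buckets; // per bucket: index of its first hash
  std::vector<uint32_t> Hashes;  // all 32-bit hash values, bucket-contiguous
  std::vector<uint32_t> Offsets; // offset into the DATA area for each hash
};

std::optional<uint32_t> lookup(const HashTableView &T, uint32_t Hash) {
  size_t Bucket = Hash % T.Buckets.size();
  // Scan successive hashes for as long as they still land in this bucket.
  for (size_t I = T.Buckets[Bucket];
       I < T.Hashes.size() && T.Hashes[I] % T.Buckets.size() == Bucket; ++I)
    if (T.Hashes[I] == Hash)
      return T.Offsets[I]; // the matching entry's offset in the DATA area
  return std::nullopt;
}

The hash function itself would be the DJB hash these tables use in practice (djbHash from llvm/Support/DJB.h, included by this header), applied to the name being looked up.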
+/// +/// +/// This file defines the class template AccelTable, which represents an +/// abstract view of an Accelerator table, without any notion of an on-disk +/// layout. This class is parameterized by an entry type, which should derive +/// from AccelTableData. This is the type of individual entries in the table, +/// and it should store the data necessary to emit them. AppleAccelTableData is +/// the base class for Apple Accelerator Table entries, which have a uniform +/// structure based on a sequence of Atoms. There are different sub-classes +/// derived from AppleAccelTableData, which differ in the set of Atoms and how they +/// obtain their values. +/// +/// An Apple Accelerator Table can be serialized by calling the emitAppleAccelTable +/// function. +/// +/// TODO: Add DWARF v5 emission code. + +namespace llvm { + +class AsmPrinter; +class DwarfCompileUnit; +class DwarfDebug; + +/// Interface to which the different types of accelerator table data have to +/// conform. It serves as a base class for different values of the template +/// argument of the AccelTable class template. +class AccelTableData { +public: + virtual ~AccelTableData() = default; + + bool operator<(const AccelTableData &Other) const { + return order() < Other.order(); + } + + // Subclasses should implement: + // static uint32_t hash(StringRef Name); + +#ifndef NDEBUG + virtual void print(raw_ostream &OS) const = 0; +#endif +protected: + virtual uint64_t order() const = 0; +}; + +/// A base class holding non-template-dependent functionality of the AccelTable +/// class. Clients should not use this class directly but rather instantiate +/// AccelTable with a type derived from AccelTableData. +class AccelTableBase { +public: + using HashFn = uint32_t(StringRef); + + /// Represents a group of entries with identical name (and hence, hash value). + struct HashData { + DwarfStringPoolEntryRef Name; + uint32_t HashValue; + std::vector<AccelTableData *> Values; + MCSymbol *Sym; + + HashData(DwarfStringPoolEntryRef Name, HashFn *Hash) + : Name(Name), HashValue(Hash(Name.getString())) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + using HashList = std::vector<HashData *>; + using BucketList = std::vector<HashList>; + +protected: + /// Allocator for HashData and Values. + BumpPtrAllocator Allocator; + + using StringEntries = StringMap<HashData, BumpPtrAllocator &>; + StringEntries Entries; + + HashFn *Hash; + uint32_t BucketCount; + uint32_t UniqueHashCount; + + HashList Hashes; + BucketList Buckets; + + void computeBucketCount(); + + AccelTableBase(HashFn *Hash) : Entries(Allocator), Hash(Hash) {} + +public: + void finalize(AsmPrinter *Asm, StringRef Prefix); + ArrayRef<HashList> getBuckets() const { return Buckets; } + uint32_t getBucketCount() const { return BucketCount; } + uint32_t getUniqueHashCount() const { return UniqueHashCount; } + uint32_t getUniqueNameCount() const { return Entries.size(); } + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + + AccelTableBase(const AccelTableBase &) = delete; + void operator=(const AccelTableBase &) = delete; +}; + +/// This class holds an abstract representation of an Accelerator Table, +/// consisting of a sequence of buckets, each bucket containing a sequence of +/// HashData entries. The class is parameterized by the type of entries it +/// holds. The type template parameter also defines the hash function to use for +/// hashing names.
+template <typename DataT> class AccelTable : public AccelTableBase { +public: + AccelTable() : AccelTableBase(DataT::hash) {} + + template <typename... Types> + void addName(DwarfStringPoolEntryRef Name, Types &&... Args); +}; + +template <typename AccelTableDataT> +template <typename... Types> +void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name, + Types &&... Args) { + assert(Buckets.empty() && "Already finalized!"); + // If the string is in the list already then add this DIE to the list, + // otherwise add a new one. + auto Iter = Entries.try_emplace(Name.getString(), Name, Hash).first; + assert(Iter->second.Name == Name); + Iter->second.Values.push_back( + new (Allocator) AccelTableDataT(std::forward<Types>(Args)...)); +} + +/// A base class for different implementations of Data classes for Apple +/// Accelerator Tables. The columns in the table are defined by the static Atoms +/// variable declared on the subclasses. +class AppleAccelTableData : public AccelTableData { +public: + /// An Atom defines the form of the data in an Apple accelerator table. + /// Conceptually it is a column in the accelerator table consisting of a type and a + /// specification of the form of its data. + struct Atom { + /// Atom Type. + const uint16_t Type; + /// DWARF Form. + const uint16_t Form; + + constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + // Subclasses should define: + // static constexpr Atom Atoms[]; + + virtual void emit(AsmPrinter *Asm) const = 0; + + static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); } +}; + +/// The Data class implementation for the DWARF v5 accelerator table. Unlike the +/// Apple Data classes, this class is just a DIE wrapper, and does not know how to +/// serialize itself. The complete serialization logic is in the +/// emitDWARF5AccelTable function. +class DWARF5AccelTableData : public AccelTableData { +public: + static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } + + DWARF5AccelTableData(const DIE &Die) : Die(Die) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif + + const DIE &getDie() const { return Die; } + uint64_t getDieOffset() const { return Die.getOffset(); } + unsigned getDieTag() const { return Die.getTag(); } + +protected: + const DIE &Die; + + uint64_t order() const override { return Die.getOffset(); } +}; + +class DWARF5AccelTableStaticData : public AccelTableData { +public: + static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } + + DWARF5AccelTableStaticData(uint64_t DieOffset, unsigned DieTag, + unsigned CUIndex) + : DieOffset(DieOffset), DieTag(DieTag), CUIndex(CUIndex) {} + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif + + uint64_t getDieOffset() const { return DieOffset; } + unsigned getDieTag() const { return DieTag; } + unsigned getCUIndex() const { return CUIndex; } + +protected: + uint64_t DieOffset; + unsigned DieTag; + unsigned CUIndex; + + uint64_t order() const override { return DieOffset; } +}; + +void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, + StringRef Prefix, const MCSymbol *SecBegin, + ArrayRef<AppleAccelTableData::Atom> Atoms); + +/// Emit an Apple Accelerator Table consisting of entries in the specified +/// AccelTable. The DataT template parameter should be derived from +/// AppleAccelTableData.
+template <typename DataT> +void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents, + StringRef Prefix, const MCSymbol *SecBegin) { + static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value, ""); + emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms); +} + +void emitDWARF5AccelTable(AsmPrinter *Asm, + AccelTable<DWARF5AccelTableData> &Contents, + const DwarfDebug &DD, + ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs); + +void emitDWARF5AccelTable( + AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents, + ArrayRef<MCSymbol *> CUs, + llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)> + getCUIndexForEntry); + +/// Accelerator table data implementation for simple Apple accelerator tables +/// with just a DIE reference. +class AppleAccelTableOffsetData : public AppleAccelTableData { +public: + AppleAccelTableOffsetData(const DIE &D) : Die(D) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +protected: + uint64_t order() const override { return Die.getOffset(); } + + const DIE &Die; +}; + +/// Accelerator table data implementation for Apple type accelerator tables. +class AppleAccelTableTypeData : public AppleAccelTableOffsetData { +public: + AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +}; + +/// Accelerator table data implementation for simple Apple accelerator tables +/// with a DIE offset but no actual DIE pointer. +class AppleAccelTableStaticOffsetData : public AppleAccelTableData { +public: + AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +protected: + uint64_t order() const override { return Offset; } + + uint32_t Offset; +}; + +/// Accelerator table data implementation for type accelerator tables with +/// a DIE offset but no actual DIE pointer.
+class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData { +public: + AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag, + bool ObjCClassIsImplementation, + uint32_t QualifiedNameHash) + : AppleAccelTableStaticOffsetData(Offset), + QualifiedNameHash(QualifiedNameHash), Tag(Tag), + ObjCClassIsImplementation(ObjCClassIsImplementation) {} + + void emit(AsmPrinter *Asm) const override; + +#ifndef _MSC_VER + // The line below is rejected by older versions (TBD) of MSVC. + static constexpr Atom Atoms[] = { + Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)}; +#else + // FIXME: Erase this path once the minimum MSVC version has been bumped. + static const SmallVector<Atom, 4> Atoms; +#endif + +#ifndef NDEBUG + void print(raw_ostream &OS) const override; +#endif +protected: + uint64_t order() const override { return Offset; } + + uint32_t QualifiedNameHash; + uint16_t Tag; + bool ObjCClassIsImplementation; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_DWARFACCELTABLE_H diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h index ba88f1f78fb8..d77aee66ed76 100644 --- a/include/llvm/CodeGen/Analysis.h +++ b/include/llvm/CodeGen/Analysis.h @@ -36,7 +36,7 @@ class SDValue; class SelectionDAG; struct EVT; -/// \brief Compute the linearized index of a member in a nested +/// Compute the linearized index of a member in a nested /// aggregate/struct/array. /// /// Given an LLVM IR aggregate type and a sequence of insertvalue or @@ -124,7 +124,7 @@ bool returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I, const TargetLoweringBase &TLI); DenseMap<const MachineBasicBlock *, int> -getFuncletMembership(const MachineFunction &MF); +getEHScopeMembership(const MachineFunction &MF); } // End llvm namespace diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index b8944a668000..b6056380916c 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -50,6 +50,7 @@ class GlobalValue; class GlobalVariable; class MachineBasicBlock; class MachineConstantPoolValue; +class MachineDominatorTree; class MachineFunction; class MachineInstr; class MachineJumpTableInfo; @@ -92,11 +93,17 @@ public: std::unique_ptr<MCStreamer> OutStreamer; /// The current machine function. - const MachineFunction *MF = nullptr; + MachineFunction *MF = nullptr; /// This is a pointer to the current MachineModuleInfo. MachineModuleInfo *MMI = nullptr; + /// This is a pointer to the current MachineDominatorTree. + MachineDominatorTree *MDT = nullptr; + + /// This is a pointer to the current MachineLoopInfo. + MachineLoopInfo *MLI = nullptr; + /// Optimization remark emitter. MachineOptimizationRemarkEmitter *ORE; @@ -130,9 +137,6 @@ private: static char ID; - /// If VerboseAsm is set, a pointer to the loop info for this function. - MachineLoopInfo *LI = nullptr; - struct HandlerInfo { AsmPrinterHandler *Handler; const char *TimerName; @@ -161,6 +165,12 @@ public: }; private: + /// If generated on the fly this owns the instance. + std::unique_ptr<MachineDominatorTree> OwnedMDT; + + /// If generated on the fly this owns the instance. + std::unique_ptr<MachineLoopInfo> OwnedMLI; + /// Structure for generating diagnostics for inline assembly. Only initialised /// when necessary.
mutable std::unique_ptr<SrcMgrDiagInfo> DiagInfo; @@ -191,6 +201,10 @@ public: /// Return a unique ID for the current function. unsigned getFunctionNumber() const; + /// Return symbol for the function pseudo stack if the stack frame is not a + /// register based. + virtual const MCSymbol *getFunctionFrameSymbol() const { return nullptr; } + MCSymbol *getFunctionBegin() const { return CurrentFnBegin; } MCSymbol *getFunctionEnd() const { return CurrentFnEnd; } MCSymbol *getCurExceptionSym(); @@ -228,6 +242,7 @@ public: TAIL_CALL = 2, LOG_ARGS_ENTER = 3, CUSTOM_EVENT = 4, + TYPED_EVENT = 5, }; // The table will contain these structs that point to the sled, the function @@ -327,15 +342,15 @@ public: /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; + void EmitAlignment(unsigned NumBits, const GlobalObject *GV = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. virtual const MCExpr *lowerConstant(const Constant *CV); - /// \brief Print a general LLVM constant to the .s file. + /// Print a general LLVM constant to the .s file. void EmitGlobalConstant(const DataLayout &DL, const Constant *CV); - /// \brief Unnamed constant global variables solely contaning a pointer to + /// Unnamed constant global variables solely contaning a pointer to /// another globals variable act like a global variable "proxy", or GOT /// equivalents, i.e., it's only used to hold the address of the latter. One /// optimization is to replace accesses to these proxies by using the GOT @@ -345,7 +360,7 @@ public: /// accesses to GOT entries. void computeGlobalGOTEquivs(Module &M); - /// \brief Constant expressions using GOT equivalent globals may not be + /// Constant expressions using GOT equivalent globals may not be /// eligible for PC relative GOT entry conversion, in such cases we need to /// emit the proxies we previously omitted in EmitGlobalVariable. void emitGlobalGOTEquivs(); @@ -444,13 +459,16 @@ public: void printOffset(int64_t Offset, raw_ostream &OS) const; /// Emit a byte directive and value. - void EmitInt8(int Value) const; + void emitInt8(int Value) const; /// Emit a short directive and value. - void EmitInt16(int Value) const; + void emitInt16(int Value) const; /// Emit a long directive and value. - void EmitInt32(int Value) const; + void emitInt32(int Value) const; + + /// Emit a long long directive and value. + void emitInt64(uint64_t Value) const; /// Emit something like ".long Hi-Lo" where the size in bytes of the directive /// is specified by Size and Hi/Lo specify the labels. This implicitly uses @@ -458,6 +476,10 @@ public: void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const; + /// Emit something like ".uleb128 Hi-Lo". + void EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) const; + /// Emit something like ".long Label+Offset" where the size in bytes of the /// directive is specified by Size and Label specifies the label. This /// implicitly uses .set if it is available. @@ -471,6 +493,9 @@ public: EmitLabelPlusOffset(Label, 0, Size, IsSectionRelative); } + /// Emit something like ".long Label + Offset". 
+ void EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const; + //===------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===------------------------------------------------------------------===// @@ -481,11 +506,6 @@ public: /// Emit the specified unsigned leb128 value. void EmitULEB128(uint64_t Value, const char *Desc = nullptr) const; - /// Emit the specified unsigned leb128 value padded to a specific number - /// bytes - void EmitPaddedULEB128(uint64_t Value, unsigned PadTo, - const char *Desc = nullptr) const; - /// Emit a .byte 42 directive that corresponds to an encoding. If verbose /// assembly output is enabled, we output comments describing the encoding. /// Desc is a string saying what the encoding is specifying (e.g. "LSDA"). @@ -508,7 +528,12 @@ public: /// When possible, emit a DwarfStringPool section offset without any /// relocations, and without using the symbol. Otherwise, defers to \a /// emitDwarfSymbolReference(). - void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const; + void emitDwarfStringOffset(DwarfStringPoolEntry S) const; + + /// Emit the 4-byte offset of a string from the start of its section. + void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { + emitDwarfStringOffset(S.getEntry()); + } /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified. virtual unsigned getISAEncoding() { return 0; } @@ -523,10 +548,10 @@ public: // Dwarf Lowering Routines //===------------------------------------------------------------------===// - /// \brief Emit frame instruction to describe the layout of the frame. + /// Emit frame instruction to describe the layout of the frame. void emitCFIInstruction(const MCCFIInstruction &Inst) const; - /// \brief Emit Dwarf abbreviation table. + /// Emit Dwarf abbreviation table. template <typename T> void emitDwarfAbbrevs(const T &Abbrevs) const { // For each abbreviation. for (const auto &Abbrev : Abbrevs) @@ -538,7 +563,7 @@ public: void emitDwarfAbbrev(const DIEAbbrev &Abbrev) const; - /// \brief Recursively emit Dwarf DIE tree. + /// Recursively emit Dwarf DIE tree. void emitDwarfDIE(const DIE &Die) const; //===------------------------------------------------------------------===// @@ -625,10 +650,9 @@ private: void EmitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor); - GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); + GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S); /// Emit GlobalAlias or GlobalIFunc. - void emitGlobalIndirectSymbol(Module &M, - const GlobalIndirectSymbol& GIS); + void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS); void setupCodePaddingContext(const MachineBasicBlock &MBB, MCCodePaddingContext &Context) const; }; diff --git a/include/llvm/CodeGen/AtomicExpandUtils.h b/include/llvm/CodeGen/AtomicExpandUtils.h index 1f9c96b18e1b..b1adf66e7ff4 100644 --- a/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/include/llvm/CodeGen/AtomicExpandUtils.h @@ -26,7 +26,7 @@ using CreateCmpXchgInstFun = function_ref<void(IRBuilder<> &, Value *, Value *, Value *, AtomicOrdering, Value *&, Value *&)>; -/// \brief Expand an atomic RMW instruction into a loop utilizing +/// Expand an atomic RMW instruction into a loop utilizing /// cmpxchg. You'll want to make sure your target machine likes cmpxchg /// instructions in the first place and that there isn't another, better, /// transformation available (for example AArch32/AArch64 have linked loads). 
@@ -58,7 +58,7 @@ using CreateCmpXchgInstFun = /// [...] /// /// Returns true if the containing function was modified. -bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); +bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg); } // end namespace llvm diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 526ddb1b9706..f76a2426377a 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -26,7 +26,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" @@ -47,6 +46,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <cassert> @@ -65,7 +65,7 @@ class TargetMachine; extern cl::opt<unsigned> PartialUnrollingThreshold; -/// \brief Base class which can be used to help build a TTI implementation. +/// Base class which can be used to help build a TTI implementation. /// /// This class provides as much implementation of the TTI interface as is /// possible using the target independent parts of the code generator. @@ -101,16 +101,32 @@ private: return Cost; } - /// \brief Local query method delegates up to T which *must* implement this! + /// Local query method delegates up to T which *must* implement this! const TargetSubtargetInfo *getST() const { return static_cast<const T *>(this)->getST(); } - /// \brief Local query method delegates up to T which *must* implement this! + /// Local query method delegates up to T which *must* implement this! const TargetLoweringBase *getTLI() const { return static_cast<const T *>(this)->getTLI(); } + static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { + switch (M) { + case TTI::MIM_Unindexed: + return ISD::UNINDEXED; + case TTI::MIM_PreInc: + return ISD::PRE_INC; + case TTI::MIM_PreDec: + return ISD::PRE_DEC; + case TTI::MIM_PostInc: + return ISD::POST_INC; + case TTI::MIM_PostDec: + return ISD::POST_DEC; + } + llvm_unreachable("Unexpected MemIndexedMode"); + } + protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} @@ -157,6 +173,18 @@ public: return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); } + bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, + const DataLayout &DL) const { + EVT VT = getTLI()->getValueType(DL, Ty); + return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); + } + + bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, + const DataLayout &DL) const { + EVT VT = getTLI()->getValueType(DL, Ty); + return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); + } + bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); } @@ -179,6 +207,8 @@ public: return getTLI()->isProfitableToHoist(I); } + bool useAA() const { return getST()->useAA(); } + bool isTypeLegal(Type *Ty) { EVT VT = getTLI()->getValueType(DL, Ty); return getTLI()->isTypeLegal(VT); @@ -240,7 +270,7 @@ public: bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); // Early exit if both a jump table and bit test are not allowed. 
- if (N < 1 || (!IsJTAllowed && DL.getPointerSizeInBits() < N)) + if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) return N; APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); @@ -254,7 +284,7 @@ public: } // Check if suitable for a bit test - if (N <= DL.getPointerSizeInBits()) { + if (N <= DL.getIndexSizeInBits(0u)) { SmallPtrSet<const BasicBlock *, 4> Dests; for (auto I : SI.cases()) Dests.insert(I.getCaseSuccessor()); @@ -523,11 +553,15 @@ public: unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc || - Kind == TTI::SK_PermuteSingleSrc) { + switch (Kind) { + case TTI::SK_Select: + case TTI::SK_Transpose: + case TTI::SK_PermuteSingleSrc: + case TTI::SK_PermuteTwoSrc: return getPermuteShuffleOverhead(Tp); + default: + return 1; } - return 1; } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, @@ -614,7 +648,7 @@ public: } // If we are legalizing by splitting, query the concrete TTI for the cost - // of casting the original vector twice. We also need to factor int the + // of casting the original vector twice. We also need to factor in the // cost of the split itself. Count that as 1, to be consistent with // TLI->getTypeLegalizationCost(). if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == @@ -916,6 +950,20 @@ public: RetTy, Args[0], VarMask, Alignment); } + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF); } } @@ -1039,6 +1087,39 @@ public: case Intrinsic::masked_load: return static_cast<T *>(this) ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0); + case Intrinsic::experimental_vector_reduce_add: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Add, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_mul: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_and: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::And, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_or: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Or, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_xor: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_fadd: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_fmul: + return static_cast<T *>(this)->getArithmeticReductionCost( + Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false); + case Intrinsic::experimental_vector_reduce_smax: + case 
Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + return static_cast<T *>(this)->getMinMaxReductionCost( + Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, + /*IsSigned=*/true); + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + return static_cast<T *>(this)->getMinMaxReductionCost( + Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, + /*IsSigned=*/false); case Intrinsic::ctpop: ISDs.push_back(ISD::CTPOP); // In case of legalization use TCC_Expensive. This is cheaper than a @@ -1123,7 +1204,7 @@ public: return SingleCallCost; } - /// \brief Compute a cost of the given call instruction. + /// Compute a cost of the given call instruction. /// /// Compute the cost of calling function F with return type RetTy and /// argument types Tys. F might be nullptr, in this case the cost of an @@ -1284,7 +1365,7 @@ public: /// @} }; -/// \brief Concrete BasicTTIImpl that can be used if no further customization +/// Concrete BasicTTIImpl that can be used if no further customization /// is needed. class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { using BaseT = BasicTTIImplBase<BasicTTIImpl>; @@ -1298,7 +1379,7 @@ class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { const TargetLoweringBase *getTLI() const { return TLI; } public: - explicit BasicTTIImpl(const TargetMachine *ST, const Function &F); + explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index d9e8206408a7..f85767f1fc11 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -22,7 +22,7 @@ class MachineFunction; class MachineLoopInfo; class VirtRegMap; - /// \brief Normalize the spill weight of a live interval + /// Normalize the spill weight of a live interval /// /// The spill weight of a live interval is computed as: /// @@ -42,7 +42,7 @@ class VirtRegMap; return UseDefFreq / (Size + 25*SlotIndex::InstrDist); } - /// \brief Calculate auxiliary information for a virtual register such as its + /// Calculate auxiliary information for a virtual register such as its /// spill weight and allocation hint. class VirtRegAuxInfo { public: @@ -64,10 +64,10 @@ class VirtRegMap; NormalizingFn norm = normalizeSpillWeight) : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {} - /// \brief (re)compute li's spill weight and allocation hint. + /// (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &li); - /// \brief Compute future expected spill weight of a split artifact of li + /// Compute future expected spill weight of a split artifact of li /// that will span between start and end slot indexes. /// \param li The live interval to be split. /// \param start The expected beginning of the split artifact. Instructions /// before start will not affect the weight. /// \param end The expected end of the split artifact. Instructions /// after end will not affect the weight. /// \return The expected spill weight of the split artifact. Returns /// negative weight for unspillable li. float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); - /// \brief Helper function for weight calculations. + /// Helper function for weight calculations. /// (Re)compute li's spill weight and allocation hint, or, for non-null /// start and end - compute future expected spill weight of a split /// artifact of li that will span between start and end slot indexes.
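The normalization expression visible in the hunk above, return UseDefFreq / (Size + 25*SlotIndex::InstrDist), is easy to ground with numbers. The C++ below is an illustration only, not part of this patch; the frequency and size inputs are made up, and InstrDist is passed as a parameter instead of being read from SlotIndexes.

#include <cstdio>

// Mirrors the normalizeSpillWeight expression quoted above. The
// 25 * InstrDist term acts as a fixed 25-instruction "tax" in the
// denominator that keeps very short intervals from getting
// disproportionately large spill weights.
static float normalize(float UseDefFreq, unsigned Size, unsigned InstrDist) {
  return UseDefFreq / (Size + 25 * InstrDist);
}

int main() {
  const unsigned InstrDist = 4; // made-up slot distance between instructions
  // Same use/def frequency, but the interval shrinks from 100 to 50
  // instructions: the weight rises, i.e. a shorter interval with equal
  // traffic is more expensive to spill.
  std::printf("%f\n", normalize(800.0f, 100 * InstrDist, InstrDist));
  std::printf("%f\n", normalize(800.0f, 50 * InstrDist, InstrDist));
}

Note that the denominator is in slot-index units, which is why Size is expressed here as an instruction count multiplied by InstrDist.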
@@ -94,7 +94,7 @@ class VirtRegMap; SlotIndex *end = nullptr); }; - /// \brief Compute spill weights and allocation hints for all virtual register + /// Compute spill weights and allocation hints for all virtual register /// live intervals. void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, VirtRegMap *VRM, diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index d30a27328c01..efcf80ba0b4e 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -304,7 +304,7 @@ public: /// CheckReturn - Analyze the return values of a function, returning /// true if the return can be performed without sret-demotion, and /// false otherwise. - bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, + bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn); /// AnalyzeCallOperands - Analyze the outgoing arguments to a call, diff --git a/include/llvm/CodeGen/CommandFlags.def b/include/llvm/CodeGen/CommandFlags.inc index fe96033a9c61..7d2d167289e0 100644 --- a/include/llvm/CodeGen/CommandFlags.def +++ b/include/llvm/CodeGen/CommandFlags.inc @@ -17,7 +17,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCTargetOptionsCommandFlags.def" +#include "llvm/MC/MCTargetOptionsCommandFlags.inc" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -98,7 +98,9 @@ static cl::opt<llvm::ExceptionHandling> ExceptionModel( clEnumValN(ExceptionHandling::SjLj, "sjlj", "SjLj exception handling"), clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"), clEnumValN(ExceptionHandling::WinEH, "wineh", - "Windows exception model"))); + "Windows exception model"), + clEnumValN(ExceptionHandling::Wasm, "wasm", + "WebAssembly exception handling"))); static cl::opt<TargetMachine::CodeGenFileType> FileType( "filetype", cl::init(TargetMachine::CGFT_AssemblyFile), @@ -259,6 +261,10 @@ static cl::opt<bool> EnableStackSizeSection( "stack-size-section", cl::desc("Emit a section containing stack size metadata"), cl::init(false)); +static cl::opt<bool> + EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"), + cl::init(false)); + // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. static TargetOptions InitTargetOptionsFromCodeGenFlags() { @@ -284,8 +290,10 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.FunctionSections = FunctionSections; Options.UniqueSectionNames = UniqueSectionNames; Options.EmulatedTLS = EmulatedTLS; + Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0; Options.ExceptionModel = ExceptionModel; Options.EmitStackSizeSection = EnableStackSizeSection; + Options.EmitAddrsig = EnableAddrsig; Options.MCOptions = InitMCTargetOptionsFromFlags(); @@ -326,7 +334,27 @@ LLVM_ATTRIBUTE_UNUSED static std::string getFeaturesStr() { return Features.getString(); } -/// \brief Set function attributes of functions in Module M based on CPU, +LLVM_ATTRIBUTE_UNUSED static std::vector<std::string> getFeatureList() { + SubtargetFeatures Features; + + // If the user asked for the 'native' CPU, we need to autodetect features. + // This is necessary for x86 where the CPU might not support all the + // features the autodetected CPU name lists in the target. For example, + // not all Sandybridge processors support AVX.
+ if (MCPU == "native") { + StringMap<bool> HostFeatures; + if (sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + } + + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + + return Features.getFeatures(); +} + +/// Set function attributes of functions in Module M based on CPU, /// Features, and command line flags. LLVM_ATTRIBUTE_UNUSED static void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) { diff --git a/include/llvm/CodeGen/CostTable.h b/include/llvm/CodeGen/CostTable.h index 5a6368c5a0f8..48ad76971520 100644 --- a/include/llvm/CodeGen/CostTable.h +++ b/include/llvm/CodeGen/CostTable.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Cost tables and simple lookup functions +/// Cost tables and simple lookup functions /// //===----------------------------------------------------------------------===// @@ -17,7 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/Support/MachineValueType.h" namespace llvm { diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index f809fc97fe59..7d486b1df56d 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -136,7 +136,7 @@ class DIEAbbrevSet { /// The bump allocator to use when creating DIEAbbrev objects in the uniqued /// storage container. BumpPtrAllocator &Alloc; - /// \brief FoldingSet that uniques the abbreviations. + /// FoldingSet that uniques the abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; /// A list of all the unique abbreviations in use. std::vector<DIEAbbrev *> Abbreviations; @@ -190,7 +190,7 @@ public: uint64_t getValue() const { return Integer; } void setValue(uint64_t Val) { Integer = Val; } - void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; + void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; void print(raw_ostream &O) const; @@ -868,7 +868,7 @@ public: return dwarf::DW_FORM_block; } - void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; + void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; void print(raw_ostream &O) const; @@ -899,7 +899,7 @@ public: return dwarf::DW_FORM_block; } - void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; + void EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; void print(raw_ostream &O) const; diff --git a/include/llvm/CodeGen/DwarfStringPoolEntry.h b/include/llvm/CodeGen/DwarfStringPoolEntry.h index fc2b5ddd2d2c..e6c0483cfc35 100644 --- a/include/llvm/CodeGen/DwarfStringPoolEntry.h +++ b/include/llvm/CodeGen/DwarfStringPoolEntry.h @@ -41,6 +41,8 @@ public: unsigned getOffset() const { return I->second.Offset; } unsigned getIndex() const { return I->second.Index; } StringRef getString() const { return I->first(); } + /// Return the entire string pool entry for convenience. 
+ DwarfStringPoolEntry getEntry() const { return I->getValue(); } bool operator==(const DwarfStringPoolEntryRef &X) const { return I == X.I; } bool operator!=(const DwarfStringPoolEntryRef &X) const { return I != X.I; } diff --git a/include/llvm/CodeGen/ExecutionDepsFix.h b/include/llvm/CodeGen/ExecutionDepsFix.h deleted file mode 100644 index f4db8b7322da..000000000000 --- a/include/llvm/CodeGen/ExecutionDepsFix.h +++ /dev/null @@ -1,230 +0,0 @@ -//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file Execution Dependency Fix pass. -/// -/// Some X86 SSE instructions like mov, and, or, xor are available in different -/// variants for different operand types. These variant instructions are -/// equivalent, but on Nehalem and newer cpus there is extra latency -/// transferring data between integer and floating point domains. ARM cores -/// have similar issues when they are configured with both VFP and NEON -/// pipelines. -/// -/// This pass changes the variant instructions to minimize domain crossings. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H -#define LLVM_CODEGEN_EXECUTIONDEPSFIX_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/Pass.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> -#include <limits> -#include <utility> -#include <vector> - -namespace llvm { - -class MachineBasicBlock; -class MachineInstr; -class TargetInstrInfo; - -/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track -/// of execution domains. -/// -/// An open DomainValue represents a set of instructions that can still switch -/// execution domain. Multiple registers may refer to the same open -/// DomainValue - they will eventually be collapsed to the same execution -/// domain. -/// -/// A collapsed DomainValue represents a single register that has been forced -/// into one of more execution domains. There is a separate collapsed -/// DomainValue for each register, but it may contain multiple execution -/// domains. A register value is initially created in a single execution -/// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact that the register is now available in multiple -/// domains. -struct DomainValue { - // Basic reference counting. - unsigned Refs = 0; - - // Bitmask of available domains. For an open DomainValue, it is the still - // possible domains for collapsing. For a collapsed DomainValue it is the - // domains where the register is available for free. - unsigned AvailableDomains; - - // Pointer to the next DomainValue in a chain. When two DomainValues are - // merged, Victim.Next is set to point to Victor, so old DomainValue - // references can be updated by following the chain. - DomainValue *Next; - - // Twiddleable instructions using or defining these registers. 
- SmallVector<MachineInstr*, 8> Instrs; - - DomainValue() { clear(); } - - // A collapsed DomainValue has no instructions to twiddle - it simply keeps - // track of the domains where the registers are already available. - bool isCollapsed() const { return Instrs.empty(); } - - // Is domain available? - bool hasDomain(unsigned domain) const { - assert(domain < - static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && - "undefined behavior"); - return AvailableDomains & (1u << domain); - } - - // Mark domain as available. - void addDomain(unsigned domain) { - AvailableDomains |= 1u << domain; - } - - // Restrict to a single domain available. - void setSingleDomain(unsigned domain) { - AvailableDomains = 1u << domain; - } - - // Return bitmask of domains that are available and in mask. - unsigned getCommonDomains(unsigned mask) const { - return AvailableDomains & mask; - } - - // First domain available. - unsigned getFirstDomain() const { - return countTrailingZeros(AvailableDomains); - } - - // Clear this DomainValue and point to next which has all its data. - void clear() { - AvailableDomains = 0; - Next = nullptr; - Instrs.clear(); - } -}; - -/// Information about a live register. -struct LiveReg { - /// Value currently in this register, or NULL when no value is being tracked. - /// This counts as a DomainValue reference. - DomainValue *Value; - - /// Instruction that defined this register, relative to the beginning of the - /// current basic block. When a LiveReg is used to represent a live-out - /// register, this value is relative to the end of the basic block, so it - /// will be a negative number. - int Def; -}; - -class ExecutionDepsFix : public MachineFunctionPass { - SpecificBumpPtrAllocator<DomainValue> Allocator; - SmallVector<DomainValue*,16> Avail; - - const TargetRegisterClass *const RC; - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RegClassInfo; - std::vector<SmallVector<int, 1>> AliasMap; - const unsigned NumRegs; - LiveReg *LiveRegs; - struct MBBInfo { - // Keeps clearance and domain information for all registers. Note that this - // is different from the usual definition notion of liveness. The CPU - // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs = nullptr; - - // Whether we have gotten to this block in primary processing yet. - bool PrimaryCompleted = false; - - // The number of predecessors for which primary processing has completed - unsigned IncomingProcessed = 0; - - // The value of `IncomingProcessed` at the start of primary processing - unsigned PrimaryIncoming = 0; - - // The number of predecessors for which all processing steps are done. - unsigned IncomingCompleted = 0; - - MBBInfo() = default; - }; - using MBBInfoMap = DenseMap<MachineBasicBlock *, MBBInfo>; - MBBInfoMap MBBInfos; - - /// List of undefined register reads in this block in forward order. - std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; - - /// Storage for register unit liveness. - LivePhysRegs LiveRegSet; - - /// Current instruction number. - /// The first instruction in each basic block is 0. 
- int CurInstr; - -public: - ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) - : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - -private: - iterator_range<SmallVectorImpl<int>::const_iterator> - regIndices(unsigned Reg) const; - // DomainValue allocation. - DomainValue *alloc(int domain = -1); - DomainValue *retain(DomainValue *DV) { - if (DV) ++DV->Refs; - return DV; - } - void release(DomainValue*); - DomainValue *resolve(DomainValue*&); - - // LiveRegs manipulations. - void setLiveReg(int rx, DomainValue *DV); - void kill(int rx); - void force(int rx, unsigned domain); - void collapse(DomainValue *dv, unsigned domain); - bool merge(DomainValue *A, DomainValue *B); - - void enterBasicBlock(MachineBasicBlock*); - void leaveBasicBlock(MachineBasicBlock*); - bool isBlockDone(MachineBasicBlock *); - void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass); - bool visitInstr(MachineInstr *); - void processDefs(MachineInstr *, bool breakDependency, bool Kill); - void visitSoftInstr(MachineInstr*, unsigned mask); - void visitHardInstr(MachineInstr*, unsigned domain); - bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, - unsigned Pref); - bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); - void processUndefReads(MachineBasicBlock*); -}; - -} // end namepsace llvm - -#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H diff --git a/include/llvm/CodeGen/ExecutionDomainFix.h b/include/llvm/CodeGen/ExecutionDomainFix.h new file mode 100644 index 000000000000..338c214dd073 --- /dev/null +++ b/include/llvm/CodeGen/ExecutionDomainFix.h @@ -0,0 +1,213 @@ +//==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Execution Domain Fix pass. +/// +/// Some X86 SSE instructions like mov, and, or, xor are available in different +/// variants for different operand types. These variant instructions are +/// equivalent, but on Nehalem and newer cpus there is extra latency +/// transferring data between integer and floating point domains. ARM cores +/// have similar issues when they are configured with both VFP and NEON +/// pipelines. +/// +/// This pass changes the variant instructions to minimize domain crossings. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXECUTIONDOMAINFIX_H +#define LLVM_CODEGEN_EXECUTIONDOMAINFIX_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LoopTraversal.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineInstr; +class TargetInstrInfo; + +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. 
Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. +/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one or more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact that the register is now available in multiple +/// domains. +struct DomainValue { + /// Basic reference counting. + unsigned Refs = 0; + + /// Bitmask of available domains. For an open DomainValue, it is the domains + /// still possible for collapsing. For a collapsed DomainValue it is the + /// domains where the register is available for free. + unsigned AvailableDomains; + + /// Pointer to the next DomainValue in a chain. When two DomainValues are + /// merged, Victim.Next is set to point to Victor, so old DomainValue + /// references can be updated by following the chain. + DomainValue *Next; + + /// Twiddleable instructions using or defining these registers. + SmallVector<MachineInstr *, 8> Instrs; + + DomainValue() { clear(); } + + /// A collapsed DomainValue has no instructions to twiddle - it simply keeps + /// track of the domains where the registers are already available. + bool isCollapsed() const { return Instrs.empty(); } + + /// Is domain available? + bool hasDomain(unsigned domain) const { + assert(domain < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); + return AvailableDomains & (1u << domain); + } + + /// Mark domain as available. + void addDomain(unsigned domain) { AvailableDomains |= 1u << domain; } + + /// Restrict to a single domain available. + void setSingleDomain(unsigned domain) { AvailableDomains = 1u << domain; } + + /// Return bitmask of domains that are available and in mask. + unsigned getCommonDomains(unsigned mask) const { + return AvailableDomains & mask; + } + + /// First domain available. + unsigned getFirstDomain() const { + return countTrailingZeros(AvailableDomains); + } + + /// Clear this DomainValue and point to next which has all its data. + void clear() { + AvailableDomains = 0; + Next = nullptr; + Instrs.clear(); + } +}; + +class ExecutionDomainFix : public MachineFunctionPass { + SpecificBumpPtrAllocator<DomainValue> Allocator; + SmallVector<DomainValue *, 16> Avail; + + const TargetRegisterClass *const RC; + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + std::vector<SmallVector<int, 1>> AliasMap; + const unsigned NumRegs; + /// Value currently in each register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + using LiveRegsDVInfo = std::vector<DomainValue *>; + LiveRegsDVInfo LiveRegs; + /// Keeps domain information for all registers. Note that this + /// is different from the usual notion of liveness. The CPU + /// doesn't care whether or not we consider a register killed.
+ using OutRegsInfoMap = SmallVector<LiveRegsDVInfo, 4>; + OutRegsInfoMap MBBOutRegsInfos; + + ReachingDefAnalysis *RDA; + +public: + ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC) + : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<ReachingDefAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + /// Translate TRI register number to a list of indices into our smaller tables + /// of interesting registers. + iterator_range<SmallVectorImpl<int>::const_iterator> + regIndices(unsigned Reg) const; + + /// DomainValue allocation. + DomainValue *alloc(int domain = -1); + + /// Add reference to DV. + DomainValue *retain(DomainValue *DV) { + if (DV) + ++DV->Refs; + return DV; + } + + /// Release a reference to DV. When the last reference is released, + /// collapse if needed. + void release(DomainValue *); + + /// Follow the chain of dead DomainValues until a live DomainValue is reached. + /// Update the referenced pointer when necessary. + DomainValue *resolve(DomainValue *&); + + /// Set LiveRegs[rx] = dv, updating reference counts. + void setLiveReg(int rx, DomainValue *DV); + + /// Kill register rx, recycle or collapse any DomainValue. + void kill(int rx); + + /// Force register rx into domain. + void force(int rx, unsigned domain); + + /// Collapse open DomainValue into given domain. If there are multiple + /// registers using dv, they each get a unique collapsed DomainValue. + void collapse(DomainValue *dv, unsigned domain); + + /// All instructions and registers in B are moved to A, and B is released. + bool merge(DomainValue *A, DomainValue *B); + + /// Set up LiveRegs by merging predecessor live-out values. + void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Update live-out values. + void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Process the given basic block. + void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Visit the given instruction. + bool visitInstr(MachineInstr *); + + /// Update def-ages for registers defined by MI. + /// If Kill is set, also kill off DomainValues clobbered by the defs. + void processDefs(MachineInstr *, bool Kill); + + /// A soft instruction can be changed to work in other domains given by mask. + void visitSoftInstr(MachineInstr *, unsigned mask); + + /// A hard instruction only works in one domain. All input registers will be + /// forced into that domain.
+ void visitHardInstr(MachineInstr *, unsigned domain); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_EXECUTIONDOMAINFIX_H diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 85bb826dcb8c..865d8a88b8cc 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" @@ -28,6 +27,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/MachineValueType.h" #include <algorithm> #include <cstdint> #include <utility> @@ -61,7 +61,7 @@ class Type; class User; class Value; -/// \brief This is a fast-path instruction selection class that generates poor +/// This is a fast-path instruction selection class that generates poor /// code and doesn't support illegal types or non-trivial lowering, but runs /// quickly. class FastISel { @@ -78,7 +78,7 @@ public: bool IsReturnValueUsed : 1; bool IsPatchPoint : 1; - // \brief IsTailCall Should be modified by implementations of FastLowerCall + // IsTailCall Should be modified by implementations of FastLowerCall // that perform tail call conversions. bool IsTailCall = false; @@ -215,67 +215,74 @@ protected: const TargetLibraryInfo *LibInfo; bool SkipTargetIndependentISel; - /// \brief The position of the last instruction for materializing constants + /// The position of the last instruction for materializing constants /// for use in the current block. It resets to EmitStartPt when it makes sense /// (for example, it's usually profitable to avoid function calls between the /// definition and the use) MachineInstr *LastLocalValue; - /// \brief The top most instruction in the current block that is allowed for + /// The top most instruction in the current block that is allowed for /// emitting local variables. LastLocalValue resets to EmitStartPt when it /// makes sense (for example, on function calls) MachineInstr *EmitStartPt; + /// Last local value flush point. On a subsequent flush, no local value will + /// sink past this point. + MachineBasicBlock::iterator LastFlushPoint; + public: virtual ~FastISel(); - /// \brief Return the position of the last instruction emitted for + /// Return the position of the last instruction emitted for /// materializing constants for use in the current block. MachineInstr *getLastLocalValue() { return LastLocalValue; } - /// \brief Update the position of the last instruction emitted for + /// Update the position of the last instruction emitted for /// materializing constants for use in the current block. void setLastLocalValue(MachineInstr *I) { EmitStartPt = I; LastLocalValue = I; } - /// \brief Set the current block to which generated machine instructions will - /// be appended, and clear the local CSE map. + /// Set the current block to which generated machine instructions will + /// be appended. void startNewBlock(); - /// \brief Return current debug location information. + /// Flush the local value map and sink local values if possible. + void finishBasicBlock(); + + /// Return current debug location information. 
DebugLoc getCurDebugLoc() const { return DbgLoc; } - /// \brief Do "fast" instruction selection for function arguments and append + /// Do "fast" instruction selection for function arguments and append /// the machine instructions to the current block. Returns true when /// successful. bool lowerArguments(); - /// \brief Do "fast" instruction selection for the given LLVM IR instruction + /// Do "fast" instruction selection for the given LLVM IR instruction /// and append the generated machine instructions to the current block. /// Returns true if selection was successful. bool selectInstruction(const Instruction *I); - /// \brief Do "fast" instruction selection for the given LLVM IR operator + /// Do "fast" instruction selection for the given LLVM IR operator /// (Instruction or ConstantExpr), and append generated machine instructions /// to the current block. Return true if selection was successful. bool selectOperator(const User *I, unsigned Opcode); - /// \brief Create a virtual register and arrange for it to be assigned the + /// Create a virtual register and arrange for it to be assigned the /// value for the given LLVM value. unsigned getRegForValue(const Value *V); - /// \brief Look up the value to see if its value is already cached in a + /// Look up the value to see if its value is already cached in a /// register. It may be defined by instructions across blocks or defined /// locally. unsigned lookUpRegForValue(const Value *V); - /// \brief This is a wrapper around getRegForValue that also takes care of + /// This is a wrapper around getRegForValue that also takes care of /// truncating or sign-extending the given getelementptr index value. - std::pair<unsigned, bool> getRegForGEPIndex(const Value *V); + std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx); - /// \brief We're checking to see if we can fold \p LI into \p FoldInst. Note + /// We're checking to see if we can fold \p LI into \p FoldInst. Note /// that we could have a sequence where multiple LLVM IR instructions are /// folded into the same machineinstr. For example we could have: /// @@ -289,7 +296,7 @@ public: /// If we succeed folding, return true. bool tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst); - /// \brief The specified machine instr operand is a vreg, and that vreg is + /// The specified machine instr operand is a vreg, and that vreg is /// being provided by the specified load instruction. If possible, try to /// fold the load as an operand to the instruction, returning true if /// possible. @@ -300,11 +307,11 @@ public: return false; } - /// \brief Reset InsertPt to prepare for inserting instructions into the + /// Reset InsertPt to prepare for inserting instructions into the /// current block. void recomputeInsertPt(); - /// \brief Remove all dead instructions between the I and E. + /// Remove all dead instructions between the I and E. void removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E); @@ -313,11 +320,11 @@ public: DebugLoc DL; }; - /// \brief Prepare InsertPt to begin inserting instructions into the local + /// Prepare InsertPt to begin inserting instructions into the local /// value area and return the old insert position. SavePoint enterLocalValueArea(); - /// \brief Reset InsertPt to the given old insert position. + /// Reset InsertPt to the given old insert position. 
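/// A minimal usage sketch of the local-value area API above (editor's
/// illustration, not part of this patch; the Constant *C is hypothetical):
///   SavePoint SP = enterLocalValueArea();
///   unsigned Reg = fastMaterializeConstant(C); // emit into the LV area
///   leaveLocalValueArea(SP);                   // restore the insert point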
void leaveLocalValueArea(SavePoint Old); protected: @@ -325,45 +332,45 @@ protected: const TargetLibraryInfo *LibInfo, bool SkipTargetIndependentISel = false); - /// \brief This method is called by target-independent code when the normal + /// This method is called by target-independent code when the normal /// FastISel process fails to select an instruction. This gives targets a /// chance to emit code for anything that doesn't fit into FastISel's /// framework. It returns true if it was successful. virtual bool fastSelectInstruction(const Instruction *I) = 0; - /// \brief This method is called by target-independent code to do target- + /// This method is called by target-independent code to do target- /// specific argument lowering. It returns true if it was successful. virtual bool fastLowerArguments(); - /// \brief This method is called by target-independent code to do target- + /// This method is called by target-independent code to do target- /// specific call lowering. It returns true if it was successful. virtual bool fastLowerCall(CallLoweringInfo &CLI); - /// \brief This method is called by target-independent code to do target- + /// This method is called by target-independent code to do target- /// specific intrinsic lowering. It returns true if it was successful. virtual bool fastLowerIntrinsicCall(const IntrinsicInst *II); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type and opcode be emitted. virtual unsigned fastEmit_(MVT VT, MVT RetVT, unsigned Opcode); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register operand be emitted. virtual unsigned fastEmit_r(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register operands be emitted. virtual unsigned fastEmit_rr(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register and immediate /// operands be emitted. virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm); - /// \brief This method is a wrapper of fastEmit_ri. + /// This method is a wrapper of fastEmit_ri. /// /// It first tries to emit an instruction with an immediate operand using /// fastEmit_ri. If that fails, it materializes the immediate into a register @@ -371,89 +378,89 @@ protected: unsigned fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and immediate operand be emitted. 
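// Editor's illustration, not part of this patch: the fastEmit_ri_ wrapper
// described above, with a hypothetical flow for an i32 add:
//   unsigned R = fastEmit_ri_(MVT::i32, ISD::ADD, Op0, Op0IsKill,
//                             /*Imm=*/42, MVT::i32);
//   // Tries fastEmit_ri first; if the immediate cannot be encoded, the
//   // constant is materialized into a register and fastEmit_rr is used.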
virtual unsigned fastEmit_i(MVT VT, MVT RetVT, unsigned Opcode, uint64_t Imm); - /// \brief This method is called by target-independent code to request that an + /// This method is called by target-independent code to request that an /// instruction with the given type, opcode, and floating-point immediate /// operand be emitted. virtual unsigned fastEmit_f(MVT VT, MVT RetVT, unsigned Opcode, const ConstantFP *FPImm); - /// \brief Emit a MachineInstr with no operands and a result register in the + /// Emit a MachineInstr with no operands and a result register in the /// given register class. unsigned fastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass *RC); - /// \brief Emit a MachineInstr with one register operand and a result register + /// Emit a MachineInstr with one register operand and a result register /// in the given register class. unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); - /// \brief Emit a MachineInstr with two register operands and a result + /// Emit a MachineInstr with two register operands and a result /// register in the given register class. unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - /// \brief Emit a MachineInstr with three register operands and a result + /// Emit a MachineInstr with three register operands and a result /// register in the given register class. unsigned fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill); - /// \brief Emit a MachineInstr with a register operand, an immediate, and a + /// Emit a MachineInstr with a register operand, an immediate, and a /// result register in the given register class. unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - /// \brief Emit a MachineInstr with one register operand and two immediate + /// Emit a MachineInstr with one register operand and two immediate /// operands. unsigned fastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2); - /// \brief Emit a MachineInstr with a floating point immediate, and a result + /// Emit a MachineInstr with a floating point immediate, and a result /// register in the given register class. unsigned fastEmitInst_f(unsigned MachineInstOpcode, const TargetRegisterClass *RC, const ConstantFP *FPImm); - /// \brief Emit a MachineInstr with two register operands, an immediate, and a + /// Emit a MachineInstr with two register operands, an immediate, and a /// result register in the given register class. unsigned fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - /// \brief Emit a MachineInstr with a single immediate operand, and a result + /// Emit a MachineInstr with a single immediate operand, and a result /// register in the given register class. - unsigned fastEmitInst_i(unsigned MachineInstrOpcode, + unsigned fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm); - /// \brief Emit a MachineInstr for an extract_subreg from a specified index of + /// Emit a MachineInstr for an extract_subreg from a specified index of /// a superregister to a specified type. 
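/// For example (editor's sketch, not part of this patch; the sub-register
/// index name is target-specific and illustrative):
///   unsigned Lo = fastEmitInst_extractsubreg(MVT::i32, Op0, Op0IsKill,
///                                            MyTgt::sub_32bit);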
unsigned fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx); - /// \brief Emit MachineInstrs to compute the value of Op with all but the + /// Emit MachineInstrs to compute the value of Op with all but the /// least significant bit set to zero. unsigned fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill); - /// \brief Emit an unconditional branch to the given block, unless it is the + /// Emit an unconditional branch to the given block, unless it is the /// immediate (fall-through) successor, and update the CFG. - void fastEmitBranch(MachineBasicBlock *MBB, const DebugLoc &DL); + void fastEmitBranch(MachineBasicBlock *MSucc, const DebugLoc &DbgLoc); /// Emit an unconditional branch to \p FalseMBB, obtains the branch weight /// and adds TrueMBB and FalseMBB to the successor list. void finishCondBranch(const BasicBlock *BranchBB, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); - /// \brief Update the value map to include the new mapping for this + /// Update the value map to include the new mapping for this /// instruction, or insert an extra copy to get the result in a previous /// determined register. /// @@ -464,26 +471,26 @@ protected: unsigned createResultReg(const TargetRegisterClass *RC); - /// \brief Try to constrain Op so that it is usable by argument OpNum of the + /// Try to constrain Op so that it is usable by argument OpNum of the /// provided MCInstrDesc. If this fails, create a new virtual register in the /// correct class and COPY the value there. unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, unsigned OpNum); - /// \brief Emit a constant in a register using target-specific logic, such as + /// Emit a constant in a register using target-specific logic, such as /// constant pool loads. virtual unsigned fastMaterializeConstant(const Constant *C) { return 0; } - /// \brief Emit an alloca address in a register using target-specific logic. + /// Emit an alloca address in a register using target-specific logic. virtual unsigned fastMaterializeAlloca(const AllocaInst *C) { return 0; } - /// \brief Emit the floating-point constant +0.0 in a register using target- + /// Emit the floating-point constant +0.0 in a register using target- /// specific logic. virtual unsigned fastMaterializeFloatZero(const ConstantFP *CF) { return 0; } - /// \brief Check if \c Add is an add that can be safely folded into \c GEP. + /// Check if \c Add is an add that can be safely folded into \c GEP. /// /// \c Add can be folded into \c GEP if: /// - \c Add is an add, @@ -492,16 +499,16 @@ protected: /// - \c Add has a constant operand. bool canFoldAddIntoGEP(const User *GEP, const Value *Add); - /// \brief Test whether the given value has exactly one use. + /// Test whether the given value has exactly one use. bool hasTrivialKill(const Value *V); - /// \brief Create a machine mem operand from the given instruction. + /// Create a machine mem operand from the given instruction. 
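// Editor's example, not part of this patch, of the foldable pattern that
// canFoldAddIntoGEP (documented above) looks for, in LLVM IR form:
//   %sum = add i64 %base, 16
//   %p = getelementptr i8, i8* %q, i64 %sum
// Here %sum is an add with a constant operand in the same block as the
// GEP, so the 16 can be folded into the GEP's addressing computation.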
MachineMemOperand *createMachineMemOperandFor(const Instruction *I) const; CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) const; bool lowerCallTo(const CallInst *CI, MCSymbol *Symbol, unsigned NumArgs); - bool lowerCallTo(const CallInst *CI, const char *SymbolName, + bool lowerCallTo(const CallInst *CI, const char *SymName, unsigned NumArgs); bool lowerCallTo(CallLoweringInfo &CLI); @@ -518,23 +525,24 @@ protected: } bool lowerCall(const CallInst *I); - /// \brief Select and emit code for a binary operator instruction, which has + /// Select and emit code for a binary operator instruction, which has /// an opcode which directly corresponds to the given ISD opcode. bool selectBinaryOp(const User *I, unsigned ISDOpcode); bool selectFNeg(const User *I); bool selectGetElementPtr(const User *I); bool selectStackmap(const CallInst *I); bool selectPatchpoint(const CallInst *I); - bool selectCall(const User *Call); + bool selectCall(const User *I); bool selectIntrinsicCall(const IntrinsicInst *II); bool selectBitCast(const User *I); bool selectCast(const User *I, unsigned Opcode); - bool selectExtractValue(const User *I); + bool selectExtractValue(const User *U); bool selectInsertValue(const User *I); bool selectXRayCustomEvent(const CallInst *II); + bool selectXRayTypedEvent(const CallInst *II); private: - /// \brief Handle PHI nodes in successor blocks. + /// Handle PHI nodes in successor blocks. /// /// Emit code to ensure constants are copied into registers when needed. /// Remember the virtual registers that need to be added to the Machine PHI /// @@ -543,27 +551,41 @@ private: /// correspond to a different MBB than the end. bool handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); - /// \brief Helper for materializeRegForValue to materialize a constant in a + /// Helper for materializeRegForValue to materialize a constant in a /// target-independent way. unsigned materializeConstant(const Value *V, MVT VT); - /// \brief Helper for getRegForVale. This function is called when the value + /// Helper for getRegForValue. This function is called when the value /// isn't already available in a register and must be materialized with new /// instructions. unsigned materializeRegForValue(const Value *V, MVT VT); - /// \brief Clears LocalValueMap and moves the area for the new local variables + /// Clears LocalValueMap and moves the area for the new local variables /// to the beginning of the block. It helps to avoid spilling cached variables /// across heavy instructions like calls. void flushLocalValueMap(); - /// \brief Removes dead local value instructions after SavedLastLocalvalue. + /// Removes dead local value instructions after SavedLastLocalValue. void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue); - /// \brief Insertion point before trying to select the current instruction. + struct InstOrderMap { + DenseMap<MachineInstr *, unsigned> Orders; + MachineInstr *FirstTerminator = nullptr; + unsigned FirstTerminatorOrder = std::numeric_limits<unsigned>::max(); + + void initialize(MachineBasicBlock *MBB, + MachineBasicBlock::iterator LastFlushPoint); + }; + + /// Sinks the local value materialization instruction LocalMI to its first use + /// in the basic block, or deletes it if it is not used. + void sinkLocalValueMaterialization(MachineInstr &LocalMI, unsigned DefReg, + InstOrderMap &OrderMap); + + /// Insertion point before trying to select the current instruction.
MachineBasicBlock::iterator SavedInsertPt; - /// \brief Add a stackmap or patchpoint intrinsic call's live variable + /// Add a stackmap or patchpoint intrinsic call's live variable /// operands to a stackmap or patchpoint machine instruction. bool addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, const CallInst *CI, unsigned StartIdx); diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 3b39d87ffb4a..2da00b7d61ab 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -118,6 +118,17 @@ public: /// cross-basic-block values. DenseMap<const Value *, unsigned> ValueMap; + /// VirtReg2Value map is needed by the Divergence Analysis driven + /// instruction selection. It is the inverse of ValueMap, and it is + /// computed lazily, on demand. It is used to get the Value corresponding + /// to a live-in virtual register from + /// TargetLowering::isSDNodeSourceOfDivergence. + DenseMap<unsigned, const Value*> VirtReg2Value; + + /// This method is called from TargetLowering::isSDNodeSourceOfDivergence + /// to get the Value corresponding to the live-in virtual register. + const Value * getValueFromVirtualReg(unsigned Vreg); + /// Track virtual registers created for exception pointers. DenseMap<const Value *, unsigned> CatchPadExceptionPointers; @@ -167,6 +178,8 @@ public: /// RegFixups - Registers which need to be replaced after isel is done. DenseMap<unsigned, unsigned> RegFixups; + DenseSet<unsigned> RegsWithFixups; + /// StatepointStackSlots - A list of temporary stack slots (frame indices) /// used to spill values at a statepoint. We store them here to enable /// reuse of the same stack slots across different statepoints in different diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h index 16168e785f81..91604fd2df87 100644 --- a/include/llvm/CodeGen/GCStrategy.h +++ b/include/llvm/CodeGen/GCStrategy.h @@ -105,12 +105,12 @@ public: /// By default, write barriers are replaced with simple store /// instructions. If true, you must provide a custom pass to lower - /// calls to @llvm.gcwrite. + /// calls to \@llvm.gcwrite. bool customWriteBarrier() const { return CustomWriteBarriers; } /// By default, read barriers are replaced with simple load /// instructions. If true, you must provide a custom pass to lower - /// calls to @llvm.gcread. + /// calls to \@llvm.gcread. bool customReadBarrier() const { return CustomReadBarriers; } /// Returns true if this strategy is expecting the use of gc.statepoints, @@ -147,7 +147,7 @@ public: /// By default, roots are left for the code generator so it can generate a /// stack map. If true, you must provide a custom pass to lower - /// calls to @llvm.gcroot. + /// calls to \@llvm.gcroot.
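// Editor's sketch, not part of this patch: a strategy opting into the
// custom root lowering described above. MyGCStrategy is hypothetical and
// must be paired with a pass that lowers each @llvm.gcroot call itself.
//   class MyGCStrategy : public GCStrategy {
//   public:
//     MyGCStrategy() { CustomRoots = true; } // customRoots() -> true
//   };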
bool customRoots() const { return CustomRoots; } /// If set, gcroot intrinsics should initialize their allocas to null diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index ba84d76de164..58eb412d8c24 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -17,11 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include <cstdint> #include <functional> @@ -123,7 +123,7 @@ protected: } template <typename FuncInfoTy> - void setArgFlags(ArgInfo &Arg, unsigned OpNum, const DataLayout &DL, + void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const; /// Invoke Handler::assignArg on each of the given \p Args and then use /// \p Handler to move them to the appropriate locations. /// /// \return True if everything has succeeded, false otherwise. bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args, - ValueHandler &Callback) const; + ValueHandler &Handler) const; public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} diff --git a/include/llvm/CodeGen/GlobalISel/Combiner.h b/include/llvm/CodeGen/GlobalISel/Combiner.h new file mode 100644 index 000000000000..36a33deb4a64 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/Combiner.h @@ -0,0 +1,43 @@ +//== ----- llvm/CodeGen/GlobalISel/Combiner.h --------------------- == // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// This contains common code to drive combines. Combiner passes will need to +/// set up a CombinerInfo and call combineMachineInstrs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_H +#define LLVM_CODEGEN_GLOBALISEL_COMBINER_H + +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { +class MachineRegisterInfo; +class CombinerInfo; +class TargetPassConfig; +class MachineFunction; + +class Combiner { +public: + Combiner(CombinerInfo &CombinerInfo, const TargetPassConfig *TPC); + + bool combineMachineInstrs(MachineFunction &MF); + +protected: + CombinerInfo &CInfo; + + MachineRegisterInfo *MRI = nullptr; + const TargetPassConfig *TPC; + MachineIRBuilder Builder; +}; + +} // End namespace llvm. + +#endif // LLVM_CODEGEN_GLOBALISEL_COMBINER_H diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h new file mode 100644 index 000000000000..5d5b8398452c --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -0,0 +1,44 @@ +//== llvm/CodeGen/GlobalISel/CombinerHelper.h -------------- -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +/// This contains common combine transformations that may be used in a combine +/// pass, or by the target elsewhere.
+/// Targets can pick individual opcode transformations from the helper or use +/// tryCombine which invokes all transformations. All of the transformations +/// return true if the MachineInstr changed and false otherwise. +// +//===--------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H +#define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H + +namespace llvm { + +class MachineIRBuilder; +class MachineRegisterInfo; +class MachineInstr; + +class CombinerHelper { + MachineIRBuilder &Builder; + MachineRegisterInfo &MRI; + +public: + CombinerHelper(MachineIRBuilder &B); + + /// If \p MI is a COPY, try to combine it. + /// Returns true if MI changed. + bool tryCombineCopy(MachineInstr &MI); + + /// Try to transform \p MI by using all of the above + /// combine functions. Returns true if changed. + bool tryCombine(MachineInstr &MI); +}; +} // namespace llvm + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h new file mode 100644 index 000000000000..1d248547adbf --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -0,0 +1,48 @@ +//===- llvm/CodeGen/GlobalISel/CombinerInfo.h ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// Interface for targets to specify how and when operations are combined. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_INFO_H +#define LLVM_CODEGEN_GLOBALISEL_COMBINER_INFO_H + +#include <cassert> +namespace llvm { + +class LegalizerInfo; +class MachineInstr; +class MachineIRBuilder; +class MachineRegisterInfo; +// Contains information relevant to enabling/disabling various combines for a +// pass. +class CombinerInfo { +public: + CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal, + LegalizerInfo *LInfo) + : IllegalOpsAllowed(AllowIllegalOps), + LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) { + assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) && + "Expecting LegalizerInfo when illegal ops are not allowed"); + } + virtual ~CombinerInfo() = default; + /// If \p IllegalOpsAllowed is false, the CombinerHelper will make use of + /// the LegalizerInfo to check for legality before each transformation. + bool IllegalOpsAllowed; // TODO: Make use of this. + + /// If \p LegalizeIllegalOps is true, the Combiner will also legalize the + /// illegal ops that are created. + bool LegalizeIllegalOps; // TODO: Make use of this. + const LegalizerInfo *LInfo; + virtual bool combine(MachineInstr &MI, MachineIRBuilder &B) const = 0; +}; +} // namespace llvm + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h new file mode 100644 index 000000000000..8d61f9a68279 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h @@ -0,0 +1,134 @@ +//===-- llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a version of MachineIRBuilder which does trivial +/// constant folding. +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" + +namespace llvm { + +static Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, + const unsigned Op2, + const MachineRegisterInfo &MRI) { + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI); + if (MaybeOp1Cst && MaybeOp2Cst) { + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_ADD: + return C1 + C2; + case TargetOpcode::G_AND: + return C1 & C2; + case TargetOpcode::G_ASHR: + return C1.ashr(C2); + case TargetOpcode::G_LSHR: + return C1.lshr(C2); + case TargetOpcode::G_MUL: + return C1 * C2; + case TargetOpcode::G_OR: + return C1 | C2; + case TargetOpcode::G_SHL: + return C1 << C2; + case TargetOpcode::G_SUB: + return C1 - C2; + case TargetOpcode::G_XOR: + return C1 ^ C2; + case TargetOpcode::G_UDIV: + if (!C2.getBoolValue()) + break; + return C1.udiv(C2); + case TargetOpcode::G_SDIV: + if (!C2.getBoolValue()) + break; + return C1.sdiv(C2); + case TargetOpcode::G_UREM: + if (!C2.getBoolValue()) + break; + return C1.urem(C2); + case TargetOpcode::G_SREM: + if (!C2.getBoolValue()) + break; + return C1.srem(C2); + } + } + return None; +} + +/// An MIRBuilder which does trivial constant folding of binary ops. +/// Calls to buildInstr will also try to constant fold binary ops. +class ConstantFoldingMIRBuilder + : public FoldableInstructionsBuilder<ConstantFoldingMIRBuilder> { +public: + // Pull in base class constructors. + using FoldableInstructionsBuilder< + ConstantFoldingMIRBuilder>::FoldableInstructionsBuilder; + // Unhide buildInstr + using FoldableInstructionsBuilder<ConstantFoldingMIRBuilder>::buildInstr; + + // Implement buildBinaryOp required by FoldableInstructionsBuilder which + // tries to constant fold. + MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst, + unsigned Src0, unsigned Src1) { + validateBinaryOp(Dst, Src0, Src1); + auto MaybeCst = ConstantFoldBinOp(Opcode, Src0, Src1, getMF().getRegInfo()); + if (MaybeCst) + return buildConstant(Dst, MaybeCst->getSExtValue()); + return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1); + } + + template <typename DstTy, typename UseArg1Ty, typename UseArg2Ty> + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, UseArg1Ty &&Arg1, + UseArg2Ty &&Arg2) { + unsigned Dst = getDestFromArg(Ty); + return buildInstr(Opc, Dst, getRegFromArg(std::forward<UseArg1Ty>(Arg1)), + getRegFromArg(std::forward<UseArg2Ty>(Arg2))); + } + + // Try to provide an overload for buildInstr for binary ops in order to + // constant fold. 
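+ //
+ // Editor's sketch, not part of this patch, of the intended effect. If
+ // Reg0 and Reg1 are defined by G_CONSTANT 2 and 3 (builder setup is an
+ // assumption):
+ //   ConstantFoldingMIRBuilder B(MF);
+ //   B.buildInstr(TargetOpcode::G_ADD, Dst, Reg0, Reg1);
+ // emits "Dst = G_CONSTANT 5" instead of a G_ADD.
+ //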
+ MachineInstrBuilder buildInstr(unsigned Opc, unsigned Dst, unsigned Src0, + unsigned Src1) { + switch (Opc) { + default: + break; + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_SHL: + case TargetOpcode::G_SUB: + case TargetOpcode::G_XOR: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SREM: { + return buildBinaryOp(Opc, Dst, Src0, Src1); + } + } + return buildInstr(Opc).addDef(Dst).addUse(Src0).addUse(Src1); + } + + // Fallback implementation of buildInstr. + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, + UseArgsTy &&... Args) { + auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); + return MIB; + } +}; +} // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 7061c014d9b7..f3553966fcdf 100644 --- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -24,6 +24,7 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Types.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Allocator.h" #include "llvm/IR/Intrinsics.h" #include <memory> #include <utility> @@ -63,9 +64,83 @@ private: /// Interface used to lower everything related to calls. const CallLowering *CLI; - /// Mapping of the values of the current LLVM IR function - /// to the related virtual registers. - ValueToVReg ValToVReg; + /// This class contains the mapping from Values to vreg-related data. + class ValueToVRegInfo { + public: + ValueToVRegInfo() = default; + + using VRegListT = SmallVector<unsigned, 1>; + using OffsetListT = SmallVector<uint64_t, 1>; + + using const_vreg_iterator = + DenseMap<const Value *, VRegListT *>::const_iterator; + using const_offset_iterator = + DenseMap<const Value *, OffsetListT *>::const_iterator; + + inline const_vreg_iterator vregs_end() const { return ValToVRegs.end(); } + + VRegListT *getVRegs(const Value &V) { + auto It = ValToVRegs.find(&V); + if (It != ValToVRegs.end()) + return It->second; + + return insertVRegs(V); + } + + OffsetListT *getOffsets(const Value &V) { + auto It = TypeToOffsets.find(V.getType()); + if (It != TypeToOffsets.end()) + return It->second; + + return insertOffsets(V); + } + + const_vreg_iterator findVRegs(const Value &V) const { + return ValToVRegs.find(&V); + } + + bool contains(const Value &V) const { + return ValToVRegs.find(&V) != ValToVRegs.end(); + } + + void reset() { + ValToVRegs.clear(); + TypeToOffsets.clear(); + VRegAlloc.DestroyAll(); + OffsetAlloc.DestroyAll(); + } + + private: + VRegListT *insertVRegs(const Value &V) { + assert(ValToVRegs.find(&V) == ValToVRegs.end() && "Value already exists"); + + // We placement-new using our fast allocator since we never try to free + // the vectors until translation is finished.
+ auto *VRegList = new (VRegAlloc.Allocate()) VRegListT(); + ValToVRegs[&V] = VRegList; + return VRegList; + } + + OffsetListT *insertOffsets(const Value &V) { + assert(TypeToOffsets.find(V.getType()) == TypeToOffsets.end() && + "Type already exists"); + + auto *OffsetList = new (OffsetAlloc.Allocate()) OffsetListT(); + TypeToOffsets[V.getType()] = OffsetList; + return OffsetList; + } + SpecificBumpPtrAllocator<VRegListT> VRegAlloc; + SpecificBumpPtrAllocator<OffsetListT> OffsetAlloc; + + // We store pointers to vectors here since references may be invalidated + // while we hold them if we stored the vectors directly. + DenseMap<const Value *, VRegListT*> ValToVRegs; + DenseMap<const Type *, OffsetListT*> TypeToOffsets; + }; + + /// Mapping of the values of the current LLVM IR function to the related + /// virtual registers and offsets. + ValueToVRegInfo VMap; // N.b. it's not completely obvious that this will be sufficient for every // LLVM IR construct (with "invoke" being the obvious candidate to mess up our @@ -82,7 +157,8 @@ private: // List of stubbed PHI instructions, for values and basic blocks to be filled // in once all MachineBasicBlocks have been created. - SmallVector<std::pair<const PHINode *, MachineInstr *>, 4> PendingPHIs; + SmallVector<std::pair<const PHINode *, SmallVector<MachineInstr *, 1>>, 4> + PendingPHIs; /// Record of what frame index has been allocated to specified allocas for /// this function. @@ -99,7 +175,7 @@ private: /// The general algorithm is: /// 1. Look for a virtual register for each operand or /// create one. - /// 2 Update the ValToVReg accordingly. + /// 2 Update the VMap accordingly. /// 2.alt. For constant arguments, if they are compile time constants, /// produce an immediate in the right operand and do not touch /// ValToReg. Actually we will go with a virtual register for each @@ -134,7 +210,7 @@ private: /// Translate an LLVM string intrinsic (memcpy, memset, ...). bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned Intrinsic); + unsigned ID); void getStackGuard(unsigned DstReg, MachineIRBuilder &MIRBuilder); @@ -146,6 +222,19 @@ private: bool translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder); + // FIXME: temporary function to expose previous interface to call lowering + // until it is refactored. + /// Combines all component registers of \p V into a single scalar with size + /// "max(Offsets) + last size". + unsigned packRegs(const Value &V, MachineIRBuilder &MIRBuilder); + + void unpackRegs(const Value &V, unsigned Src, MachineIRBuilder &MIRBuilder); + + /// Returns true if the value should be split into multiple LLTs. + /// If \p Offsets is given then the split type's offsets will be stored in it. + bool valueIsSplit(const Value &V, + SmallVectorImpl<uint64_t> *Offsets = nullptr); + /// Translate call instruction. /// \pre \p U is a call instruction. bool translateCall(const User &U, MachineIRBuilder &MIRBuilder); @@ -310,6 +399,9 @@ private: bool translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder); + bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder); + bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder); + // Stubs to keep the compiler happy while we implement the rest of the // translation. 
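// Editor's note, not part of this patch, illustrating packRegs above: a
// value of aggregate type {i32, i64} is flattened to two vregs with bit
// offsets {0, 64}; packRegs combines them into one scalar of 64 + 64 = 128
// bits ("max(Offsets) + last size"). The concrete type is an assumption.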
bool translateResume(const User &U, MachineIRBuilder &MIRBuilder) { @@ -327,14 +419,8 @@ private: bool translateFence(const User &U, MachineIRBuilder &MIRBuilder) { return false; } - bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder) { - return false; - } - bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder) { - return false; - } bool translateAddrSpaceCast(const User &U, MachineIRBuilder &MIRBuilder) { - return false; + return translateCast(TargetOpcode::G_ADDRSPACE_CAST, U, MIRBuilder); } bool translateCleanupPad(const User &U, MachineIRBuilder &MIRBuilder) { return false; @@ -381,9 +467,24 @@ private: // * Clear the different maps. void finalizeFunction(); - /// Get the VReg that represents \p Val. - /// If such VReg does not exist, it is created. - unsigned getOrCreateVReg(const Value &Val); + /// Get the VRegs that represent \p Val. + /// Non-aggregate types have just one corresponding VReg and the list can be + /// used as a single "unsigned". Aggregates get flattened. If such VRegs do + /// not exist, they are created. + ArrayRef<unsigned> getOrCreateVRegs(const Value &Val); + + unsigned getOrCreateVReg(const Value &Val) { + auto Regs = getOrCreateVRegs(Val); + if (Regs.empty()) + return 0; + assert(Regs.size() == 1 && + "attempt to get single VReg for aggregate or void"); + return Regs[0]; + } + + /// Allocate some vregs and offsets in the VMap. Then populate just the + /// offsets while leaving the vregs empty. + ValueToVRegInfo::VRegListT &allocateVRegs(const Value &Val); /// Get the frame index that represents \p Val. /// If such VReg does not exist, it is created. diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 4264a866b6c0..471def7f45a3 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -20,6 +20,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CodeGenCoverage.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include <bitset> #include <cstddef> #include <cstdint> @@ -31,7 +32,6 @@ namespace llvm { class APInt; class APFloat; -class LLT; class MachineInstr; class MachineInstrBuilder; class MachineFunction; @@ -81,6 +81,23 @@ enum { /// failed match. GIM_Try, + /// Switch over the opcode on the specified instruction + /// - InsnID - Instruction ID + /// - LowerBound - numerically minimum opcode supported + /// - UpperBound - numerically maximum + 1 opcode supported + /// - Default - failure jump target + /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets + GIM_SwitchOpcode, + + /// Switch over the LLT on the specified instruction operand + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - LowerBound - numerically minimum Type ID supported + /// - UpperBound - numerically maximum + 1 Type ID supported + /// - Default - failure jump target + /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets + GIM_SwitchType, + /// Record the specified instruction /// - NewInsnID - Instruction ID to define /// - InsnID - Instruction ID @@ -117,6 +134,23 @@ enum { GIM_CheckAtomicOrdering, GIM_CheckAtomicOrderingOrStrongerThan, GIM_CheckAtomicOrderingWeakerThan, + /// Check the size of the memory access for the given machine memory operand. 
+ /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - Size - The size in bytes of the memory access + GIM_CheckMemorySizeEqualTo, + /// Check the size of the memory access for the given machine memory operand + /// against the size of an operand. + /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - OpIdx - The operand index to compare the MMO against + GIM_CheckMemorySizeEqualToLLT, + GIM_CheckMemorySizeLessThanLLT, + GIM_CheckMemorySizeGreaterThanLLT, + /// Check a generic C++ instruction predicate + /// - InsnID - Instruction ID + /// - PredicateID - The ID of the predicate function to call + GIM_CheckCxxInsnPredicate, /// Check the type for the specified operand /// - InsnID - Instruction ID @@ -133,12 +167,14 @@ enum { /// - OpIdx - Operand index /// - Expected register bank (specified as a register class) GIM_CheckRegBankForClass, + /// Check the operand matches a complex predicate /// - InsnID - Instruction ID /// - OpIdx - Operand index /// - RendererID - The renderer to hold the result /// - Complex predicate ID GIM_CheckComplexPattern, + /// Check the operand is a specific integer /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -155,6 +191,7 @@ enum { /// - OpIdx - Operand index /// - Expected Intrinsic ID GIM_CheckIntrinsicID, + /// Check the specified operand is an MBB /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -183,6 +220,7 @@ enum { /// - OldInsnID - Instruction ID to mutate /// - NewOpcode - The new opcode to use GIR_MutateOpcode, + /// Build a new instruction /// - InsnID - Instruction ID to define /// - Opcode - The new opcode to use @@ -193,6 +231,7 @@ enum { /// - OldInsnID - Instruction ID to copy from /// - OpIdx - The operand to copy GIR_Copy, + /// Copy an operand to the specified instruction or add a zero register if the /// operand is a zero immediate. /// - NewInsnID - Instruction ID to modify @@ -206,6 +245,7 @@ enum { /// - OpIdx - The operand to copy /// - SubRegIdx - The subregister to copy GIR_CopySubReg, + /// Add an implicit register def to the specified instruction /// - InsnID - Instruction ID to modify /// - RegNum - The register to add @@ -218,10 +258,13 @@ enum { /// - InsnID - Instruction ID to modify /// - RegNum - The register to add GIR_AddRegister, - /// Add a a temporary register to the specified instruction + + /// Add a temporary register to the specified instruction /// - InsnID - Instruction ID to modify /// - TempRegID - The temporary register ID to add + /// - TempRegFlags - The register flags to set GIR_AddTempRegister, + /// Add an immediate to the specified instruction /// - InsnID - Instruction ID to modify /// - Imm - The immediate to add @@ -230,11 +273,17 @@ enum { /// - InsnID - Instruction ID to modify /// - RendererID - The renderer to call GIR_ComplexRenderer, + /// Render sub-operands of complex operands to the specified instruction /// - InsnID - Instruction ID to modify /// - RendererID - The renderer to call /// - RenderOpID - The suboperand to render. GIR_ComplexSubOperandRenderer, + /// Render operands to the specified instruction using a custom function + /// - InsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to get the matched operand from + /// - RendererFnID - Custom renderer function to call + GIR_CustomRenderer, /// Render a G_CONSTANT operator as a sign-extended immediate. /// - NewInsnID - Instruction ID to modify @@ -242,24 +291,34 @@ enum { /// The operand index is implicitly 1. 
GIR_CopyConstantAsSImm, + /// Render a G_FCONSTANT operator as a sign-extended immediate. + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// The operand index is implicitly 1. + GIR_CopyFConstantAsFPImm, + /// Constrain an instruction operand to a register class. /// - InsnID - Instruction ID to modify /// - OpIdx - Operand index /// - RCEnum - Register class enumeration value GIR_ConstrainOperandRC, + /// Constrain an instructions operands according to the instruction /// description. /// - InsnID - Instruction ID to modify GIR_ConstrainSelectedInstOperands, + /// Merge all memory operands into instruction. /// - InsnID - Instruction ID to modify /// - MergeInsnID... - One or more Instruction ID to merge into the result. /// - GIU_MergeMemOperands_EndOfList - Terminates the list of instructions to /// merge. GIR_MergeMemOperands, + /// Erase from parent. /// - InsnID - Instruction ID to erase GIR_EraseFromParent, + /// Create a new temporary register that's not constrained. /// - TempRegID - The temporary register ID to initialize. /// - Expected type @@ -271,6 +330,9 @@ enum { /// Increment the rule coverage counter. /// - RuleID - The ID of the rule that was covered. GIR_Coverage, + + /// Keeping track of the number of the GI opcodes. Must be the last entry. + GIU_NumOpcodes, }; enum { @@ -311,11 +373,27 @@ protected: }; public: - template <class PredicateBitset, class ComplexMatcherMemFn> - struct MatcherInfoTy { + template <class PredicateBitset, class ComplexMatcherMemFn, + class CustomRendererFn> + struct ISelInfoTy { + ISelInfoTy(const LLT *TypeObjects, size_t NumTypeObjects, + const PredicateBitset *FeatureBitsets, + const ComplexMatcherMemFn *ComplexPredicates, + const CustomRendererFn *CustomRenderers) + : TypeObjects(TypeObjects), + FeatureBitsets(FeatureBitsets), + ComplexPredicates(ComplexPredicates), + CustomRenderers(CustomRenderers) { + + for (size_t I = 0; I < NumTypeObjects; ++I) + TypeIDMap[TypeObjects[I]] = I; + } const LLT *TypeObjects; const PredicateBitset *FeatureBitsets; const ComplexMatcherMemFn *ComplexPredicates; + const CustomRendererFn *CustomRenderers; + + SmallDenseMap<LLT, unsigned, 64> TypeIDMap; }; protected: @@ -324,23 +402,35 @@ protected: /// Execute a given matcher table and return true if the match was successful /// and false otherwise. 
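/// Editor's sketch, not part of this patch: the shape of a tiny table this
/// interpreter could run. The opcodes are real, but the instruction IDs,
/// the on-fail offset, and MYTGT_ADD are made up:
///   static const int64_t MatchTable[] = {
///     GIM_Try, /*OnFail*/ 9,
///       GIM_CheckOpcode, /*MI*/ 0, TargetOpcode::G_ADD,
///       GIR_MutateOpcode, /*MI*/ 0, /*NewOpcode*/ MYTGT_ADD,
///     GIR_Done,
///     GIM_Reject,
///   };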
template <class TgtInstructionSelector, class PredicateBitset, - class ComplexMatcherMemFn> + class ComplexMatcherMemFn, class CustomRendererFn> bool executeMatchTable( TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, - const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, + const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn> + &ISelInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, CodeGenCoverage &CoverageInfo) const; + virtual const int64_t *getMatchTable() const { + llvm_unreachable("Should have been overridden by tablegen if used"); + } + virtual bool testImmPredicate_I64(unsigned, int64_t) const { - llvm_unreachable("Subclasses must override this to use tablegen"); + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); } virtual bool testImmPredicate_APInt(unsigned, const APInt &) const { - llvm_unreachable("Subclasses must override this to use tablegen"); + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); } virtual bool testImmPredicate_APFloat(unsigned, const APFloat &) const { - llvm_unreachable("Subclasses must override this to use tablegen"); + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); + } + virtual bool testMIPredicate_MI(unsigned, const MachineInstr &) const { + llvm_unreachable( + "Subclasses must override this with a tablegen-erated function"); } /// Constrain a register operand of an instruction \p I to a specified @@ -353,20 +443,6 @@ protected: const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const; - /// Mutate the newly-selected instruction \p I to constrain its (possibly - /// generic) virtual register operands to the instruction's register class. - /// This could involve inserting COPYs before (for uses) or after (for defs). - /// This requires the number of operands to match the instruction description. - /// \returns whether operand regclass constraining succeeded. - /// - // FIXME: Not all instructions have the same number of operands. We should - // probably expose a constrain helper per operand and let the target selector - // constrain individual registers, like fast-isel. 
- bool constrainSelectedInstRegOperands(MachineInstr &I, - const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; - bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index bf834cf8f5e3..2003a79f6b20 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -40,19 +41,22 @@ enum { GIPFP_I64_Invalid = 0, GIPFP_APInt_Invalid = 0, GIPFP_APFloat_Invalid = 0, + GIPFP_MI_Invalid = 0, }; template <class TgtInstructionSelector, class PredicateBitset, - class ComplexMatcherMemFn> + class ComplexMatcherMemFn, class CustomRendererFn> bool InstructionSelector::executeMatchTable( TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, - const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, + const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn> + &ISelInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, CodeGenCoverage &CoverageInfo) const { + uint64_t CurrentIdx = 0; - SmallVector<uint64_t, 8> OnFailResumeAt; + SmallVector<uint64_t, 4> OnFailResumeAt; enum RejectAction { RejectAndGiveUp, RejectAndResume }; auto handleReject = [&]() -> RejectAction { @@ -60,8 +64,7 @@ bool InstructionSelector::executeMatchTable( dbgs() << CurrentIdx << ": Rejected\n"); if (OnFailResumeAt.empty()) return RejectAndGiveUp; - CurrentIdx = OnFailResumeAt.back(); - OnFailResumeAt.pop_back(); + CurrentIdx = OnFailResumeAt.pop_back_val(); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Resume at " << CurrentIdx << " (" << OnFailResumeAt.size() << " try-blocks remain)\n"); @@ -70,7 +73,8 @@ bool InstructionSelector::executeMatchTable( while (true) { assert(CurrentIdx != ~0u && "Invalid MatchTable index"); - switch (MatchTable[CurrentIdx++]) { + int64_t MatcherOpcode = MatchTable[CurrentIdx++]; + switch (MatcherOpcode) { case GIM_Try: { DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Begin try-block\n"); @@ -124,8 +128,8 @@ bool InstructionSelector::executeMatchTable( dbgs() << CurrentIdx << ": GIM_CheckFeatures(ExpectedBitsetID=" << ExpectedBitsetID << ")\n"); - if ((AvailableFeatures & MatcherInfo.FeatureBitsets[ExpectedBitsetID]) != - MatcherInfo.FeatureBitsets[ExpectedBitsetID]) { + if ((AvailableFeatures & ISelInfo.FeatureBitsets[ExpectedBitsetID]) != + ISelInfo.FeatureBitsets[ExpectedBitsetID]) { if (handleReject() == RejectAndGiveUp) return false; } @@ -136,12 +140,13 @@ bool InstructionSelector::executeMatchTable( int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Expected = MatchTable[CurrentIdx++]; + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); unsigned Opcode = State.MIs[InsnID]->getOpcode(); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID << "], 
ExpectedOpcode=" << Expected << ") // Got=" << Opcode << "\n"); - assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); if (Opcode != Expected) { if (handleReject() == RejectAndGiveUp) return false; @@ -149,6 +154,77 @@ bool InstructionSelector::executeMatchTable( break; } + case GIM_SwitchOpcode: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t LowerBound = MatchTable[CurrentIdx++]; + int64_t UpperBound = MatchTable[CurrentIdx++]; + int64_t Default = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + const int64_t Opcode = State.MIs[InsnID]->getOpcode(); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), { + dbgs() << CurrentIdx << ": GIM_SwitchOpcode(MIs[" << InsnID << "], [" + << LowerBound << ", " << UpperBound << "), Default=" << Default + << ", JumpTable...) // Got=" << Opcode << "\n"; + }); + if (Opcode < LowerBound || UpperBound <= Opcode) { + CurrentIdx = Default; + break; + } + CurrentIdx = MatchTable[CurrentIdx + (Opcode - LowerBound)]; + if (!CurrentIdx) { + CurrentIdx = Default; + break; + } + OnFailResumeAt.push_back(Default); + break; + } + + case GIM_SwitchType: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t LowerBound = MatchTable[CurrentIdx++]; + int64_t UpperBound = MatchTable[CurrentIdx++]; + int64_t Default = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), { + dbgs() << CurrentIdx << ": GIM_SwitchType(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "), [" << LowerBound << ", " + << UpperBound << "), Default=" << Default + << ", JumpTable...) 
// Got="; + if (!MO.isReg()) + dbgs() << "Not a VReg\n"; + else + dbgs() << MRI.getType(MO.getReg()) << "\n"; + }); + if (!MO.isReg()) { + CurrentIdx = Default; + break; + } + const LLT Ty = MRI.getType(MO.getReg()); + const auto TyI = ISelInfo.TypeIDMap.find(Ty); + if (TyI == ISelInfo.TypeIDMap.end()) { + CurrentIdx = Default; + break; + } + const int64_t TypeID = TyI->second; + if (TypeID < LowerBound || UpperBound <= TypeID) { + CurrentIdx = Default; + break; + } + CurrentIdx = MatchTable[CurrentIdx + (TypeID - LowerBound)]; + if (!CurrentIdx) { + CurrentIdx = Default; + break; + } + OnFailResumeAt.push_back(Default); + break; + } + case GIM_CheckNumOperands: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Expected = MatchTable[CurrentIdx++]; @@ -194,7 +270,8 @@ bool InstructionSelector::executeMatchTable( << CurrentIdx << ": GIM_CheckAPIntImmPredicate(MIs[" << InsnID << "], Predicate=" << Predicate << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - assert(State.MIs[InsnID]->getOpcode() && "Expected G_CONSTANT"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_CONSTANT && + "Expected G_CONSTANT"); assert(Predicate > GIPFP_APInt_Invalid && "Expected a valid predicate"); APInt Value; if (State.MIs[InsnID]->getOperand(1).isCImm()) @@ -226,6 +303,21 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckCxxInsnPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckCxxPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(Predicate > GIPFP_MI_Invalid && "Expected a valid predicate"); + + if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID])) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckAtomicOrdering: { int64_t InsnID = MatchTable[CurrentIdx++]; AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; @@ -233,7 +325,6 @@ bool InstructionSelector::executeMatchTable( dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs[" << InsnID << "], " << (uint64_t)Ordering << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->hasOneMemOperand()) if (handleReject() == RejectAndGiveUp) return false; @@ -252,7 +343,6 @@ bool InstructionSelector::executeMatchTable( << ": GIM_CheckAtomicOrderingOrStrongerThan(MIs[" << InsnID << "], " << (uint64_t)Ordering << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->hasOneMemOperand()) if (handleReject() == RejectAndGiveUp) return false; @@ -271,7 +361,6 @@ bool InstructionSelector::executeMatchTable( << ": GIM_CheckAtomicOrderingWeakerThan(MIs[" << InsnID << "], " << (uint64_t)Ordering << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->hasOneMemOperand()) if (handleReject() == RejectAndGiveUp) return false; @@ -282,6 +371,87 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckMemorySizeEqualTo: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + uint64_t Size = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckMemorySizeEqual(MIs[" << InsnID + << "]->memoperands() + " << MMOIdx + << ", Size=" << Size << 
")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << MMO->getSize() << " bytes vs " << Size + << " bytes\n"); + if (MMO->getSize() != Size) + if (handleReject() == RejectAndGiveUp) + return false; + + break; + } + case GIM_CheckMemorySizeEqualToLLT: + case GIM_CheckMemorySizeLessThanLLT: + case GIM_CheckMemorySizeGreaterThanLLT: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE( + TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckMemorySize" + << (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT + ? "EqualTo" + : MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT + ? "GreaterThan" + : "LessThan") + << "LLT(MIs[" << InsnID << "]->memoperands() + " << MMOIdx + << ", OpIdx=" << OpIdx << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg()) { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Not a register\n"); + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + + unsigned Size = MRI.getType(MO.getReg()).getSizeInBits(); + if (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT && + MMO->getSize() * 8 != Size) { + if (handleReject() == RejectAndGiveUp) + return false; + } else if (MatcherOpcode == GIM_CheckMemorySizeLessThanLLT && + MMO->getSize() * 8 >= Size) { + if (handleReject() == RejectAndGiveUp) + return false; + } else if (MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT && + MMO->getSize() * 8 <= Size) + if (handleReject() == RejectAndGiveUp) + return false; + + break; + } case GIM_CheckType: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -291,8 +461,9 @@ bool InstructionSelector::executeMatchTable( << "]->getOperand(" << OpIdx << "), TypeID=" << TypeID << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()) != - MatcherInfo.TypeObjects[TypeID]) { + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg() || + MRI.getType(MO.getReg()) != ISelInfo.TypeObjects[TypeID]) { if (handleReject() == RejectAndGiveUp) return false; } @@ -308,7 +479,6 @@ bool InstructionSelector::executeMatchTable( << InsnID << "]->getOperand(" << OpIdx << "), SizeInBits=" << SizeInBits << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - // iPTR must be looked up in the target. 
if (SizeInBits == 0) { MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent(); @@ -317,11 +487,15 @@ bool InstructionSelector::executeMatchTable( assert(SizeInBits != 0 && "Pointer size must be known"); - const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); - if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) { - if (handleReject() == RejectAndGiveUp) - return false; - } + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (MO.isReg()) { + const LLT &Ty = MRI.getType(MO.getReg()); + if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) + if (handleReject() == RejectAndGiveUp) + return false; + } else if (handleReject() == RejectAndGiveUp) + return false; + break; } case GIM_CheckRegBankForClass: { @@ -333,9 +507,10 @@ bool InstructionSelector::executeMatchTable( << InsnID << "]->getOperand(" << OpIdx << "), RCEnum=" << RCEnum << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (&RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != - RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, - TRI)) { + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg() || + &RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != + RBI.getRegBank(MO.getReg(), MRI, TRI)) { if (handleReject() == RejectAndGiveUp) return false; } @@ -356,7 +531,7 @@ bool InstructionSelector::executeMatchTable( assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); // FIXME: Use std::invoke() when it's available. ComplexRendererFns Renderer = - (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + (ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])( State.MIs[InsnID]->getOperand(OpIdx)); if (Renderer.hasValue()) State.Renderers[RendererID] = Renderer.getValue(); @@ -375,16 +550,19 @@ bool InstructionSelector::executeMatchTable( << InsnID << "]->getOperand(" << OpIdx << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (MO.isReg()) { + // isOperandImmEqual() will sign-extend to 64-bits, so should we. + LLT Ty = MRI.getType(MO.getReg()); + Value = SignExtend64(Value, Ty.getSizeInBits()); - // isOperandImmEqual() will sign-extend to 64-bits, so should we. - LLT Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); - Value = SignExtend64(Value, Ty.getSizeInBits()); + if (!isOperandImmEqual(MO, Value, MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } + } else if (handleReject() == RejectAndGiveUp) + return false; - if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, - MRI)) { - if (handleReject() == RejectAndGiveUp) - return false; - } break; } @@ -467,7 +645,7 @@ bool InstructionSelector::executeMatchTable( } case GIM_Reject: DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIM_Reject"); + dbgs() << CurrentIdx << ": GIM_Reject\n"); if (handleReject() == RejectAndGiveUp) return false; break; @@ -649,6 +827,36 @@ bool InstructionSelector::executeMatchTable( break; } + // TODO: Needs a test case once we have a pattern that uses this. 
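      // Purely illustrative, not generated output: a hand-written fragment in
      // the same flat encoding this interpreter consumes. Every entry is an
      // opcode followed by its operands, each read off with
      // MatchTable[CurrentIdx++]; the index and ID values below are
      // placeholders.
      //
      //   static const int64_t ExampleTable[] = {
      //       GIM_CheckNumOperands, /*InsnID*/ 0, /*Expected*/ 3,
      //       GIM_CheckType, /*InsnID*/ 0, /*OpIdx*/ 0, /*TypeID*/ 0,
      //       GIM_CheckType, /*InsnID*/ 0, /*OpIdx*/ 1, /*TypeID*/ 0,
      //       // ... GIR_* renderers to build the selected instruction ...
      //       GIR_Done,
      //   };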
+ case GIR_CopyFConstantAsFPImm: { + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_FCONSTANT && "Expected G_FCONSTANT"); + if (State.MIs[OldInsnID]->getOperand(1).isFPImm()) + OutMIs[NewInsnID].addFPImm( + State.MIs[OldInsnID]->getOperand(1).getFPImm()); + else + llvm_unreachable("Expected FPImm operand"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopyFPConstantAsFPImm(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "])\n"); + break; + } + + case GIR_CustomRenderer: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t RendererFnID = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CustomRenderer(OutMIs[" + << InsnID << "], MIs[" << OldInsnID << "], " + << RendererFnID << ")\n"); + (ISel.*ISelInfo.CustomRenderers[RendererFnID])(OutMIs[InsnID], + *State.MIs[OldInsnID]); + break; + } case GIR_ConstrainOperandRC: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -710,7 +918,7 @@ bool InstructionSelector::executeMatchTable( int64_t TypeID = MatchTable[CurrentIdx++]; State.TempRegisters[TempRegID] = - MRI.createGenericVirtualRegister(MatcherInfo.TypeObjects[TypeID]); + MRI.createGenericVirtualRegister(ISelInfo.TypeObjects[TypeID]); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": TempRegs[" << TempRegID << "] = GIR_MakeTempReg(" << TypeID << ")\n"); @@ -729,7 +937,7 @@ bool InstructionSelector::executeMatchTable( case GIR_Done: DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIR_Done"); + dbgs() << CurrentIdx << ": GIR_Done\n"); return true; default: diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index e7945ff5bf4f..873587651efd 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -38,7 +38,7 @@ public: return false; if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, MI.getOperand(1).getReg(), MRI)) { - DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = DefMI->getOperand(1).getReg(); Builder.setInstr(MI); @@ -59,10 +59,10 @@ public: MI.getOperand(1).getReg(), MRI)) { unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (isInstUnsupported(TargetOpcode::G_AND, DstTy) || - isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || + isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) return false; - DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); Builder.setInstr(MI); unsigned ZExtSrc = MI.getOperand(1).getReg(); LLT ZExtSrcTy = MRI.getType(ZExtSrc); @@ -87,11 +87,11 @@ public: MI.getOperand(1).getReg(), MRI)) { unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (isInstUnsupported(TargetOpcode::G_SHL, DstTy) || - isInstUnsupported(TargetOpcode::G_ASHR, DstTy) || - isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy}}) || + isInstUnsupported({TargetOpcode::G_ASHR, {DstTy}}) || + isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}})) return false; - DEBUG(dbgs() << ".. Combine MI: " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); Builder.setInstr(MI); unsigned SExtSrc = MI.getOperand(1).getReg(); LLT SExtSrcTy = MRI.getType(SExtSrc); @@ -121,9 +121,9 @@ public: MI.getOperand(1).getReg(), MRI)) { unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); - if (isInstUnsupported(TargetOpcode::G_IMPLICIT_DEF, DstTy)) + if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) return false; - DEBUG(dbgs() << ".. Combine EXT(IMPLICIT_DEF) " << MI;); + LLVM_DEBUG(dbgs() << ".. Combine EXT(IMPLICIT_DEF) " << MI;); Builder.setInstr(MI); Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg); markInstAndDefDead(MI, *DefMI, DeadInsts); @@ -139,9 +139,9 @@ public: return false; unsigned NumDefs = MI.getNumOperands() - 1; - unsigned SrcReg = MI.getOperand(NumDefs).getReg(); - MachineInstr *MergeI = MRI.getVRegDef(SrcReg); - if (!MergeI || (MergeI->getOpcode() != TargetOpcode::G_MERGE_VALUES)) + MachineInstr *MergeI = getOpcodeDef(TargetOpcode::G_MERGE_VALUES, + MI.getOperand(NumDefs).getReg(), MRI); + if (!MergeI) return false; const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; @@ -253,11 +253,8 @@ private: // and as a result, %3, %2, %1 are dead. MachineInstr *PrevMI = &MI; while (PrevMI != &DefMI) { - // If we're dealing with G_UNMERGE_VALUES, tryCombineMerges doesn't really try - // to fold copies in between and we can ignore them here. - if (PrevMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) - break; - unsigned PrevRegSrc = PrevMI->getOperand(1).getReg(); + unsigned PrevRegSrc = + PrevMI->getOperand(PrevMI->getNumOperands() - 1).getReg(); MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); if (MRI.hasOneUse(PrevRegSrc)) { if (TmpDef != &DefMI) { @@ -269,18 +266,16 @@ private: break; PrevMI = TmpDef; } - if ((PrevMI == &DefMI || - DefMI.getOpcode() == TargetOpcode::G_MERGE_VALUES) && - MRI.hasOneUse(DefMI.getOperand(0).getReg())) + if (PrevMI == &DefMI && MRI.hasOneUse(DefMI.getOperand(0).getReg())) DeadInsts.push_back(&DefMI); } /// Checks if the target legalizer info has specified anything about the /// instruction, or if unsupported. 
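  // Illustrative only: the query-and-check pattern the helper below wraps.
  // Given some destination type DstTy (a placeholder here), a client asks the
  // LegalizerInfo for the next legalization step and inspects the action:
  //
  //   LegalizeActionStep Step = LI.getAction({TargetOpcode::G_AND, {DstTy}});
  //   if (Step.Action == LegalizeActions::Unsupported ||
  //       Step.Action == LegalizeActions::NotFound)
  //     return false; // No usable form of G_AND at this type.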
- bool isInstUnsupported(unsigned Opcode, const LLT &DstTy) const { - auto Action = LI.getAction({Opcode, 0, DstTy}); - return Action.first == LegalizerInfo::LegalizeAction::Unsupported || - Action.first == LegalizerInfo::LegalizeAction::NotFound; + bool isInstUnsupported(const LegalityQuery &Query) const { + using namespace LegalizeActions; + auto Step = LI.getAction(Query); + return Step.Action == Unsupported || Step.Action == NotFound; } }; diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 8bd8a9dcd0e2..d122e67b87b8 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -93,12 +93,24 @@ public: const LegalizerInfo &getLegalizerInfo() const { return LI; } private: + /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a + /// Use by extending the operand's type to \p WideTy using the specified \p + /// ExtOpcode for the extension instruction, and replacing the vreg of the + /// operand in place. + void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, + unsigned ExtOpcode); + + /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a + /// Def by extending the operand's type to \p WideTy and truncating it back + /// with the \p TruncOpcode, and replacing the vreg of the operand in place. + void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx = 0, + unsigned TruncOpcode = TargetOpcode::G_TRUNC); /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). The generic /// registers created are appended to Ops, starting at bit 0 of Reg. void extractParts(unsigned Reg, LLT Ty, int NumParts, - SmallVectorImpl<unsigned> &Ops); + SmallVectorImpl<unsigned> &VRegs); MachineRegisterInfo &MRI; const LegalizerInfo &LI; diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index b6735d538b37..713d72eb4c9b 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -19,8 +19,11 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/LowLevelTypeImpl.h" #include <cassert> #include <cstdint> @@ -30,9 +33,67 @@ namespace llvm { +extern cl::opt<bool> DisableGISelLegalityCheck; + class MachineInstr; class MachineIRBuilder; class MachineRegisterInfo; +class MCInstrInfo; + +namespace LegalizeActions { +enum LegalizeAction : std::uint8_t { + /// The operation is expected to be selectable directly by the target, and + /// no transformation is necessary. + Legal, + + /// The operation should be synthesized from multiple instructions acting on + /// a narrower scalar base-type. For example a 64-bit add might be + /// implemented in terms of 32-bit add-with-carry. + NarrowScalar, + + /// The operation should be implemented in terms of a wider scalar + /// base-type. For example a <2 x s8> add could be implemented as a <2 + /// x s32> add (ignoring the high bits). + WidenScalar, + + /// The (vector) operation should be implemented by splitting it into + /// sub-vectors where the operation is legal. 
For example a <8 x s64> add + /// might be implemented as 4 separate <2 x s64> adds. + FewerElements, + + /// The (vector) operation should be implemented by widening the input + /// vector and ignoring the lanes added by doing so. For example <2 x i8> is + /// rarely legal, but you might perform an <8 x i8> and then only look at + /// the first two results. + MoreElements, + + /// The operation itself must be expressed in terms of simpler actions on + /// this target. E.g. a SREM replaced by an SDIV and subtraction. + Lower, + + /// The operation should be implemented as a call to some kind of runtime + /// support library. For example this usually happens on machines that don't + /// support floating-point operations natively. + Libcall, + + /// The target wants to do something special with this combination of + /// operand and type. A callback will be issued when it is needed. + Custom, + + /// This operation is completely unsupported on the target. A programming + /// error has occurred. + Unsupported, + + /// Sentinel value for when no action was found in the specified table. + NotFound, + + /// Fall back onto the old rules. + /// TODO: Remove this once we've migrated + UseLegacyRules, +}; +} // end namespace LegalizeActions + +using LegalizeActions::LegalizeAction; /// Legalization is decided based on an instruction's opcode, which type slot /// we're considering, and what the existing type is. These aspects are gathered @@ -51,64 +112,642 @@ struct InstrAspect { } }; -class LegalizerInfo { -public: - enum LegalizeAction : std::uint8_t { - /// The operation is expected to be selectable directly by the target, and - /// no transformation is necessary. - Legal, - - /// The operation should be synthesized from multiple instructions acting on - /// a narrower scalar base-type. For example a 64-bit add might be - /// implemented in terms of 32-bit add-with-carry. - NarrowScalar, - - /// The operation should be implemented in terms of a wider scalar - /// base-type. For example a <2 x s8> add could be implemented as a <2 - /// x s32> add (ignoring the high bits). - WidenScalar, - - /// The (vector) operation should be implemented by splitting it into - /// sub-vectors where the operation is legal. For example a <8 x s64> add - /// might be implemented as 4 separate <2 x s64> adds. - FewerElements, - - /// The (vector) operation should be implemented by widening the input - /// vector and ignoring the lanes added by doing so. For example <2 x i8> is - /// rarely legal, but you might perform an <8 x i8> and then only look at - /// the first two results. - MoreElements, - - /// The operation itself must be expressed in terms of simpler actions on - /// this target. E.g. a SREM replaced by an SDIV and subtraction. - Lower, - - /// The operation should be implemented as a call to some kind of runtime - /// support library. For example this usually happens on machines that don't - /// support floating-point operations natively. - Libcall, - - /// The target wants to do something special with this combination of - /// operand and type. A callback will be issued when it is needed. - Custom, - - /// This operation is completely unsupported on the target. A programming - /// error has occurred. - Unsupported, - - /// Sentinel value for when no action was found in the specified table. - NotFound, +/// The LegalityQuery object bundles together all the information that's needed +/// to decide whether a given operation is legal or not. 
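/// (Illustrative example, not from this patch: a query asking whether a
/// 64-bit scalar G_ADD is legal can be spelled
///   LegalityQuery(TargetOpcode::G_ADD, {LLT::scalar(64)})
/// while loads and stores additionally describe each memory operand through
/// MMODescrs.)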
+/// For efficiency, it doesn't make a copy of Types so care must be taken not
+/// to free them before using the query.
+struct LegalityQuery {
+  unsigned Opcode;
+  ArrayRef<LLT> Types;
+
+  struct MemDesc {
+    uint64_t Size;
+    AtomicOrdering Ordering;
+  };
+  /// Operations which require memory can use this to place requirements on the
+  /// memory type for each MMO.
+  ArrayRef<MemDesc> MMODescrs;
+
+  constexpr LegalityQuery(unsigned Opcode, const ArrayRef<LLT> Types,
+                          const ArrayRef<MemDesc> MMODescrs)
+      : Opcode(Opcode), Types(Types), MMODescrs(MMODescrs) {}
+  constexpr LegalityQuery(unsigned Opcode, const ArrayRef<LLT> Types)
+      : LegalityQuery(Opcode, Types, {}) {}
+
+  raw_ostream &print(raw_ostream &OS) const;
+};
+
+/// The result of a query. It either indicates a final answer of Legal or
+/// Unsupported or describes an action that must be taken to make an operation
+/// more legal.
+struct LegalizeActionStep {
+  /// The action to take or the final answer.
+  LegalizeAction Action;
+  /// If describing an action, the type index to change. Otherwise zero.
+  unsigned TypeIdx;
+  /// If describing an action, the new type for TypeIdx. Otherwise LLT{}.
+  LLT NewType;
+
+  LegalizeActionStep(LegalizeAction Action, unsigned TypeIdx,
+                     const LLT &NewType)
+      : Action(Action), TypeIdx(TypeIdx), NewType(NewType) {}
+
+  bool operator==(const LegalizeActionStep &RHS) const {
+    return std::tie(Action, TypeIdx, NewType) ==
+           std::tie(RHS.Action, RHS.TypeIdx, RHS.NewType);
+  }
+};
+
+using LegalityPredicate = std::function<bool (const LegalityQuery &)>;
+using LegalizeMutation =
+    std::function<std::pair<unsigned, LLT>(const LegalityQuery &)>;
+
+namespace LegalityPredicates {
+struct TypePairAndMemSize {
+  LLT Type0;
+  LLT Type1;
+  uint64_t MemSize;
+
+  bool operator==(const TypePairAndMemSize &Other) const {
+    return Type0 == Other.Type0 && Type1 == Other.Type1 &&
+           MemSize == Other.MemSize;
+  }
+};
+
+/// True iff P0 and P1 are true.
+template<typename Predicate>
+Predicate all(Predicate P0, Predicate P1) {
+  return [=](const LegalityQuery &Query) {
+    return P0(Query) && P1(Query);
+  };
+}
+/// True iff all given predicates are true.
+template<typename Predicate, typename... Args>
+Predicate all(Predicate P0, Predicate P1, Args... args) {
+  return all(all(P0, P1), args...);
+}
+/// True iff the given type index is the specified type.
+LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit);
+/// True iff the given type index is one of the specified types.
+LegalityPredicate typeInSet(unsigned TypeIdx,
+                            std::initializer_list<LLT> TypesInit);
+/// True iff the given types for the given pair of type indexes are one of the
+/// specified type pairs.
+LegalityPredicate
+typePairInSet(unsigned TypeIdx0, unsigned TypeIdx1,
+              std::initializer_list<std::pair<LLT, LLT>> TypesInit);
+/// True iff the given types and memory size for the given pair of type indexes
+/// and MMO index are one of the specified combinations.
+LegalityPredicate typePairAndMemSizeInSet(
+    unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
+    std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit);
+/// True iff the specified type index is a scalar.
+LegalityPredicate isScalar(unsigned TypeIdx);
+/// True iff the specified type index is a scalar that's narrower than the
+/// given size.
+LegalityPredicate narrowerThan(unsigned TypeIdx, unsigned Size);
+/// True iff the specified type index is a scalar that's wider than the given
+/// size.
+LegalityPredicate widerThan(unsigned TypeIdx, unsigned Size);
+/// True iff the specified type index is a scalar whose size is not a power of
+/// 2.
+LegalityPredicate sizeNotPow2(unsigned TypeIdx);
+/// True iff the specified MMO index has a size that is not a power of 2.
+LegalityPredicate memSizeInBytesNotPow2(unsigned MMOIdx);
+/// True iff the specified type index is a vector whose element count is not a
+/// power of 2.
+LegalityPredicate numElementsNotPow2(unsigned TypeIdx);
+/// True iff the specified MMO index has an atomic ordering of Ordering or
+/// stronger.
+LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx,
+                                                      AtomicOrdering Ordering);
+} // end namespace LegalityPredicates
+
+namespace LegalizeMutations {
+/// Select this specific type for the given type index.
+LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty);
+/// Keep the same type as the given type index.
+LegalizeMutation changeTo(unsigned TypeIdx, unsigned FromTypeIdx);
+/// Widen the type for the given type index to the next power of 2.
+LegalizeMutation widenScalarToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+/// Add more elements to the type for the given type index to the next power of
+/// 2.
+LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+} // end namespace LegalizeMutations
+
+/// A single rule in a legalizer info ruleset.
+/// The specified action is chosen when the predicate is true. Where
+/// appropriate for the action (e.g. for WidenScalar) the new type is selected
+/// using the given mutator.
+class LegalizeRule {
+  LegalityPredicate Predicate;
+  LegalizeAction Action;
+  LegalizeMutation Mutation;
+
+public:
+  LegalizeRule(LegalityPredicate Predicate, LegalizeAction Action,
+               LegalizeMutation Mutation = nullptr)
+      : Predicate(Predicate), Action(Action), Mutation(Mutation) {}
+
+  /// Test whether the LegalityQuery matches.
+  bool match(const LegalityQuery &Query) const {
+    return Predicate(Query);
+  }
+
+  LegalizeAction getAction() const { return Action; }
+
+  /// Determine the change to make.
+  std::pair<unsigned, LLT> determineMutation(const LegalityQuery &Query) const {
+    if (Mutation)
+      return Mutation(Query);
+    return std::make_pair(0, LLT{});
+  }
+};
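// An illustrative sketch, not part of this header: how a LegalizeRule built
// from the predicates and mutations above behaves when applied by hand. The
// wrapper function is hypothetical; targets normally construct rules through
// LegalizeRuleSet (below) rather than directly.
inline LegalizeActionStep exampleApplyRule(const LegalityQuery &Query) {
  using namespace LegalityPredicates;
  // "Widen any scalar at type index 0 narrower than 32 bits to the next
  // power of 2 that is at least 32."
  LegalizeRule Rule(narrowerThan(/*TypeIdx=*/0, /*Size=*/32),
                    LegalizeAction::WidenScalar,
                    LegalizeMutations::widenScalarToNextPow2(0, /*Min=*/32));
  if (Rule.match(Query)) {
    // For e.g. {G_ADD, {s16}} the mutation yields {0, s32}.
    auto Mutation = Rule.determineMutation(Query);
    return LegalizeActionStep(Rule.getAction(), Mutation.first,
                              Mutation.second);
  }
  // No match: a real rule set would fall through to its next rule.
  return LegalizeActionStep(LegalizeAction::NotFound, 0, LLT{});
}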
+
+class LegalizeRuleSet {
+  /// When non-zero, the opcode we are an alias of.
+  unsigned AliasOf;
+  /// If true, there is another opcode that aliases this one.
+  bool IsAliasedByAnother;
+  SmallVector<LegalizeRule, 2> Rules;
+
+#ifndef NDEBUG
+  /// If bit I is set, this rule set contains a rule that may handle (predicate
+  /// on, perform an action upon, or both) the type index I. The uncertainty
+  /// comes from free-form rules executing user-provided lambda functions. We
+  /// conservatively assume such rules do the right thing and cover all type
+  /// indices. The bitset is intentionally 1 bit wider than it absolutely needs
+  /// to be to distinguish such cases from the cases where all type indices are
+  /// individually handled.
+  SmallBitVector TypeIdxsCovered{MCOI::OPERAND_LAST_GENERIC -
+                                 MCOI::OPERAND_FIRST_GENERIC + 2};
+#endif
+
+  unsigned typeIdx(unsigned TypeIdx) {
+    assert(TypeIdx <=
+           (MCOI::OPERAND_LAST_GENERIC - MCOI::OPERAND_FIRST_GENERIC) &&
+           "Type Index is out of bounds");
+#ifndef NDEBUG
+    TypeIdxsCovered.set(TypeIdx);
+#endif
+    return TypeIdx;
+  }
+  void markAllTypeIdxsAsCovered() {
+#ifndef NDEBUG
+    TypeIdxsCovered.set();
+#endif
+  }
+
+  void add(const LegalizeRule &Rule) {
+    assert(AliasOf == 0 &&
+           "RuleSet is aliased, change the representative opcode instead");
+    Rules.push_back(Rule);
+  }
+
+  static bool always(const LegalityQuery &) { return true; }
+
+  /// Use the given action when the predicate is true.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionIf(LegalizeAction Action,
+                            LegalityPredicate Predicate) {
+    add({Predicate, Action});
+    return *this;
+  }
+  /// Use the given action when the predicate is true.
+  /// Action should be an action that requires mutation.
+  LegalizeRuleSet &actionIf(LegalizeAction Action, LegalityPredicate Predicate,
+                            LegalizeMutation Mutation) {
+    add({Predicate, Action, Mutation});
+    return *this;
+  }
+  /// Use the given action when type index 0 is any type in the given list.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<LLT> Types) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typeInSet(typeIdx(0), Types));
+  }
+  /// Use the given action when type index 0 is any type in the given list.
+  /// Action should be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<LLT> Types,
+                             LegalizeMutation Mutation) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typeInSet(typeIdx(0), Types), Mutation);
+  }
+  /// Use the given action when type indexes 0 and 1 form any type pair in the
+  /// given list.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<std::pair<LLT, LLT>> Types) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types));
+  }
+  /// Use the given action when type indexes 0 and 1 form any type pair in the
+  /// given list.
+  /// Action should be an action that requires mutation.
+  LegalizeRuleSet &actionFor(LegalizeAction Action,
+                             std::initializer_list<std::pair<LLT, LLT>> Types,
+                             LegalizeMutation Mutation) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types),
+                    Mutation);
+  }
+  /// Use the given action when type indexes 0 and 1 are both in the given
+  /// list. That is, the type pair is in the cartesian product of the list.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionForCartesianProduct(LegalizeAction Action,
+                                             std::initializer_list<LLT> Types) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, all(typeInSet(typeIdx(0), Types),
+                                typeInSet(typeIdx(1), Types)));
+  }
+  /// Use the given action when type indexes 0 and 1 are both in their
+  /// respective lists.
+  /// That is, the type pair is in the cartesian product of the lists.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &
+  actionForCartesianProduct(LegalizeAction Action,
+                            std::initializer_list<LLT> Types0,
+                            std::initializer_list<LLT> Types1) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, all(typeInSet(typeIdx(0), Types0),
+                                typeInSet(typeIdx(1), Types1)));
+  }
+  /// Use the given action when type indexes 0, 1, and 2 are all in their
+  /// respective lists.
+  /// That is, the type triple is in the cartesian product of the lists.
+  /// Action should not be an action that requires mutation.
+  LegalizeRuleSet &actionForCartesianProduct(
+      LegalizeAction Action, std::initializer_list<LLT> Types0,
+      std::initializer_list<LLT> Types1, std::initializer_list<LLT> Types2) {
+    using namespace LegalityPredicates;
+    return actionIf(Action, all(typeInSet(typeIdx(0), Types0),
+                                all(typeInSet(typeIdx(1), Types1),
+                                    typeInSet(typeIdx(2), Types2))));
+  }
+
+public:
+  LegalizeRuleSet() : AliasOf(0), IsAliasedByAnother(false), Rules() {}
+
+  bool isAliasedByAnother() { return IsAliasedByAnother; }
+  void setIsAliasedByAnother() { IsAliasedByAnother = true; }
+  void aliasTo(unsigned Opcode) {
+    assert((AliasOf == 0 || AliasOf == Opcode) &&
+           "Opcode is already aliased to another opcode");
+    assert(Rules.empty() && "Aliasing will discard rules");
+    AliasOf = Opcode;
+  }
+  unsigned getAlias() const { return AliasOf; }
+
+  /// The instruction is legal if predicate is true.
+  LegalizeRuleSet &legalIf(LegalityPredicate Predicate) {
+    // We have no choice but conservatively assume that the free-form
+    // user-provided Predicate properly handles all type indices:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Legal, Predicate);
+  }
+  /// The instruction is legal when type index 0 is any type in the given list.
+  LegalizeRuleSet &legalFor(std::initializer_list<LLT> Types) {
+    return actionFor(LegalizeAction::Legal, Types);
+  }
+  /// The instruction is legal when type indexes 0 and 1 form any type pair in
+  /// the given list.
+  LegalizeRuleSet &legalFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
+    return actionFor(LegalizeAction::Legal, Types);
+  }
+  /// The instruction is legal when type indexes 0 and 1 along with the memory
+  /// size form any type-and-size tuple in the given list.
+  LegalizeRuleSet &legalForTypesWithMemSize(
+      std::initializer_list<LegalityPredicates::TypePairAndMemSize>
+          TypesAndMemSize) {
+    return actionIf(LegalizeAction::Legal,
+                    LegalityPredicates::typePairAndMemSizeInSet(
+                        typeIdx(0), typeIdx(1), /*MMOIdx*/ 0, TypesAndMemSize));
+  }
+  /// The instruction is legal when type indexes 0 and 1 are both in the given
+  /// list. That is, the type pair is in the cartesian product of the list.
+  LegalizeRuleSet &legalForCartesianProduct(std::initializer_list<LLT> Types) {
+    return actionForCartesianProduct(LegalizeAction::Legal, Types);
+  }
+  /// The instruction is legal when type indexes 0 and 1 are both in their
+  /// respective lists.
+  LegalizeRuleSet &legalForCartesianProduct(std::initializer_list<LLT> Types0,
+                                            std::initializer_list<LLT> Types1) {
+    return actionForCartesianProduct(LegalizeAction::Legal, Types0, Types1);
+  }
+
+  /// The instruction is lowered.
+  LegalizeRuleSet &lower() {
+    using namespace LegalizeMutations;
+    // We have no choice but conservatively assume that predicate-less lowering
+    // properly handles all type indices by design:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Lower, always);
+  }
+  /// The instruction is lowered if predicate is true. Keep type index 0 as the
+  /// same type.
+  LegalizeRuleSet &lowerIf(LegalityPredicate Predicate) {
+    using namespace LegalizeMutations;
+    // We have no choice but conservatively assume that lowering with a
+    // free-form user provided Predicate properly handles all type indices:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Lower, Predicate);
+  }
+  /// The instruction is lowered if predicate is true.
+  LegalizeRuleSet &lowerIf(LegalityPredicate Predicate,
+                           LegalizeMutation Mutation) {
+    // We have no choice but conservatively assume that lowering with a
+    // free-form user provided Predicate properly handles all type indices:
+    markAllTypeIdxsAsCovered();
+    return actionIf(LegalizeAction::Lower, Predicate, Mutation);
+  }
+  /// The instruction is lowered when type index 0 is any type in the given
+  /// list. Keep type index 0 as the same type.
+  LegalizeRuleSet &lowerFor(std::initializer_list<LLT> Types) {
+    return actionFor(LegalizeAction::Lower, Types,
+                     LegalizeMutations::changeTo(0, 0));
+  }
+  /// The instruction is lowered when type index 0 is any type in the given
+  /// list.
+  LegalizeRuleSet &lowerFor(std::initializer_list<LLT> Types,
+                            LegalizeMutation Mutation) {
+    return actionFor(LegalizeAction::Lower, Types, Mutation);
+  }
+  /// The instruction is lowered when type indexes 0 and 1 form any type pair
+  /// in the given list. Keep type index 0 as the same type.
+  LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
+    return actionFor(LegalizeAction::Lower, Types,
+                     LegalizeMutations::changeTo(0, 0));
+  }
+  /// The instruction is lowered when type indexes 0 and 1 form any type pair
+  /// in the given list.
+  LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types,
+                            LegalizeMutation Mutation) {
+    return actionFor(LegalizeAction::Lower, Types, Mutation);
+  }
+  /// The instruction is lowered when type indexes 0 and 1 are both in their
+  /// respective lists.
+  LegalizeRuleSet &lowerForCartesianProduct(std::initializer_list<LLT> Types0,
+                                            std::initializer_list<LLT> Types1) {
+    using namespace LegalityPredicates;
+    return actionForCartesianProduct(LegalizeAction::Lower, Types0, Types1);
+  }
+  /// The instruction is lowered when type indexes 0, 1, and 2 are all in
+  /// their respective lists.
+  LegalizeRuleSet &lowerForCartesianProduct(std::initializer_list<LLT> Types0,
+                                            std::initializer_list<LLT> Types1,
+                                            std::initializer_list<LLT> Types2) {
+    using namespace LegalityPredicates;
+    return actionForCartesianProduct(LegalizeAction::Lower, Types0, Types1,
+                                     Types2);
+  }
+
+  /// Like legalIf, but for the Libcall action.
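  // For instance (hypothetical soft-float target, not from this patch), a
  // target could route FP remainder to the runtime via the Libcall action:
  //
  //   getActionDefinitionsBuilder(TargetOpcode::G_FREM)
  //       .libcallFor({LLT::scalar(32), LLT::scalar(64)});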
+ LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) { + // We have no choice but conservatively assume that a libcall with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::Libcall, Predicate); + } + LegalizeRuleSet &libcallFor(std::initializer_list<LLT> Types) { + return actionFor(LegalizeAction::Libcall, Types); + } + LegalizeRuleSet & + libcallFor(std::initializer_list<std::pair<LLT, LLT>> Types) { + return actionFor(LegalizeAction::Libcall, Types); + } + LegalizeRuleSet & + libcallForCartesianProduct(std::initializer_list<LLT> Types) { + return actionForCartesianProduct(LegalizeAction::Libcall, Types); + } + LegalizeRuleSet & + libcallForCartesianProduct(std::initializer_list<LLT> Types0, + std::initializer_list<LLT> Types1) { + return actionForCartesianProduct(LegalizeAction::Libcall, Types0, Types1); + } + + /// Widen the scalar to the one selected by the mutation if the predicate is + /// true. + LegalizeRuleSet &widenScalarIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::WidenScalar, Predicate, Mutation); + } + /// Narrow the scalar to the one selected by the mutation if the predicate is + /// true. + LegalizeRuleSet &narrowScalarIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation); + } + + /// Add more elements to reach the type selected by the mutation if the + /// predicate is true. + LegalizeRuleSet &moreElementsIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::MoreElements, Predicate, Mutation); + } + /// Remove elements to reach the type selected by the mutation if the + /// predicate is true. + LegalizeRuleSet &fewerElementsIf(LegalityPredicate Predicate, + LegalizeMutation Mutation) { + // We have no choice but conservatively assume that an action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::FewerElements, Predicate, Mutation); + } + + /// The instruction is unsupported. 
+ LegalizeRuleSet &unsupported() { + return actionIf(LegalizeAction::Unsupported, always); + } + LegalizeRuleSet &unsupportedIf(LegalityPredicate Predicate) { + return actionIf(LegalizeAction::Unsupported, Predicate); + } + LegalizeRuleSet &unsupportedIfMemSizeNotPow2() { + return actionIf(LegalizeAction::Unsupported, + LegalityPredicates::memSizeInBytesNotPow2(0)); + } + + LegalizeRuleSet &customIf(LegalityPredicate Predicate) { + // We have no choice but conservatively assume that a custom action with a + // free-form user provided Predicate properly handles all type indices: + markAllTypeIdxsAsCovered(); + return actionIf(LegalizeAction::Custom, Predicate); + } + LegalizeRuleSet &customFor(std::initializer_list<LLT> Types) { + return actionFor(LegalizeAction::Custom, Types); + } + LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) { + return actionForCartesianProduct(LegalizeAction::Custom, Types); + } + LegalizeRuleSet & + customForCartesianProduct(std::initializer_list<LLT> Types0, + std::initializer_list<LLT> Types1) { + return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1); + } + + /// Widen the scalar to the next power of two that is at least MinSize. + /// No effect if the type is not a scalar or is a power of two. + LegalizeRuleSet &widenScalarToNextPow2(unsigned TypeIdx, + unsigned MinSize = 0) { + using namespace LegalityPredicates; + return actionIf(LegalizeAction::WidenScalar, sizeNotPow2(typeIdx(TypeIdx)), + LegalizeMutations::widenScalarToNextPow2(TypeIdx, MinSize)); + } + + LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) { + using namespace LegalityPredicates; + return actionIf(LegalizeAction::NarrowScalar, isScalar(typeIdx(TypeIdx)), + Mutation); + } + + /// Ensure the scalar is at least as wide as Ty. + LegalizeRuleSet &minScalar(unsigned TypeIdx, const LLT &Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::WidenScalar, + narrowerThan(TypeIdx, Ty.getSizeInBits()), + changeTo(typeIdx(TypeIdx), Ty)); + } + + /// Ensure the scalar is at most as wide as Ty. + LegalizeRuleSet &maxScalar(unsigned TypeIdx, const LLT &Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::NarrowScalar, + widerThan(TypeIdx, Ty.getSizeInBits()), + changeTo(typeIdx(TypeIdx), Ty)); + } + + /// Conditionally limit the maximum size of the scalar. + /// For example, when the maximum size of one type depends on the size of + /// another such as extracting N bits from an M bit container. + LegalizeRuleSet &maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, + const LLT &Ty) { + using namespace LegalityPredicates; + using namespace LegalizeMutations; + return actionIf(LegalizeAction::NarrowScalar, + [=](const LegalityQuery &Query) { + return widerThan(TypeIdx, Ty.getSizeInBits()) && + Predicate(Query); + }, + changeTo(typeIdx(TypeIdx), Ty)); + } + + /// Limit the range of scalar sizes to MinTy and MaxTy. + LegalizeRuleSet &clampScalar(unsigned TypeIdx, const LLT &MinTy, + const LLT &MaxTy) { + assert(MinTy.isScalar() && MaxTy.isScalar() && "Expected scalar types"); + return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy); + } + + /// Add more elements to the vector to reach the next power of two. + /// No effect if the type is not a vector or the element count is a power of + /// two. 
+ LegalizeRuleSet &moreElementsToNextPow2(unsigned TypeIdx) { + using namespace LegalityPredicates; + return actionIf(LegalizeAction::MoreElements, + numElementsNotPow2(typeIdx(TypeIdx)), + LegalizeMutations::moreElementsToNextPow2(TypeIdx)); + } + + /// Limit the number of elements in EltTy vectors to at least MinElements. + LegalizeRuleSet &clampMinNumElements(unsigned TypeIdx, const LLT &EltTy, + unsigned MinElements) { + // Mark the type index as covered: + typeIdx(TypeIdx); + return actionIf( + LegalizeAction::MoreElements, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return VecTy.isVector() && VecTy.getElementType() == EltTy && + VecTy.getNumElements() < MinElements; + }, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return std::make_pair( + TypeIdx, LLT::vector(MinElements, VecTy.getScalarSizeInBits())); + }); + } + /// Limit the number of elements in EltTy vectors to at most MaxElements. + LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT &EltTy, + unsigned MaxElements) { + // Mark the type index as covered: + typeIdx(TypeIdx); + return actionIf( + LegalizeAction::FewerElements, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return VecTy.isVector() && VecTy.getElementType() == EltTy && + VecTy.getNumElements() > MaxElements; + }, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return std::make_pair( + TypeIdx, LLT::vector(MaxElements, VecTy.getScalarSizeInBits())); + }); + } + /// Limit the number of elements for the given vectors to at least MinTy's + /// number of elements and at most MaxTy's number of elements. + /// + /// No effect if the type is not a vector or does not have the same element + /// type as the constraints. + /// The element type of MinTy and MaxTy must match. + LegalizeRuleSet &clampNumElements(unsigned TypeIdx, const LLT &MinTy, + const LLT &MaxTy) { + assert(MinTy.getElementType() == MaxTy.getElementType() && + "Expected element types to agree"); + + const LLT &EltTy = MinTy.getElementType(); + return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements()) + .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements()); + } + + /// Fallback on the previous implementation. This should only be used while + /// porting a rule. + LegalizeRuleSet &fallback() { + add({always, LegalizeAction::UseLegacyRules}); + return *this; + } + + /// Check if there is no type index which is obviously not handled by the + /// LegalizeRuleSet in any way at all. + /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. + bool verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const; + + /// Apply the ruleset to the given LegalityQuery. + LegalizeActionStep apply(const LegalityQuery &Query) const; +}; + +class LegalizerInfo { +public: LegalizerInfo(); virtual ~LegalizerInfo() = default; + unsigned getOpcodeIdxForOpcode(unsigned Opcode) const; + unsigned getActionDefinitionsIdx(unsigned Opcode) const; + /// Compute any ancillary tables needed to quickly decide how an operation /// should be handled. This must be called after all "set*Action"methods but /// before any query is made or incorrect results may be returned. void computeTables(); + /// Perform simple self-diagnostic and assert if there is anything obviously + /// wrong with the actions set up. 
+ void verify(const MCInstrInfo &MII) const; + static bool needsLegalizingToDifferentSize(const LegalizeAction Action) { + using namespace LegalizeActions; switch (Action) { case NarrowScalar: case WidenScalar: @@ -121,8 +760,8 @@ public: } } - typedef std::pair<uint16_t, LegalizeAction> SizeAndAction; - typedef std::vector<SizeAndAction> SizeAndActionsVec; + using SizeAndAction = std::pair<uint16_t, LegalizeAction>; + using SizeAndActionsVec = std::vector<SizeAndAction>; using SizeChangeStrategy = std::function<SizeAndActionsVec(const SizeAndActionsVec &v)>; @@ -186,8 +825,9 @@ public: /// and Unsupported for all other scalar types T. static SizeAndActionsVec unsupportedForDifferentSizes(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return increaseToLargerTypesAndDecreaseToLargest(v, Unsupported, - Unsupported); + Unsupported); } /// A SizeChangeStrategy for the common case where legalization for a @@ -196,32 +836,36 @@ public: /// largest legal type. static SizeAndActionsVec widenToLargerTypesAndNarrowToLargest(const SizeAndActionsVec &v) { + using namespace LegalizeActions; assert(v.size() > 0 && "At least one size that can be legalized towards is needed" " for this SizeChangeStrategy"); return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, - NarrowScalar); + NarrowScalar); } static SizeAndActionsVec widenToLargerTypesUnsupportedOtherwise(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, - Unsupported); + Unsupported); } static SizeAndActionsVec narrowToSmallerAndUnsupportedIfTooSmall(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, - Unsupported); + Unsupported); } static SizeAndActionsVec narrowToSmallerAndWidenToSmallest(const SizeAndActionsVec &v) { + using namespace LegalizeActions; assert(v.size() > 0 && "At least one size that can be legalized towards is needed" " for this SizeChangeStrategy"); return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, - WidenScalar); + WidenScalar); } /// A SizeChangeStrategy for the common case where legalization for a @@ -244,8 +888,9 @@ public: /// (FewerElements, vector(4,32)). static SizeAndActionsVec moreToWiderTypesAndLessToWidest(const SizeAndActionsVec &v) { + using namespace LegalizeActions; return increaseToLargerTypesAndDecreaseToLargest(v, MoreElements, - FewerElements); + FewerElements); } /// Helper function to implement many typical SizeChangeStrategy functions. @@ -259,22 +904,46 @@ public: LegalizeAction DecreaseAction, LegalizeAction IncreaseAction); - /// Determine what action should be taken to legalize the given generic - /// instruction opcode, type-index and type. Requires computeTables to have - /// been called. + /// Get the action definitions for the given opcode. Use this to run a + /// LegalityQuery through the definitions. + const LegalizeRuleSet &getActionDefinitions(unsigned Opcode) const; + + /// Get the action definition builder for the given opcode. Use this to define + /// the action definitions. /// - /// \returns a pair consisting of the kind of legalization that should be - /// performed and the destination type. - std::pair<LegalizeAction, LLT> getAction(const InstrAspect &Aspect) const; + /// It is an error to request an opcode that has already been requested by the + /// multiple-opcode variant. 
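  // A sketch of intended use (hypothetical target; names assumed, not part of
  // this patch): a target's LegalizerInfo constructor requests a builder for
  // one or more opcodes, chains rules on it, then finalizes the tables:
  //
  //   MyTargetLegalizerInfo::MyTargetLegalizerInfo() {
  //     const LLT S32 = LLT::scalar(32);
  //     const LLT S64 = LLT::scalar(64);
  //     getActionDefinitionsBuilder({TargetOpcode::G_ADD, TargetOpcode::G_SUB})
  //         .legalFor({S32, S64})
  //         .clampScalar(0, S32, S64)
  //         .widenScalarToNextPow2(0);
  //     computeTables();
  //   }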
+ LegalizeRuleSet &getActionDefinitionsBuilder(unsigned Opcode); + + /// Get the action definition builder for the given set of opcodes. Use this + /// to define the action definitions for multiple opcodes at once. The first + /// opcode given will be considered the representative opcode and will hold + /// the definitions whereas the other opcodes will be configured to refer to + /// the representative opcode. This lowers memory requirements and very + /// slightly improves performance. + /// + /// It would be very easy to introduce unexpected side-effects as a result of + /// this aliasing if it were permitted to request different but intersecting + /// sets of opcodes but that is difficult to keep track of. It is therefore an + /// error to request the same opcode twice using this API, to request an + /// opcode that already has definitions, or to use the single-opcode API on an + /// opcode that has already been requested by this API. + LegalizeRuleSet & + getActionDefinitionsBuilder(std::initializer_list<unsigned> Opcodes); + void aliasActionDefinitions(unsigned OpcodeTo, unsigned OpcodeFrom); + + /// Determine what action should be taken to legalize the described + /// instruction. Requires computeTables to have been called. + /// + /// \returns a description of the next legalization step to perform. + LegalizeActionStep getAction(const LegalityQuery &Query) const; /// Determine what action should be taken to legalize the given generic /// instruction. /// - /// \returns a tuple consisting of the LegalizeAction that should be - /// performed, the type-index it should be performed on and the destination - /// type. - std::tuple<LegalizeAction, unsigned, LLT> - getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; + /// \returns a description of the next legalization step to perform. + LegalizeActionStep getAction(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const; bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; @@ -283,6 +952,15 @@ public: MachineIRBuilder &MIRBuilder) const; private: + /// Determine what action should be taken to legalize the given generic + /// instruction opcode, type-index and type. Requires computeTables to have + /// been called. + /// + /// \returns a pair consisting of the kind of legalization that should be + /// performed and the destination type. + std::pair<LegalizeAction, LLT> + getAspectAction(const InstrAspect &Aspect) const; + /// The SizeAndActionsVec is a representation mapping between all natural /// numbers and an Action. The natural number represents the bit size of /// the InstrAspect. For example, for a target with native support for 32-bit @@ -350,6 +1028,7 @@ private: /// A partial SizeAndActionsVec potentially doesn't cover all bit sizes, /// i.e. it's OK if it doesn't start from size 1. 
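  // For illustration (assumed values): a SizeAndActionsVec for a target with
  // native 32- and 64-bit operations could look like
  //
  //   {{1, WidenScalar},   // bit sizes [ 1, 31] -> widen
  //    {32, Legal},        // bit size 32        -> legal
  //    {33, WidenScalar},  // bit sizes [33, 63] -> widen
  //    {64, Legal},        // bit size 64        -> legal
  //    {65, NarrowScalar}} // bit sizes [65, ..] -> narrow
  //
  // where each entry supplies the action for every size from its own size up
  // to (but not including) the next entry's size.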
static void checkPartialSizeAndActionsVector(const SizeAndActionsVec& v) { + using namespace LegalizeActions; #ifndef NDEBUG // The sizes should be in increasing order int prev_size = -1; @@ -441,7 +1120,7 @@ private: static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; // Data structures used temporarily during construction of legality data: - typedef DenseMap<LLT, LegalizeAction> TypeMap; + using TypeMap = DenseMap<LLT, LegalizeAction>; SmallVector<TypeMap, 1> SpecifiedActions[LastOp - FirstOp + 1]; SmallVector<SizeChangeStrategy, 1> ScalarSizeChangeStrategies[LastOp - FirstOp + 1]; @@ -456,8 +1135,16 @@ private: AddrSpace2PointerActions[LastOp - FirstOp + 1]; std::unordered_map<uint16_t, SmallVector<SizeAndActionsVec, 1>> NumElements2Actions[LastOp - FirstOp + 1]; + + LegalizeRuleSet RulesForOpcode[LastOp - FirstOp + 1]; }; +#ifndef NDEBUG +/// Checks that MIR is fully legal, returns an illegal instruction if it's not, +/// nullptr otherwise +const MachineInstr *machineFunctionIsIllegal(const MachineFunction &MF); +#endif + } // end namespace llvm. #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H diff --git a/include/llvm/CodeGen/GlobalISel/Localizer.h b/include/llvm/CodeGen/GlobalISel/Localizer.h index 0a46eb9e7840..1e2d4763e5e1 100644 --- a/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -70,6 +70,8 @@ public: .set(MachineFunctionProperties::Property::RegBankSelected); } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; }; diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h new file mode 100644 index 000000000000..f77f9a8df7ee --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -0,0 +1,338 @@ +//== ----- llvm/CodeGen/GlobalISel/MIPatternMatch.h --------------------- == // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// Contains matchers for matching SSA Machine Instructions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_GMIR_PATTERNMATCH_H +#define LLVM_GMIR_PATTERNMATCH_H + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { +namespace MIPatternMatch { + +template <typename Reg, typename Pattern> +bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P) { + return P.match(MRI, R); +} + +// TODO: Extend for N use. +template <typename SubPatternT> struct OneUse_match { + SubPatternT SubPat; + OneUse_match(const SubPatternT &SP) : SubPat(SP) {} + + template <typename OpTy> + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { + return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg); + } +}; + +template <typename SubPat> +inline OneUse_match<SubPat> m_OneUse(const SubPat &SP) { + return SP; +} + +struct ConstantMatch { + int64_t &CR; + ConstantMatch(int64_t &C) : CR(C) {} + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { + if (auto MaybeCst = getConstantVRegVal(Reg, MRI)) { + CR = *MaybeCst; + return true; + } + return false; + } +}; + +inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } + +// TODO: Rework this for different kinds of MachineOperand. 
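// An illustrative sketch (register names hypothetical): with the pieces
// defined so far, a client can already match "Reg is defined by a constant"
// and capture the value:
//
//   int64_t Cst;
//   if (mi_match(Reg, MRI, m_ICst(Cst))) {
//     // Reg is defined by a G_CONSTANT; Cst now holds its value.
//   }
//
// The operand and instruction matchers defined below compose the same way,
// e.g. mi_match(Dst, MRI, m_GAdd(m_Reg(Src), m_ICst(Cst))).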
+// Currently assumes the Src for a match is a register. +// We might want to support taking in some MachineOperands and call getReg on +// that. + +struct operand_type_match { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return true; } + bool match(const MachineRegisterInfo &MRI, MachineOperand *MO) { + return MO->isReg(); + } +}; + +inline operand_type_match m_Reg() { return operand_type_match(); } + +/// Matching combinators. +template <typename... Preds> struct And { + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return true; + } +}; + +template <typename Pred, typename... Preds> +struct And<Pred, Preds...> : And<Preds...> { + Pred P; + And(Pred &&p, Preds &&... preds) + : And<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) { + } + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return P.match(MRI, src) && And<Preds...>::match(MRI, src); + } +}; + +template <typename... Preds> struct Or { + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return false; + } +}; + +template <typename Pred, typename... Preds> +struct Or<Pred, Preds...> : Or<Preds...> { + Pred P; + Or(Pred &&p, Preds &&... preds) + : Or<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {} + template <typename MatchSrc> + bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + return P.match(MRI, src) || Or<Preds...>::match(MRI, src); + } +}; + +template <typename... Preds> And<Preds...> m_all_of(Preds &&... preds) { + return And<Preds...>(std::forward<Preds>(preds)...); +} + +template <typename... Preds> Or<Preds...> m_any_of(Preds &&... preds) { + return Or<Preds...>(std::forward<Preds>(preds)...); +} + +template <typename BindTy> struct bind_helper { + static bool bind(const MachineRegisterInfo &MRI, BindTy &VR, BindTy &V) { + VR = V; + return true; + } +}; + +template <> struct bind_helper<MachineInstr *> { + static bool bind(const MachineRegisterInfo &MRI, MachineInstr *&MI, + unsigned Reg) { + MI = MRI.getVRegDef(Reg); + if (MI) + return true; + return false; + } +}; + +template <> struct bind_helper<LLT> { + static bool bind(const MachineRegisterInfo &MRI, LLT &Ty, unsigned Reg) { + Ty = MRI.getType(Reg); + if (Ty.isValid()) + return true; + return false; + } +}; + +template <> struct bind_helper<const ConstantFP *> { + static bool bind(const MachineRegisterInfo &MRI, const ConstantFP *&F, + unsigned Reg) { + F = getConstantFPVRegVal(Reg, MRI); + if (F) + return true; + return false; + } +}; + +template <typename Class> struct bind_ty { + Class &VR; + + bind_ty(Class &V) : VR(V) {} + + template <typename ITy> bool match(const MachineRegisterInfo &MRI, ITy &&V) { + return bind_helper<Class>::bind(MRI, VR, V); + } +}; + +inline bind_ty<unsigned> m_Reg(unsigned &R) { return R; } +inline bind_ty<MachineInstr *> m_MInstr(MachineInstr *&MI) { return MI; } +inline bind_ty<LLT> m_Type(LLT &Ty) { return Ty; } + +// Helper for matching G_FCONSTANT +inline bind_ty<const ConstantFP *> m_GFCst(const ConstantFP *&C) { return C; } + +// General helper for all the binary generic MI such as G_ADD/G_SUB etc +template <typename LHS_P, typename RHS_P, unsigned Opcode, + bool Commutable = false> +struct BinaryOp_match { + LHS_P L; + RHS_P R; + + BinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {} + template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + MachineInstr *TmpMI; + if (mi_match(Op, MRI, m_MInstr(TmpMI))) { + if 
(TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) { + return (L.match(MRI, TmpMI->getOperand(1).getReg()) && + R.match(MRI, TmpMI->getOperand(2).getReg())) || + (Commutable && (R.match(MRI, TmpMI->getOperand(1).getReg()) && + L.match(MRI, TmpMI->getOperand(2).getReg()))); + } + } + return false; + } +}; + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ADD, true> +m_GAdd(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_ADD, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB> m_GSub(const LHS &L, + const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_MUL, true> +m_GMul(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_MUL, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_FADD, true> +m_GFAdd(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_FADD, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_FMUL, true> +m_GFMul(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_FMUL, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_FSUB, false> +m_GFSub(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_FSUB, false>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_AND, true> +m_GAnd(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_AND, true>(L, R); +} + +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true> m_GOr(const LHS &L, + const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true>(L, R); +} + +// Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc +template <typename SrcTy, unsigned Opcode> struct UnaryOp_match { + SrcTy L; + + UnaryOp_match(const SrcTy &LHS) : L(LHS) {} + template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + MachineInstr *TmpMI; + if (mi_match(Op, MRI, m_MInstr(TmpMI))) { + if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 2) { + return L.match(MRI, TmpMI->getOperand(1).getReg()); + } + } + return false; + } +}; + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_ANYEXT> +m_GAnyExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_ANYEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_SEXT> m_GSExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_SEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_ZEXT> m_GZExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_ZEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FPEXT> m_GFPExt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FPEXT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_TRUNC> m_GTrunc(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_TRUNC>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_BITCAST> +m_GBitcast(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_BITCAST>(Src); +} + +template <typename SrcTy> 
+inline UnaryOp_match<SrcTy, TargetOpcode::G_PTRTOINT> +m_GPtrToInt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_PTRTOINT>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_INTTOPTR> +m_GIntToPtr(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_INTTOPTR>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FPTRUNC> +m_GFPTrunc(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FPTRUNC>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FABS> m_GFabs(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FABS>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FNEG> m_GFNeg(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FNEG>(Src); +} + +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::COPY> m_Copy(SrcTy &&Src) { + return UnaryOp_match<SrcTy, TargetOpcode::COPY>(std::forward<SrcTy>(Src)); +} + +// Helper for checking if a Reg is of a specific type. +struct CheckType { + LLT Ty; + CheckType(const LLT &Ty) : Ty(Ty) {} + + bool match(MachineRegisterInfo &MRI, unsigned Reg) { + return MRI.getType(Reg) == Ty; + } +}; + +inline CheckType m_SpecificType(LLT Ty) { return Ty; } + +} // namespace MIPatternMatch +} // namespace llvm + +#endif
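A usage sketch for these matchers (editorial illustration, not part of the patch; the helper name matchAddOfConstant, the register AddReg, and the surrounding combine context are assumed):

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
using namespace llvm;
using namespace MIPatternMatch;

// True iff AddReg is defined by a G_ADD with a G_CONSTANT operand; binds
// SrcReg to the other operand and CstVal to the constant's value. m_GAdd is
// declared commutative, so the constant may appear on either side.
static bool matchAddOfConstant(unsigned AddReg, MachineRegisterInfo &MRI,
                               unsigned &SrcReg, int64_t &CstVal) {
  return mi_match(AddReg, MRI, m_GAdd(m_Reg(SrcReg), m_ICst(CstVal)));
}
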
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index aa875c11d86f..983a4e680d5c 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -23,7 +23,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" -#include <queue> namespace llvm { @@ -32,11 +31,10 @@ class MachineFunction; class MachineInstr; class TargetInstrInfo; -/// Helper class to build MachineInstr. -/// It keeps internally the insertion point and debug location for all -/// the new instructions we want to create. -/// This information can be modify via the related setters. -class MachineIRBuilder { +/// Class which stores all the state required in a MachineIRBuilder. +/// Since MachineIRBuilders will only store state in this object, it allows +/// the BuilderState to be transferred between different kinds of +/// MachineIRBuilders. +struct MachineIRBuilderState { /// MachineFunction under construction. MachineFunction *MF; /// Information used to access the description of the opcodes. @@ -53,15 +51,23 @@ /// @} std::function<void(MachineInstr *)> InsertedInstr; +}; + +/// Helper class to build MachineInstr. +/// It keeps internally the insertion point and debug location for all +/// the new instructions we want to create. +/// This information can be modified via the related setters. +class MachineIRBuilderBase { + MachineIRBuilderState State; const TargetInstrInfo &getTII() { - assert(TII && "TargetInstrInfo is not set"); - return *TII; + assert(State.TII && "TargetInstrInfo is not set"); + return *State.TII; } void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend); - MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1); +protected: unsigned getDestFromArg(unsigned Reg) { return Reg; } unsigned getDestFromArg(LLT Ty) { return getMF().getRegInfo().createGenericVirtualRegister(Ty); @@ -89,30 +95,41 @@ return MIB->getOperand(0).getReg(); } + void validateBinaryOp(unsigned Res, unsigned Op0, unsigned Op1); + public: /// Some constructors for easy use. - MachineIRBuilder() = default; - MachineIRBuilder(MachineFunction &MF) { setMF(MF); } - MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { + MachineIRBuilderBase() = default; + MachineIRBuilderBase(MachineFunction &MF) { setMF(MF); } + MachineIRBuilderBase(MachineInstr &MI) : MachineIRBuilderBase(*MI.getMF()) { setInstr(MI); } + MachineIRBuilderBase(const MachineIRBuilderState &BState) : State(BState) {} + /// Getter for the function we currently build. MachineFunction &getMF() { - assert(MF && "MachineFunction is not set"); - return *MF; + assert(State.MF && "MachineFunction is not set"); + return *State.MF; } + /// Getter for DebugLoc. + const DebugLoc &getDL() { return State.DL; } + + /// Getter for MRI. + MachineRegisterInfo *getMRI() { return State.MRI; } + + /// Getter for the State. + MachineIRBuilderState &getState() { return State; } + /// Getter for the basic block we currently build. MachineBasicBlock &getMBB() { - assert(MBB && "MachineBasicBlock is not set"); - return *MBB; + assert(State.MBB && "MachineBasicBlock is not set"); + return *State.MBB; } /// Current insertion point for new instructions. - MachineBasicBlock::iterator getInsertPt() { - return II; - } + MachineBasicBlock::iterator getInsertPt() { return State.II; } /// Set the insertion point before the specified position. /// \pre MBB must be in getMF(). @@ -137,15 +154,16 @@ public: /// \name Control where instructions we create are recorded (typically for /// visiting again later during legalization). /// @{ + void recordInsertion(MachineInstr *InsertedInstr) const; void recordInsertions(std::function<void(MachineInstr *)> InsertedInstr); void stopRecordingInsertions(); /// @} /// Set the debug location to \p DL for all the next build instructions. - void setDebugLoc(const DebugLoc &DL) { this->DL = DL; } + void setDebugLoc(const DebugLoc &DL) { this->State.DL = DL; } /// Get the current instruction's debug location. - DebugLoc getDebugLoc() { return DL; } + DebugLoc getDebugLoc() { return State.DL; } /// Build and insert <empty> = \p Opcode <empty>. /// The insertion point is the one set by the last call of either @@ -156,20 +174,6 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildInstr(unsigned Opcode); - /// DAG like Generic method for building arbitrary instructions as above. - /// \Opc opcode for the instruction. - /// \Ty Either LLT/TargetRegisterClass/unsigned types for Dst - /// \Args Variadic list of uses of types(unsigned/MachineInstrBuilder) - /// Uses of type MachineInstrBuilder will perform - /// getOperand(0).getReg() to convert to register. - template <typename DstTy, typename... UseArgsTy> - MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, - UseArgsTy &&... Args) { - auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); - addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); - return MIB; - } - /// Build but don't insert <empty> = \p Opcode <empty>. /// /// \pre setMF, setBasicBlock or setMI must have been called. @@ -227,49 +231,6 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV); - /// Build and insert \p Res = G_ADD \p Op0, \p Op1 - /// - /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1, - /// truncated to their width. - /// - /// \pre setBasicBlock or setMI must have been called.
- /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildAdd(unsigned Res, unsigned Op0, - unsigned Op1); - template <typename DstTy, typename... UseArgsTy> - MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) { - unsigned Res = getDestFromArg(Ty); - return buildAdd(Res, (getRegFromArg(UseArgs))...); - } - - /// Build and insert \p Res = G_SUB \p Op0, \p Op1 - /// - /// G_SUB sets \p Res to the sum of integer parameters \p Op0 and \p Op1, - /// truncated to their width. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildSub(unsigned Res, unsigned Op0, - unsigned Op1); - - /// Build and insert \p Res = G_MUL \p Op0, \p Op1 - /// - /// G_MUL sets \p Res to the sum of integer parameters \p Op0 and \p Op1, - /// truncated to their width. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildMul(unsigned Res, unsigned Op0, - unsigned Op1); /// Build and insert \p Res = G_GEP \p Op0, \p Op1 /// @@ -338,34 +299,6 @@ public: MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0, unsigned Op1, unsigned CarryIn); - /// Build and insert \p Res = G_AND \p Op0, \p Op1 - /// - /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p - /// Op1. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - template <typename DstTy, typename... UseArgsTy> - MachineInstrBuilder buildAnd(DstTy &&Dst, UseArgsTy &&... UseArgs) { - return buildAnd(getDestFromArg(Dst), getRegFromArg(UseArgs)...); - } - MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0, - unsigned Op1); - - /// Build and insert \p Res = G_OR \p Op0, \p Op1 - /// - /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p - /// Op1. - /// - /// \pre setBasicBlock or setMI must have been called. - /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers - /// with the same (scalar or vector) type). - /// - /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1); /// Build and insert \p Res = G_ANYEXT \p Op0 /// @@ -399,6 +332,10 @@ public: /// \pre \p Op must be smaller than \p Res /// /// \return The newly created instruction. + template <typename DstType, typename ArgType> + MachineInstrBuilder buildSExt(DstType &&Res, ArgType &&Arg) { + return buildSExt(getDestFromArg(Res), getRegFromArg(Arg)); + } MachineInstrBuilder buildSExt(unsigned Res, unsigned Op); /// Build and insert \p Res = G_ZEXT \p Op @@ -413,6 +350,10 @@ public: /// \pre \p Op must be smaller than \p Res /// /// \return The newly created instruction. 
+ template <typename DstType, typename ArgType> + MachineInstrBuilder buildZExt(DstType &&Res, ArgType &&Arg) { + return buildZExt(getDestFromArg(Res), getRegFromArg(Arg)); + } MachineInstrBuilder buildZExt(unsigned Res, unsigned Op); /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or @@ -423,6 +364,10 @@ public: /// \pre \p Op must be a generic virtual register with scalar or vector type. /// /// \return The newly created instruction. + template <typename DstTy, typename UseArgTy> + MachineInstrBuilder buildSExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) { + return buildSExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use)); + } MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op); /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or @@ -433,6 +378,10 @@ public: /// \pre \p Op must be a generic virtual register with scalar or vector type. /// /// \return The newly created instruction. + template <typename DstTy, typename UseArgTy> + MachineInstrBuilder buildZExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) { + return buildZExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use)); + } MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op); // Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or @@ -462,6 +411,10 @@ public: unsigned Op); /// Build and insert an appropriate cast between two registers of equal size. + template <typename DstType, typename ArgType> + MachineInstrBuilder buildCast(DstType &&Res, ArgType &&Arg) { + return buildCast(getDestFromArg(Res), getRegFromArg(Arg)); + } MachineInstrBuilder buildCast(unsigned Dst, unsigned Src); /// Build and insert G_BR \p Dest @@ -471,7 +424,7 @@ public: /// \pre setBasicBlock or setMI must have been called. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildBr(MachineBasicBlock &BB); + MachineInstrBuilder buildBr(MachineBasicBlock &Dest); /// Build and insert G_BRCOND \p Tst, \p Dest /// @@ -485,7 +438,7 @@ public: /// depend on bit 0 (for now). /// /// \return The newly created instruction. - MachineInstrBuilder buildBrCond(unsigned Tst, MachineBasicBlock &BB); + MachineInstrBuilder buildBrCond(unsigned Tst, MachineBasicBlock &Dest); /// Build and insert G_BRINDIRECT \p Tgt /// @@ -532,8 +485,18 @@ public: /// \pre \p Res must be a generic virtual register with scalar type. /// /// \return The newly created instruction. + template <typename DstType> + MachineInstrBuilder buildFConstant(DstType &&Res, const ConstantFP &Val) { + return buildFConstant(getDestFromArg(Res), Val); + } MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val); + template <typename DstType> + MachineInstrBuilder buildFConstant(DstType &&Res, double Val) { + return buildFConstant(getDestFromArg(Res), Val); + } + MachineInstrBuilder buildFConstant(unsigned Res, double Val); + /// Build and insert \p Res = COPY Op /// /// Register-to-register COPY sets \p Res to \p Op. @@ -559,6 +522,18 @@ public: MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO); + /// Build and insert `Res = <opcode> Addr, MMO`. + /// + /// Loads the value stored at \p Addr. Puts the result in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ MachineInstrBuilder buildLoadInstr(unsigned Opcode, unsigned Res, + unsigned Addr, MachineMemOperand &MMO); + /// Build and insert `G_STORE Val, Addr, MMO`. /// /// Stores the value \p Val to \p Addr. @@ -580,7 +555,10 @@ MachineInstrBuilder buildExtract(unsigned Res, unsigned Src, uint64_t Index); /// Build and insert \p Res = IMPLICIT_DEF. - MachineInstrBuilder buildUndef(unsigned Dst); + template <typename DstType> MachineInstrBuilder buildUndef(DstType &&Res) { + return buildUndef(getDestFromArg(Res)); + } + MachineInstrBuilder buildUndef(unsigned Res); /// Build and insert instructions to put \p Ops together at the specified \p /// Indices to form a larger register. @@ -649,6 +627,10 @@ public: /// \pre \p Res must be smaller than \p Op /// /// \return The newly created instruction. + template <typename DstType, typename SrcType> + MachineInstrBuilder buildFPTrunc(DstType &&Res, SrcType &&Src) { + return buildFPTrunc(getDestFromArg(Res), getRegFromArg(Src)); + } MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op); /// Build and insert \p Res = G_TRUNC \p Op @@ -735,7 +717,28 @@ public: MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val, unsigned Idx); - /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, + /// Build and insert `OldValRes<def>, SuccessRes<def> = + /// G_ATOMIC_CMPXCHG_WITH_SUCCESS Addr, CmpVal, NewVal, MMO`. + /// + /// Atomically replace the value at \p Addr with \p NewVal if it is currently + /// \p CmpVal, otherwise leaves it unchanged. Puts the original value from \p + /// Addr in \p OldValRes, along with an s1 indicating whether it was replaced. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register of scalar type. + /// \pre \p SuccessRes must be a generic virtual register of scalar type. It + /// will be assigned 0 on failure and 1 on success. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, \p CmpVal, and \p NewVal must be generic virtual + /// registers of the same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder + buildAtomicCmpXchgWithSuccess(unsigned OldValRes, unsigned SuccessRes, + unsigned Addr, unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, /// MMO`. /// /// Atomically replace the value at \p Addr with \p NewVal if it is currently @@ -752,6 +755,328 @@ public: MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, unsigned CmpVal, unsigned NewVal, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_<Opcode> Addr, Val, MMO`. + /// + /// Atomically read-modify-update the value at \p Addr with \p Val. Puts the + /// original value from \p Addr in \p OldValRes. The modification is + /// determined by the opcode. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMW(unsigned Opcode, unsigned OldValRes, + unsigned Addr, unsigned Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_XCHG Addr, Val, MMO`.
+ /// + /// Atomically replace the value at \p Addr with \p Val. Puts the original + /// value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_ADD Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the addition of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_SUB Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the subtraction of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_AND Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise and of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_NAND Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise nand of \p Val + /// and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ MachineInstrBuilder buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_OR Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise or of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWOr(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_XOR Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the bitwise xor of \p Val and + /// the original value. Puts the original value from \p Addr in \p OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_MAX Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the signed maximum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_MIN Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the signed minimum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_UMAX Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the unsigned maximum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. 
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_UMIN Addr, Val, MMO`. + /// + /// Atomically replace the value at \p Addr with the unsigned minimum of \p + /// Val and the original value. Puts the original value from \p Addr in \p + /// OldValRes. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, and \p Val must be generic virtual registers of the + /// same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO); +}; + +/// A CRTP class that contains methods for building instructions that can +/// be constant folded. MachineIRBuilders that want to inherit from this will +/// need to implement buildBinaryOp (for constant folding binary ops). +/// Alternatively, they can implement buildInstr(Opc, Dst, Uses...) to perform +/// additional folding for Opc. +template <typename Base> +class FoldableInstructionsBuilder : public MachineIRBuilderBase { + Base &base() { return static_cast<Base &>(*this); } + +public: + using MachineIRBuilderBase::MachineIRBuilderBase; + /// Build and insert \p Res = G_ADD \p Op0, \p Op1 + /// + /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1, + /// truncated to their width. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAdd(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_ADD, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildAdd(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_SUB \p Op0, \p Op1 + /// + /// G_SUB sets \p Res to the difference of integer parameters \p Op0 and \p Op1, + /// truncated to their width. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildSub(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_SUB, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildSub(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildSub(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_MUL \p Op0, \p Op1 + /// + /// G_MUL sets \p Res to the product of integer parameters \p Op0 and \p Op1, + /// truncated to their width. + /// + /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildMul(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_MUL, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildMul(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildMul(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_AND \p Op0, \p Op1 + /// + /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p + /// Op1. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAnd(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_AND, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildAnd(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildAnd(Res, (base().getRegFromArg(UseArgs))...); + } + + /// Build and insert \p Res = G_OR \p Op0, \p Op1 + /// + /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p + /// Op1. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildOr(unsigned Dst, unsigned Src0, unsigned Src1) { + return base().buildBinaryOp(TargetOpcode::G_OR, Dst, Src0, Src1); + } + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildOr(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = base().getDestFromArg(Ty); + return base().buildOr(Res, (base().getRegFromArg(UseArgs))...); + } +}; + +class MachineIRBuilder : public FoldableInstructionsBuilder<MachineIRBuilder> { +public: + using FoldableInstructionsBuilder< + MachineIRBuilder>::FoldableInstructionsBuilder; + MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Dst, + unsigned Src0, unsigned Src1) { + validateBinaryOp(Dst, Src0, Src1); + return buildInstr(Opcode).addDef(Dst).addUse(Src0).addUse(Src1); + } + using FoldableInstructionsBuilder<MachineIRBuilder>::buildInstr; + /// DAG-like generic method for building arbitrary instructions as above. + /// \p Opc opcode for the instruction. + /// \p Ty Either LLT/TargetRegisterClass/unsigned types for Dst + /// \p Args Variadic list of uses of types (unsigned/MachineInstrBuilder). + /// Uses of type MachineInstrBuilder will perform + /// getOperand(0).getReg() to convert to register. + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, + UseArgsTy &&... Args) { + auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); + return MIB; + } }; } // End namespace llvm.
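To see how the split classes fit together, a minimal usage sketch (editorial, not from the patch; MF, MBB, and the 32-bit generic vregs Lo and Hi are assumed to exist in a GlobalISel context):

MachineIRBuilder B(MF);        // all mutable state lives in MachineIRBuilderState
B.setInsertPt(MBB, MBB.end());
LLT S32 = LLT::scalar(32);
// The DstTy overload creates the destination vreg itself and dispatches
// through the CRTP buildBinaryOp, which runs validateBinaryOp first.
auto Sum = B.buildAdd(S32, Lo, Hi);
// A MachineInstrBuilder can be fed straight back in as a use;
// getRegFromArg extracts its operand 0 (the def) behind the scenes.
auto Wide = B.buildZExt(LLT::scalar(64), Sum);
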
diff --git a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index 676955c33fe9..c53ae416e60b 100644 --- a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -22,7 +22,7 @@ /// of an instruction should live. It asks the target which banks may be /// used for each operand of the instruction and what is the cost. Then, /// it chooses the solution which minimizes the cost of the instruction plus -/// the cost of any move that may be needed to to the values into the right +/// the cost of any move that may be needed to get the values into the right /// register bank. /// In other words, the cost for an instruction on a register bank RegBank /// is: Cost of I on RegBank plus the sum of the cost for bringing the diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/include/llvm/CodeGen/GlobalISel/RegisterBank.h index 5d758423f4e7..d5612e17393c 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBank.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBank.h @@ -42,7 +42,7 @@ private: public: RegisterBank(unsigned ID, const char *Name, unsigned Size, - const uint32_t *ContainedRegClasses, unsigned NumRegClasses); + const uint32_t *CoveredClasses, unsigned NumRegClasses); /// Get the identifier of this register bank. unsigned getID() const { return ID; } diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 02868b220984..82fd7eddb68a 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -622,6 +622,8 @@ public: /// \pre \p Reg is a virtual register that either has a bank or a class. /// \returns The constrained register class, or nullptr if there is none. /// \note This is a generic variant of MachineRegisterInfo::constrainRegClass + /// \note Use MachineRegisterInfo::constrainRegAttrs instead for any non-isel + /// purpose, including non-select passes of GlobalISel static const TargetRegisterClass * constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI); diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 5864c15cc8eb..51e3a2732972 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -19,8 +19,10 @@ namespace llvm { +class AnalysisUsage; class MachineFunction; class MachineInstr; +class MachineOperand; class MachineOptimizationRemarkEmitter; class MachineOptimizationRemarkMissed; class MachineRegisterInfo; @@ -32,6 +34,7 @@ class TargetRegisterInfo; class TargetRegisterClass; class Twine; class ConstantFP; +class APFloat; /// Try to constrain Reg to the specified register class. If this fails, /// create a new virtual register in the correct class and insert a COPY before @@ -57,8 +60,21 @@ unsigned constrainOperandRegClass(const MachineFunction &MF, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - unsigned Reg, unsigned OpIdx); + const MachineOperand &RegMO, unsigned OpIdx); +/// Mutate the newly-selected instruction \p I to constrain its (possibly +/// generic) virtual register operands to the instruction's register class. +/// This could involve inserting COPYs before (for uses) or after (for defs). +/// This requires the number of operands to match the instruction description. +/// \returns whether operand regclass constraining succeeded.
+/// +// FIXME: Not all instructions have the same number of operands. We should +// probably expose a constrain helper per operand and let the target selector +// constrain individual registers, like fast-isel. +bool constrainSelectedInstRegOperands(MachineInstr &I, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI); /// Check whether an instruction \p MI is dead: it only defines dead virtual /// registers, and doesn't have other side effects. bool isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI); @@ -85,5 +101,12 @@ const ConstantFP* getConstantFPVRegVal(unsigned VReg, MachineInstr *getOpcodeDef(unsigned Opcode, unsigned Reg, const MachineRegisterInfo &MRI); +/// Returns an APFloat from Val converted to the appropriate size. +APFloat getAPFloatFromSize(double Val, unsigned Size); + +/// Modify analysis usage so it preserves passes required for the SelectionDAG +/// fallback. +void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index d256849be9af..80bd796d5374 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -377,6 +377,8 @@ namespace ISD { /// When the 1st operand is a vector, the shift amount must be in the same /// type. (TLI.getShiftAmountTy() will return the same type when the input /// type is a vector.) + /// For rotates, the shift amount is treated as an unsigned amount modulo + /// the element size of the first operand. SHL, SRA, SRL, ROTL, ROTR, /// Byte Swap and Counting operators. @@ -412,19 +414,11 @@ namespace ISD { /// then the result type must also be a vector type. SETCC, - /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, and - /// op #2 is a *carry value*. This operator checks the result of - /// "LHS - RHS - Carry", and can be used to compare two wide integers: - /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. - /// FIXME: This node is deprecated in favor of SETCCCARRY. - /// It is kept around for now to provide a smooth transition path - /// toward the use of SETCCCARRY and will eventually be removed. - SETCCE, - /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but /// op #2 is a boolean indicating if there is an incoming carry. This /// operator checks the result of "LHS - RHS - Carry", and can be used to - /// compare two wide integers: (setcce lhshi rhshi (subc lhslo rhslo) cc). + /// compare two wide integers: + /// (setcccarry lhshi rhshi (subcarry lhslo rhslo) cc). /// Only valid for integers. SETCCCARRY, @@ -495,7 +489,8 @@ namespace ISD { ZERO_EXTEND_VECTOR_INREG, /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned - /// integer. + /// integer. These have the same semantics as fptosi and fptoui in IR. If + /// the FP value cannot fit in the integer type, the results are undefined. 
FP_TO_SINT, FP_TO_UINT, @@ -779,6 +774,7 @@ ATOMIC_LOAD_ADD, ATOMIC_LOAD_SUB, ATOMIC_LOAD_AND, + ATOMIC_LOAD_CLR, ATOMIC_LOAD_OR, ATOMIC_LOAD_XOR, ATOMIC_LOAD_NAND, diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index 988e6d6cb3a3..9b8d83ce77ca 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_LATENCYPRIORITYQUEUE_H #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Config/llvm-config.h" namespace llvm { class LatencyPriorityQueue; @@ -26,7 +27,7 @@ namespace llvm { LatencyPriorityQueue *PQ; explicit latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {} - bool operator()(const SUnit* left, const SUnit* right) const; + bool operator()(const SUnit* LHS, const SUnit* RHS) const; }; class LatencyPriorityQueue : public SchedulingPriorityQueue { @@ -83,11 +84,15 @@ namespace llvm { void remove(SUnit *SU) override; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override; +#endif + // scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. - void scheduledNode(SUnit *Node) override; + void scheduledNode(SUnit *SU) override; private: void AdjustPriorityOfUnscheduledPreds(SUnit *SU); diff --git a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h index 848ee1dc0dc6..221f16a03f16 100644 --- a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h +++ b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" namespace llvm { -/// \brief This is an alternative analysis pass to MachineBlockFrequencyInfo. +/// This is an alternative analysis pass to MachineBlockFrequencyInfo. /// The difference is that with this pass, the block frequencies are not /// computed when the analysis pass is executed but rather when the BFI result /// is explicitly requested by the analysis client. @@ -49,7 +49,7 @@ private: /// The function. MachineFunction *MF = nullptr; - /// \brief Calculate MBFI and all other analyses that's not available and + /// Calculate MBFI and all other analyses that are not available and /// required by BFI. MachineBlockFrequencyInfo &calculateIfNotAvailable() const; @@ -58,10 +58,10 @@ public: LazyMachineBlockFrequencyInfoPass(); - /// \brief Compute and return the block frequencies. + /// Compute and return the block frequencies. MachineBlockFrequencyInfo &getBFI() { return calculateIfNotAvailable(); } - /// \brief Compute and return the block frequencies. + /// Compute and return the block frequencies. const MachineBlockFrequencyInfo &getBFI() const { return calculateIfNotAvailable(); } diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index f4fa872c7f5b..cdf9ad2588cf 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -326,7 +326,7 @@ namespace llvm { /// createDeadDef - Make sure the range has a value defined at Def. /// If one already exists, return it. Otherwise allocate a new value and /// add liveness for a dead def. - VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator); + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc); /// Create a def of value @p VNI.
Return @p VNI. If there already exists /// a definition at VNI->def, the value defined there must be @p VNI. @@ -454,7 +454,7 @@ namespace llvm { /// overlapsFrom - Return true if the intersection of the two live ranges /// is not empty. The specified iterator is a hint that we can begin /// scanning the Other range starting at I. - bool overlapsFrom(const LiveRange &Other, const_iterator I) const; + bool overlapsFrom(const LiveRange &Other, const_iterator StartPos) const; /// Returns true if all segments of the @p Other live range are completely /// covered by this live range. @@ -482,7 +482,7 @@ namespace llvm { /// @p Use, return {nullptr, false}. If there is an "undef" before @p Use, /// return {nullptr, true}. std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs, - SlotIndex StartIdx, SlotIndex Use); + SlotIndex StartIdx, SlotIndex Kill); /// Simplified version of the above "extendInBlock", which assumes that /// no register lanes are undefined by <def,read-undef> operands. @@ -609,7 +609,7 @@ namespace llvm { void print(raw_ostream &OS) const; void dump() const; - /// \brief Walk the range and assert if any invariants fail to hold. + /// Walk the range and assert if any invariants fail to hold. /// /// Note that this is a no-op when asserts are disabled. #ifdef NDEBUG @@ -791,7 +791,7 @@ namespace llvm { /// L00E0 and L0010 and the L000F lane into L0007 and L0008. The Mod /// function will be applied to the L0010 and L0008 subranges. void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, - std::function<void(LiveInterval::SubRange&)> Mod); + std::function<void(LiveInterval::SubRange&)> Apply); bool operator<(const LiveInterval& other) const { const SlotIndex &thisIndex = beginIndex(); @@ -802,7 +802,7 @@ namespace llvm { void print(raw_ostream &OS) const; void dump() const; - /// \brief Walks the interval and assert if any invariants fail to hold. + /// Walks the interval and assert if any invariants fail to hold. /// /// Note that this is a no-op when asserts are disabled. #ifdef NDEBUG diff --git a/include/llvm/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h index b922e543c856..9e2799bd4414 100644 --- a/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/include/llvm/CodeGen/LiveIntervalUnion.h @@ -154,7 +154,7 @@ public: unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max()); // Was this virtual register visited during collectInterferingVRegs? - bool isSeenInterference(LiveInterval *VReg) const; + bool isSeenInterference(LiveInterval *VirtReg) const; // Did collectInterferingVRegs collect all interferences? bool seenAllInterferences() const { return SeenAllInterferences; } diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h index 1150f3c1c47b..291a07a712cb 100644 --- a/include/llvm/CodeGen/LiveIntervals.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -105,7 +105,7 @@ class VirtRegMap; /// Calculate the spill weight to assign to a single instruction. static float getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, - const MachineInstr &Instr); + const MachineInstr &MI); /// Calculate the spill weight to assign to a single instruction. 
static float getSpillWeight(bool isDef, bool isUse, @@ -462,6 +462,10 @@ class VirtRegMap; void computeRegUnitRange(LiveRange&, unsigned Unit); void computeVirtRegInterval(LiveInterval&); + using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>; + void extendSegmentsToUses(LiveRange &Segments, + ShrinkToUsesWorkList &WorkList, unsigned Reg, + LaneBitmask LaneMask); /// Helper function for repairIntervalsInRange(), walks backwards and /// creates/modifies live segments in \p LR to match the operands found. diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index f9aab0d09e1f..301a45066b4c 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -44,7 +44,7 @@ class MachineOperand; class MachineRegisterInfo; class raw_ostream; -/// \brief A set of physical registers with utility functions to track liveness +/// A set of physical registers with utility functions to track liveness /// when walking backward/forward through a basic block. class LivePhysRegs { const TargetRegisterInfo *TRI = nullptr; @@ -84,7 +84,7 @@ public: LiveRegs.insert(*SubRegs); } - /// \brief Removes a physical register, all its sub-registers, and all its + /// Removes a physical register, all its sub-registers, and all its /// super-registers from the set. void removeReg(unsigned Reg) { assert(TRI && "LivePhysRegs is not initialized."); @@ -98,7 +98,7 @@ public: SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers = nullptr); - /// \brief Returns true if register \p Reg is contained in the set. This also + /// Returns true if register \p Reg is contained in the set. This also /// works if only the super register of \p Reg has been defined, because /// addReg() always adds all sub-registers to the set as well. /// Note: Returns false if just some sub registers are live, use available() @@ -155,7 +155,7 @@ public: void dump() const; private: - /// \brief Adds live-in registers from basic block \p MBB, taking associated + /// Adds live-in registers from basic block \p MBB, taking associated /// lane masks into consideration. void addBlockLiveIns(const MachineBasicBlock &MBB); @@ -169,7 +169,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { return OS; } -/// \brief Computes registers live-in to \p MBB assuming all of its successors +/// Computes registers live-in to \p MBB assuming all of its successors /// live-in lists are up-to-date. Puts the result into the given LivePhysReg /// instance \p LiveRegs. void computeLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB); @@ -185,6 +185,13 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB); +/// Convenience function for recomputing live-in's for \p MBB. +static inline void recomputeLiveIns(MachineBasicBlock &MBB) { + LivePhysRegs LPR; + MBB.clearLiveIns(); + computeAndAddLiveIns(LPR, MBB); +} + } // end namespace llvm #endif // LLVM_CODEGEN_LIVEPHYSREGS_H diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index 84bccde0caa2..53830297c525 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -117,10 +117,13 @@ private: /// registers are created. 
void MRI_NoteNewVirtualRegister(unsigned VReg) override; - /// \brief Check if MachineOperand \p MO is a last use/kill either in the + /// Check if MachineOperand \p MO is a last use/kill either in the /// main live range of \p LI or in one of the matching subregister ranges. bool useIsKill(const LiveInterval &LI, const MachineOperand &MO) const; + /// Create a new empty interval based on OldReg. + LiveInterval &createEmptyIntervalFrom(unsigned OldReg, bool createSubRanges); + public: /// Create a LiveRangeEdit for breaking down parent into smaller pieces. /// @param parent The register being spilled or split. @@ -174,16 +177,13 @@ public: return makeArrayRef(NewRegs).slice(FirstNew); } - /// createEmptyIntervalFrom - Create a new empty interval based on OldReg. - LiveInterval &createEmptyIntervalFrom(unsigned OldReg); - /// createFrom - Create a new virtual register based on OldReg. unsigned createFrom(unsigned OldReg); /// create - Create a new register with the same class and original slot as /// parent. LiveInterval &createEmptyInterval() { - return createEmptyIntervalFrom(getReg()); + return createEmptyIntervalFrom(getReg(), true); } unsigned create() { return createFrom(getReg()); } @@ -233,12 +233,6 @@ public: return Rematted.count(ParentVNI); } - void markDeadRemat(MachineInstr *inst) { - // DeadRemats is an optional field. - if (DeadRemats) - DeadRemats->insert(inst); - } - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try /// to erase it from LIS. void eraseVirtReg(unsigned Reg); diff --git a/include/llvm/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h index fa6827f6b1f9..f62a55c73085 100644 --- a/include/llvm/CodeGen/LiveRegMatrix.h +++ b/include/llvm/CodeGen/LiveRegMatrix.h @@ -107,6 +107,13 @@ public: /// with the highest enum value is returned. InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); + /// Check for interference in the segment [Start, End) that may prevent + /// assignment to PhysReg. If this function returns true, there is + /// interference in the segment [Start, End) of some other interval already + /// assigned to PhysReg. If this function returns false, PhysReg is free at + /// the segment [Start, End). + bool checkInterference(SlotIndex Start, SlotIndex End, unsigned PhysReg); + /// Assign VirtReg to PhysReg. /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and /// update VirtRegMap. The live range is expected to be available in PhysReg.
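A sketch of the new segment-based query (editorial, not from the patch; LIS, Matrix, FirstMI, LastMI, and PhysReg stand in for an assumed register-allocation context):

// Ask whether PhysReg is free over the half-open range [Start, End)
// without naming a particular LiveInterval, unlike the original overload.
SlotIndex Start = LIS.getInstructionIndex(FirstMI);
SlotIndex End = LIS.getInstructionIndex(LastMI);
if (!Matrix.checkInterference(Start, End, PhysReg)) {
  // No interval assigned to PhysReg overlaps [Start, End); it is safe
  // to place a value there for this range.
}
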
diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index dc4956da9637..249545906e01 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -16,6 +16,7 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" @@ -40,6 +41,36 @@ public: init(TRI); } + /// For a machine instruction \p MI, adds all register units used to + /// \p UsedRegUnits and all units defined or clobbered to \p ModifiedRegUnits. + /// This is useful when walking over a range of instructions to track + /// registers used or defined separately. + static void accumulateUsedDefed(const MachineInstr &MI, + LiveRegUnits &ModifiedRegUnits, + LiveRegUnits &UsedRegUnits, + const TargetRegisterInfo *TRI) { + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (O->isRegMask()) + ModifiedRegUnits.addRegsInMask(O->getRegMask()); + if (!O->isReg()) + continue; + unsigned Reg = O->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (O->isDef()) { + // Some architectures (e.g. AArch64 XZR/WZR) have registers that are + // constant and may be used as destinations to indicate the generated + // value is discarded. No need to track such a case as a def. + if (!TRI->isConstantPhysReg(Reg)) + ModifiedRegUnits.addReg(Reg); + } else { + assert(O->isUse() && "Reg operand not a def and not a use"); + UsedRegUnits.addReg(Reg); + } + } + return; + } + /// Initialize and clear the set. void init(const TargetRegisterInfo &TRI) { this->TRI = &TRI; @@ -59,7 +90,7 @@ Units.set(*Unit); } - /// \brief Adds register units covered by physical register \p Reg that are + /// Adds register units covered by physical register \p Reg that are /// part of the lanemask \p Mask. void addRegMasked(unsigned Reg, LaneBitmask Mask) { for (MCRegUnitMaskIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
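The accumulate helper is meant to be called once per instruction while scanning a range; a minimal sketch (assuming a block MBB and a TargetRegisterInfo *TRI from the current subtarget, both outside the patch):

LiveRegUnits ModifiedRegUnits(*TRI), UsedRegUnits(*TRI);
for (const MachineInstr &MI : MBB)
  LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
// Afterwards ModifiedRegUnits holds every unit written (or clobbered by a
// regmask) in MBB, and UsedRegUnits every unit read, tracked separately.
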
+/// We call a block that is ready for its final round of processing `done` +/// (isBlockDone), e.g. when all predecessor information is known. +/// +/// Note that a naive traversal order would be to do two complete passes over +/// all basic blocks/instructions, the first for recording clearances, the +/// second for updating clearance based on backedges. +/// However, for functions without backedges, or functions with a lot of +/// straight-line code, and a small loop, that would be a lot of unnecessary +/// work (since only the BBs that are part of the loop require two passes). +/// +/// E.g., the naive iteration order for the above example is as follows: +/// Naive: PH A B C D A' B' C' D' +/// +/// In the optimized approach we avoid processing D twice, because we +/// can entirely process the predecessors before getting to D. class LoopTraversal { private: struct MBBInfo { /// Whether we have gotten to this block in primary processing yet. bool PrimaryCompleted = false; /// The number of predecessors for which primary processing has completed unsigned IncomingProcessed = 0; /// The value of `IncomingProcessed` at the start of primary processing unsigned PrimaryIncoming = 0; /// The number of predecessors for which all processing steps are done. unsigned IncomingCompleted = 0; MBBInfo() = default; }; using MBBInfoMap = SmallVector<MBBInfo, 4>; /// Helps keep track of whether we processed this block and all its predecessors. MBBInfoMap MBBInfos; public: struct TraversedMBBInfo { /// The basic block. MachineBasicBlock *MBB = nullptr; /// True if this is the first time we process the basic block. bool PrimaryPass = true; /// True if the block is ready for its final round of processing. bool IsDone = true; TraversedMBBInfo(MachineBasicBlock *BB = nullptr, bool Primary = true, bool Done = true) : MBB(BB), PrimaryPass(Primary), IsDone(Done) {} }; LoopTraversal() {} /// Identifies basic blocks that are part of loops and should be /// visited twice and returns an efficient traversal order for all the blocks. typedef SmallVector<TraversedMBBInfo, 4> TraversalOrder; TraversalOrder traverse(MachineFunction &MF); private: /// Returns true if the block is ready for its final round of processing. bool isBlockDone(MachineBasicBlock *MBB); }; } // namespace llvm #endif // LLVM_CODEGEN_LOOPTRAVERSAL_H diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h index b631a8c0122a..e199a1f69ad7 100644 --- a/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -45,7 +45,7 @@ public: /// \returns nullptr if a parsing error occurred. std::unique_ptr<Module> parseIRModule(); - /// \brief Parses MachineFunctions in the MIR file and add them to the given + /// Parses MachineFunctions in the MIR file and adds them to the given /// MachineModuleInfo \p MMI. /// /// \returns true if an error occurred. diff --git a/include/llvm/CodeGen/MIRPrinter.h b/include/llvm/CodeGen/MIRPrinter.h index c73adc3f2b11..078c4b2f6072 100644 --- a/include/llvm/CodeGen/MIRPrinter.h +++ b/include/llvm/CodeGen/MIRPrinter.h @@ -38,7 +38,7 @@ void printMIR(raw_ostream &OS, const MachineFunction &MF); /// this function and the parser will use this function to construct a list if /// it is missing. 
void guessSuccessors(const MachineBasicBlock &MBB, - SmallVectorImpl<MachineBasicBlock*> &Successors, + SmallVectorImpl<MachineBasicBlock*> &Result, bool &IsFallthrough); } // end namespace llvm diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index ba40e522e261..7f46406c4789 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -258,11 +258,11 @@ template <> struct MappingTraits<MachineStackObject> { YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, true); YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional<int64_t>()); - YamlIO.mapOptional("di-variable", Object.DebugVar, + YamlIO.mapOptional("debug-info-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("di-expression", Object.DebugExpr, + YamlIO.mapOptional("debug-info-expression", Object.DebugExpr, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("di-location", Object.DebugLoc, + YamlIO.mapOptional("debug-info-location", Object.DebugLoc, StringValue()); // Don't print it out when it's empty. } @@ -283,6 +283,9 @@ struct FixedMachineStackObject { bool IsAliased = false; StringValue CalleeSavedRegister; bool CalleeSavedRestored = true; + StringValue DebugVar; + StringValue DebugExpr; + StringValue DebugLoc; bool operator==(const FixedMachineStackObject &Other) const { return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && @@ -290,7 +293,9 @@ struct FixedMachineStackObject { StackID == Other.StackID && IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && CalleeSavedRegister == Other.CalleeSavedRegister && - CalleeSavedRestored == Other.CalleeSavedRestored; + CalleeSavedRestored == Other.CalleeSavedRestored && + DebugVar == Other.DebugVar && DebugExpr == Other.DebugExpr + && DebugLoc == Other.DebugLoc; } }; @@ -321,6 +326,12 @@ template <> struct MappingTraits<FixedMachineStackObject> { StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, true); + YamlIO.mapOptional("debug-info-variable", Object.DebugVar, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("debug-info-expression", Object.DebugExpr, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("debug-info-location", Object.DebugLoc, + StringValue()); // Don't print it out when it's empty. } static const bool flow = true; @@ -417,6 +428,7 @@ struct MachineFrameInfo { bool HasOpaqueSPAdjustment = false; bool HasVAStart = false; bool HasMustTailInVarArgFunc = false; + unsigned LocalFrameSize = 0; StringValue SavePoint; StringValue RestorePoint; @@ -434,6 +446,7 @@ struct MachineFrameInfo { HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment && HasVAStart == Other.HasVAStart && HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc && + LocalFrameSize == Other.LocalFrameSize && SavePoint == Other.SavePoint && RestorePoint == Other.RestorePoint; } }; @@ -457,6 +470,7 @@ template <> struct MappingTraits<MachineFrameInfo> { YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false); YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc, false); + YamlIO.mapOptional("localFrameSize", MFI.LocalFrameSize, (unsigned)0); YamlIO.mapOptional("savePoint", MFI.SavePoint, StringValue()); // Don't print it out when it's empty. 
YamlIO.mapOptional("restorePoint", MFI.RestorePoint, @@ -472,6 +486,7 @@ struct MachineFunction { bool Legalized = false; bool RegBankSelected = false; bool Selected = false; + bool FailedISel = false; // Register information bool TracksRegLiveness = false; std::vector<VirtualRegisterDefinition> VirtualRegisters; @@ -495,6 +510,7 @@ template <> struct MappingTraits<MachineFunction> { YamlIO.mapOptional("legalized", MF.Legalized, false); YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); YamlIO.mapOptional("selected", MF.Selected, false); + YamlIO.mapOptional("failedISel", MF.FailedISel, false); YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); YamlIO.mapOptional("registers", MF.VirtualRegisters, std::vector<VirtualRegisterDefinition>()); diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 0c9110cbaa87..ace33efd8713 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -58,7 +58,7 @@ private: public: void addNodeToList(MachineInstr *N); void removeNodeFromList(MachineInstr *N); - void transferNodesFromList(ilist_traits &OldList, instr_iterator First, + void transferNodesFromList(ilist_traits &FromList, instr_iterator First, instr_iterator Last); void deleteNode(MachineInstr *MI); }; @@ -115,13 +115,18 @@ private: /// branch. bool AddressTaken = false; + /// Indicate that this basic block is the entry block of an EH scope, i.e., + /// the block that used to have a catchpad or cleanuppad instruction in the + /// LLVM IR. + bool IsEHScopeEntry = false; + /// Indicate that this basic block is the entry block of an EH funclet. bool IsEHFuncletEntry = false; /// Indicate that this basic block is the entry block of a cleanup funclet. bool IsCleanupFuncletEntry = false; - /// \brief since getSymbol is a relatively heavy-weight operation, the symbol + /// since getSymbol is a relatively heavy-weight operation, the symbol /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; @@ -225,6 +230,14 @@ public: return make_range(getFirstTerminator(), end()); } + /// Returns a range that iterates over the phis in the basic block. + inline iterator_range<iterator> phis() { + return make_range(begin(), getFirstNonPHI()); + } + inline iterator_range<const_iterator> phis() const { + return const_cast<MachineBasicBlock *>(this)->phis(); + } + // Machine-CFG iterators using pred_iterator = std::vector<MachineBasicBlock *>::iterator; using const_pred_iterator = std::vector<MachineBasicBlock *>::const_iterator; @@ -367,6 +380,14 @@ public: bool hasEHPadSuccessor() const; + /// Returns true if this is the entry block of an EH scope, i.e., the block + /// that used to have a catchpad or cleanuppad instruction in the LLVM IR. + bool isEHScopeEntry() const { return IsEHScopeEntry; } + + /// Indicates if this is the entry block of an EH scope, i.e., the block that + /// that used to have a catchpad or cleanuppad instruction in the LLVM IR. + void setIsEHScopeEntry(bool V = true) { IsEHScopeEntry = V; } + /// Returns true if this is the entry block of an EH funclet. bool isEHFuncletEntry() const { return IsEHFuncletEntry; } @@ -449,6 +470,18 @@ public: /// Replace successor OLD with NEW and update probability info. void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Copy a successor (and any probability info) from original block to this + /// block's. Uses an iterator into the original blocks successors. 
+ /// + /// This is useful when doing a partial clone of successors. Afterward, the + /// probabilities may need to be normalized. + void copySuccessor(MachineBasicBlock *Orig, succ_iterator I); + + /// Split the old successor into old plus new and update the probability + /// info. + void splitSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New, + bool NormalizeSuccProbs = false); + /// Transfers all the successors from MBB to this machine basic block (i.e., /// copies all the successors of FromMBB and removes all the successors from /// FromMBB). @@ -546,7 +579,7 @@ public: /// Check if the edge between this block and the given successor \p /// Succ can be split. If this returns true a subsequent call to /// SplitCriticalEdge is guaranteed to return a valid basic block if - /// no changes occured in the meantime. + /// no changes occurred in the meantime. bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const; void pop_front() { Insts.pop_front(); } @@ -685,12 +718,19 @@ public: bool IsCond); /// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE - /// instructions. Return UnknownLoc if there is none. + /// and DBG_LABEL instructions. Return UnknownLoc if there is none. DebugLoc findDebugLoc(instr_iterator MBBI); DebugLoc findDebugLoc(iterator MBBI) { return findDebugLoc(MBBI.getInstrIterator()); } + /// Find the previous valid DebugLoc preceding MBBI, skipping any DBG_VALUE + /// instructions. Return UnknownLoc if there is none. + DebugLoc findPrevDebugLoc(instr_iterator MBBI); + DebugLoc findPrevDebugLoc(iterator MBBI) { + return findPrevDebugLoc(MBBI.getInstrIterator()); + } + /// Find and return the merged DebugLoc of the branch instructions of the /// block. Return UnknownLoc if there is none. DebugLoc findBranchDebugLoc(); @@ -717,9 +757,10 @@ public: // Debugging methods. void dump() const; - void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; + void print(raw_ostream &OS, const SlotIndexes * = nullptr, + bool IsStandalone = true) const; void print(raw_ostream &OS, ModuleSlotTracker &MST, - const SlotIndexes* = nullptr) const; + const SlotIndexes * = nullptr, bool IsStandalone = true) const; // Printing method used by LoopInfo. void printAsOperand(raw_ostream &OS, bool PrintType = true) const; @@ -874,7 +915,7 @@ public: /// const_instr_iterator} and the respective reverse iterators. template<typename IterT> inline IterT skipDebugInstructionsForward(IterT It, IterT End) { - while (It != End && It->isDebugValue()) + while (It != End && It->isDebugInstr()) It++; return It; } @@ -885,7 +926,7 @@ inline IterT skipDebugInstructionsForward(IterT It, IterT End) { /// const_instr_iterator} and the respective reverse iterators. template<class IterT> inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) { - while (It != Begin && It->isDebugValue()) + while (It != Begin && It->isDebugInstr()) It--; return It; } diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index 1705a0f7e59b..b0b5420a884b 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -63,7 +63,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, /// This class is a data container for one entry in a MachineConstantPool. /// It contains a pointer to the value and an offset from the start of /// the constant pool. -/// @brief An entry in a MachineConstantPool +/// An entry in a MachineConstantPool class MachineConstantPoolEntry { public: /// The constant itself. 
@@ -117,7 +117,7 @@ public: /// the use of MO_ConstantPoolIndex values. When emitting assembly or machine /// code, these virtual address references are converted to refer to the /// address of the function constant pool values. -/// @brief The machine constant pool. +/// The machine constant pool. class MachineConstantPool { unsigned PoolAlignment; ///< The alignment for the pool. std::vector<MachineConstantPoolEntry> Constants; ///< The pool of constants. @@ -128,7 +128,7 @@ class MachineConstantPool { const DataLayout &getDataLayout() const { return DL; } public: - /// @brief The only constructor. + /// The only constructor. explicit MachineConstantPool(const DataLayout &DL) : PoolAlignment(1), DL(DL) {} ~MachineConstantPool(); diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h index ffbcc62bfa36..75d75bc3669a 100644 --- a/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -37,9 +37,9 @@ public: MachineDominanceFrontier(); - DominanceFrontierBase<MachineBasicBlock, false> &getBase() { return Base; } + ForwardDominanceFrontierBase<MachineBasicBlock> &getBase() { return Base; } - const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { + const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { return Base.getRoots(); } diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 98fdb51aae2f..e3d3d169db97 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -45,7 +45,7 @@ using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; /// compute a normal dominator tree. /// class MachineDominatorTree : public MachineFunctionPass { - /// \brief Helper structure used to hold all the basic blocks + /// Helper structure used to hold all the basic blocks /// involved in the split of a critical edge. struct CriticalEdge { MachineBasicBlock *FromBB; @@ -53,12 +53,12 @@ class MachineDominatorTree : public MachineFunctionPass { MachineBasicBlock *NewBB; }; - /// \brief Pile up all the critical edges to be split. + /// Pile up all the critical edges to be split. /// The splitting of a critical edge is local and thus, it is possible /// to apply several of those changes at the same time. mutable SmallVector<CriticalEdge, 32> CriticalEdgesToSplit; - /// \brief Remember all the basic blocks that are inserted during + /// Remember all the basic blocks that are inserted during /// edge splitting. /// Invariant: NewBBs == all the basic blocks contained in the NewBB /// field of all the elements of CriticalEdgesToSplit. @@ -69,7 +69,7 @@ class MachineDominatorTree : public MachineFunctionPass { /// The DominatorTreeBase that is used to compute a normal dominator tree std::unique_ptr<DomTreeBase<MachineBasicBlock>> DT; - /// \brief Apply all the recorded critical edges to the DT. + /// Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses /// the fast query path of DT as much as possible. /// @@ -228,7 +228,7 @@ public: void print(raw_ostream &OS, const Module*) const override; - /// \brief Record that the critical edge (FromBB, ToBB) has been + /// Record that the critical edge (FromBB, ToBB) has been /// split with NewBB. 
/// It is best to use this method instead of directly updating the /// underlying information, because this helps mitigate the @@ -249,12 +249,6 @@ public: "A basic block inserted via edge splitting cannot appear twice"); CriticalEdgesToSplit.push_back({FromBB, ToBB, NewBB}); } - - /// \brief Verify the correctness of the domtree by re-computing it. - /// - /// This should only be used for debugging as it aborts the program if the - /// verification fails. - void verifyDomTree() const; }; //===------------------------------------- diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index f887517217e1..2d6081f3577d 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -85,9 +85,23 @@ public: /// stack offsets of the object, eliminating all MO_FrameIndex operands from /// the program. /// -/// @brief Abstract Stack Frame Information +/// Abstract Stack Frame Information class MachineFrameInfo { +public: + /// Stack Smashing Protection (SSP) rules require that vulnerable stack + /// allocations are located close to the stack protector. + enum SSPLayoutKind { + SSPLK_None, ///< Did not trigger a stack protector. No effect on data + ///< layout. + SSPLK_LargeArray, ///< Array or nested array >= SSP-buffer-size. Closest + ///< to the stack protector. + SSPLK_SmallArray, ///< Array or nested array < SSP-buffer-size. 2nd closest + ///< to the stack protector. + SSPLK_AddrOf ///< The address of this allocation is exposed and + ///< triggered protection. 3rd closest to the protector. + }; +private: // Represent a single object allocated on the stack. struct StackObject { // The offset of this object from the stack pointer on entry to @@ -123,6 +137,9 @@ class MachineFrameInfo { /// necessarily reside in the same contiguous memory block as other stack /// objects. Objects with differing stack IDs should not be merged or /// substituted for each other. + /// + /// It is assumed a target uses consecutive, increasing stack IDs starting + /// from 1. uint8_t StackID; /// If this stack object originated from an Alloca instruction @@ -145,12 +162,15 @@ /// If true, the object has been zero-extended. bool isSExt = false; + uint8_t SSPLayout; + StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, bool IsAliased, uint8_t StackID = 0) : SPOffset(SPOffset), Size(Size), Alignment(Alignment), isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), - StackID(StackID), Alloca(Alloca), isAliased(IsAliased) {} + StackID(StackID), Alloca(Alloca), isAliased(IsAliased), + SSPLayout(SSPLK_None) {} }; /// The alignment of the stack. @@ -485,6 +505,20 @@ public: Objects[ObjectIdx+NumFixedObjects].SPOffset = SPOffset; } + SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return (SSPLayoutKind)Objects[ObjectIdx+NumFixedObjects].SSPLayout; + } + + void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + assert(!isDeadObjectIndex(ObjectIdx) && + "Setting SSP layout for a dead object?"); + Objects[ObjectIdx+NumFixedObjects].SSPLayout = Kind; + } + /// Return the number of bytes that must be allocated to hold /// all of the fixed size frame objects. This is only valid after /// Prolog/Epilog code insertion has finalized the stack frame layout. 
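The SSPLayoutKind markings and the getObjectSSPLayout/setObjectSSPLayout accessors above are meant to be driven by a stack-protector analysis. A minimal sketch of the intended usage follows; the classifyForSSP helper, its IsLargeArray parameter, and the FrameIdx value are illustrative assumptions, not part of this patch:

// Sketch only: classify one frame object for stack-protector layout.
void classifyForSSP(llvm::MachineFrameInfo &MFI, int FrameIdx,
                    bool IsLargeArray) {
  using MFI_t = llvm::MachineFrameInfo;
  if (MFI.isDeadObjectIndex(FrameIdx))
    return; // Dead objects must not carry an SSP layout (see assert above).
  MFI.setObjectSSPLayout(FrameIdx, IsLargeArray ? MFI_t::SSPLK_LargeArray
                                                : MFI_t::SSPLK_SmallArray);
  // Frame layout can later sort objects so that SSPLK_LargeArray lands
  // closest to the protector, then SSPLK_SmallArray, then SSPLK_AddrOf.
  assert(MFI.getObjectSSPLayout(FrameIdx) != MFI_t::SSPLK_None);
}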
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 7d8b7ebe8d62..e8a4d529faac 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -73,6 +73,7 @@ class SlotIndexes; class TargetMachine; class TargetRegisterClass; class TargetSubtargetInfo; +struct WasmEHFuncInfo; struct WinEHFuncInfo; template <> struct ilist_alloc_traits<MachineBasicBlock> { @@ -80,8 +81,8 @@ template <> struct ilist_callback_traits<MachineBasicBlock> { - void addNodeToList(MachineBasicBlock* MBB); - void removeNodeFromList(MachineBasicBlock* MBB); + void addNodeToList(MachineBasicBlock* N); + void removeNodeFromList(MachineBasicBlock* N); template <class Iterator> void transferNodesFromList(ilist_callback_traits &OldList, Iterator, Iterator) { @@ -96,7 +97,7 @@ template <> struct ilist_callback_traits<MachineBasicBlock> { struct MachineFunctionInfo { virtual ~MachineFunctionInfo(); - /// \brief Factory function: default behavior is to call new using the + /// Factory function: default behavior is to call new using the /// supplied allocator. /// /// This function can be overridden in a derived class. @@ -245,6 +246,10 @@ class MachineFunction { // Keep track of jump tables for switch instructions MachineJumpTableInfo *JumpTableInfo; + // Keeps track of Wasm exception handling related data. This will be null for + // functions that aren't using a wasm EH personality. + WasmEHFuncInfo *WasmEHInfo = nullptr; + // Keeps track of Windows exception handling related data. This will be null // for functions that aren't using a funclet-based EH personality. WinEHFuncInfo *WinEHInfo = nullptr; @@ -319,6 +324,7 @@ class MachineFunction { bool CallsEHReturn = false; bool CallsUnwindInit = false; + bool HasEHScopes = false; bool HasEHFunclets = false; /// List of C++ TypeInfo used. @@ -349,17 +355,18 @@ public: struct VariableDbgInfo { const DILocalVariable *Var; const DIExpression *Expr; - unsigned Slot; + // The Slot can be negative for fixed stack objects. + int Slot; const DILocation *Loc; VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr, - unsigned Slot, const DILocation *Loc) + int Slot, const DILocation *Loc) : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {} }; using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>; VariableDbgInfoMapTy VariableDbgInfos; - MachineFunction(const Function &F, const TargetMachine &TM, + MachineFunction(const Function &F, const TargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &MMI); MachineFunction(const MachineFunction &) = delete; @@ -430,6 +437,12 @@ public: MachineConstantPool *getConstantPool() { return ConstantPool; } const MachineConstantPool *getConstantPool() const { return ConstantPool; } + /// getWasmEHFuncInfo - Return information about how the current function uses + /// Wasm exception handling. Returns null for functions that don't use wasm + /// exception handling. + const WasmEHFuncInfo *getWasmEHFuncInfo() const { return WasmEHInfo; } + WasmEHFuncInfo *getWasmEHFuncInfo() { return WasmEHInfo; } + /// getWinEHFuncInfo - Return information about how the current function uses /// Windows exception handling. Returns null for functions that don't use /// funclets for exception handling. 
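The MachineFunctionInfo factory described above is the hook targets use to attach per-function state to a MachineFunction. A minimal sketch of the usual pattern; TargetFooFunctionInfo, its field, and getVarArgsFI are hypothetical names introduced only for illustration:

// Hypothetical target-private function info, allocated via the factory.
struct TargetFooFunctionInfo : public llvm::MachineFunctionInfo {
  unsigned VarArgsFrameIndex = 0; // example of target-specific state
  explicit TargetFooFunctionInfo(llvm::MachineFunction &MF) {}
};

// getInfo<Ty>() lazily constructs the object through
// MachineFunctionInfo::create<Ty>, using the function's allocator.
unsigned getVarArgsFI(llvm::MachineFunction &MF) {
  return MF.getInfo<TargetFooFunctionInfo>()->VarArgsFrameIndex;
}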
@@ -609,7 +622,7 @@ public: //===--------------------------------------------------------------------===// // Internal functions used to automatically number MachineBasicBlocks - /// \brief Adds the MBB to the internal numbering. Returns the unique number + /// Adds the MBB to the internal numbering. Returns the unique number /// assigned to the MBB. unsigned addToMBBNumbering(MachineBasicBlock *MBB) { MBBNumbering.push_back(MBB); @@ -695,14 +708,8 @@ public: OperandRecycler.deallocate(Cap, Array); } - /// \brief Allocate and initialize a register mask with @p NumRegister bits. - uint32_t *allocateRegisterMask(unsigned NumRegister) { - unsigned Size = (NumRegister + 31) / 32; - uint32_t *Mask = Allocator.Allocate<uint32_t>(Size); - for (unsigned i = 0; i != Size; ++i) - Mask[i] = 0; - return Mask; - } + /// Allocate and initialize a register mask with @p NumRegister bits. + uint32_t *allocateRegMask(); /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand /// pointers. This array is owned by the MachineFunction. @@ -759,6 +766,9 @@ public: bool callsUnwindInit() const { return CallsUnwindInit; } void setCallsUnwindInit(bool b) { CallsUnwindInit = b; } + bool hasEHScopes() const { return HasEHScopes; } + void setHasEHScopes(bool V) { HasEHScopes = V; } + bool hasEHFunclets() const { return HasEHFunclets; } void setHasEHFunclets(bool V) { HasEHFunclets = V; } @@ -793,7 +803,7 @@ public: void addCleanup(MachineBasicBlock *LandingPad); void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter, - const BlockAddress *RecoverLabel); + const BlockAddress *RecoverBA); void addSEHCleanupHandler(MachineBasicBlock *LandingPad, const Function *Cleanup); @@ -860,7 +870,7 @@ public: /// Collect information used to emit debugging information of a variable. void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr, - unsigned Slot, const DILocation *Loc) { + int Slot, const DILocation *Loc) { VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc); } diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 3c1c1bb14f42..88e13cdf4138 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -80,7 +80,21 @@ public: FrameDestroy = 1 << 1, // Instruction is used as a part of // function frame destruction code. BundledPred = 1 << 2, // Instruction has bundled predecessors. - BundledSucc = 1 << 3 // Instruction has bundled successors. + BundledSucc = 1 << 3, // Instruction has bundled successors. + FmNoNans = 1 << 4, // Instruction does not support Fast + // math NaN values. + FmNoInfs = 1 << 5, // Instruction does not support Fast + // math infinity values. + FmNsz = 1 << 6, // Instruction is not required to retain + // signed zero values. + FmArcp = 1 << 7, // Instruction supports Fast math + // reciprocal approximations. + FmContract = 1 << 8, // Instruction supports Fast math + // contraction operations like fma. + FmAfn = 1 << 9, // Instruction may map to Fast math + // intrinsic approximation. + FmReassoc = 1 << 10 // Instruction supports Fast math + // reassociation of operand order. }; private: @@ -93,7 +107,7 @@ private: using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity; OperandCapacity CapOperands; // Capacity of the Operands array. - uint8_t Flags = 0; // Various bits of additional + uint16_t Flags = 0; // Various bits of additional // information about machine // instruction. 
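With Flags widened to 16 bits, the seven new fast-math flags can be set and queried like any other MIFlag. A short sketch, assuming MI is an existing floating-point MachineInstr; markFastMath is an illustrative name:

// Sketch: record and query fast-math properties on a MachineInstr.
void markFastMath(llvm::MachineInstr &MI) {
  MI.setFlag(llvm::MachineInstr::FmContract); // allow fma-style contraction
  MI.setFlag(llvm::MachineInstr::FmReassoc);  // allow reassociation
  // Queries use the same flag set; e.g. a combine may check FmNoNans
  // before assuming NaNs cannot occur for this instruction.
  bool NoNaNs = MI.getFlag(llvm::MachineInstr::FmNoNans);
  (void)NoNaNs;
}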
@@ -127,7 +141,7 @@ private: /// This constructor creates a MachineInstr and adds the implicit operands. /// It reserves space for the number of operands specified by /// MCInstrDesc. An explicit DebugLoc is supplied. - MachineInstr(MachineFunction &, const MCInstrDesc &MCID, DebugLoc dl, + MachineInstr(MachineFunction &, const MCInstrDesc &tid, DebugLoc dl, bool NoImp = false); // MachineInstrs are pool-allocated and owned by MachineFunction. @@ -175,7 +189,7 @@ public: } /// Return the MI flags bitvector. - uint8_t getFlags() const { + uint16_t getFlags() const { return Flags; } @@ -186,7 +200,7 @@ public: /// Set a MI flag. void setFlag(MIFlag Flag) { - Flags |= (uint8_t)Flag; + Flags |= (uint16_t)Flag; } void setFlags(unsigned flags) { @@ -197,7 +211,7 @@ public: /// clearFlag - Clear a MI flag. void clearFlag(MIFlag Flag) { - Flags &= ~((uint8_t)Flag); + Flags &= ~((uint16_t)Flag); } /// Return true if MI is in a bundle (but not the first MI in a bundle). @@ -278,6 +292,10 @@ public: /// this DBG_VALUE instruction. const DIExpression *getDebugExpression() const; + /// Return the debug label referenced by + /// this DBG_LABEL instruction. + const DILabel *getDebugLabel() const; + /// Emit an error referring to the source location of this instruction. /// This should only be used for inline assembly that is somehow /// impossible to compile. Other errors should have been handled much @@ -304,6 +322,11 @@ public: return Operands[i]; } + /// Returns the total number of definitions. + unsigned getNumDefs() const { + return getNumExplicitDefs() + MCID->getNumImplicitDefs(); + } + /// Return true if operand \p OpIdx is a subregister index. bool isOperandSubregIdx(unsigned OpIdx) const { assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate && @@ -322,6 +345,9 @@ public: /// Returns the number of non-implicit operands. unsigned getNumExplicitOperands() const; + /// Returns the number of non-implicit definitions. + unsigned getNumExplicitDefs() const; + /// iterator/begin/end - Iterate over all operands of a machine instruction. using mop_iterator = MachineOperand *; using const_mop_iterator = const MachineOperand *; @@ -356,31 +382,29 @@ public: /// Implicit definitions are not included! iterator_range<mop_iterator> defs() { return make_range(operands_begin(), - operands_begin() + getDesc().getNumDefs()); + operands_begin() + getNumExplicitDefs()); } /// \copydoc defs() iterator_range<const_mop_iterator> defs() const { return make_range(operands_begin(), - operands_begin() + getDesc().getNumDefs()); + operands_begin() + getNumExplicitDefs()); } /// Returns a range that includes all operands that are register uses. /// This may include unrelated operands which are not register uses. 
iterator_range<mop_iterator> uses() { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_end()); + return make_range(operands_begin() + getNumExplicitDefs(), operands_end()); } /// \copydoc uses() iterator_range<const_mop_iterator> uses() const { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_end()); + return make_range(operands_begin() + getNumExplicitDefs(), operands_end()); } iterator_range<mop_iterator> explicit_uses() { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_begin() + getNumExplicitOperands() ); + return make_range(operands_begin() + getNumExplicitDefs(), + operands_begin() + getNumExplicitOperands()); } iterator_range<const_mop_iterator> explicit_uses() const { - return make_range(operands_begin() + getDesc().getNumDefs(), - operands_begin() + getNumExplicitOperands() ); + return make_range(operands_begin() + getNumExplicitDefs(), + operands_begin() + getNumExplicitOperands()); } /// Returns the number of the operand iterator \p I points to. @@ -391,7 +415,7 @@ public: /// Access to memory operands of the instruction mmo_iterator memoperands_begin() const { return MemRefs; } mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; } - /// Return true if we don't have any memory operands which described the the + /// Return true if we don't have any memory operands which describe the /// memory access done by this instruction. If this is true, calling code /// must be conservative. bool memoperands_empty() const { return NumMemRefs == 0; } @@ -529,6 +553,12 @@ public: return hasProperty(MCID::MoveImm, Type); } + /// Return true if this instruction is a register move. + /// (including moving values from subreg to reg) + bool isMoveReg(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::MoveReg, Type); + } + /// Return true if this instruction is a bitcast instruction. bool isBitcast(QueryType Type = IgnoreBundle) const { return hasProperty(MCID::Bitcast, Type); @@ -576,7 +606,7 @@ public: return hasProperty(MCID::FoldableAsLoad, Type); } - /// \brief Return true if this instruction behaves + /// Return true if this instruction behaves /// the same way as the generic REG_SEQUENCE instructions. /// E.g., on ARM, /// dX VMOVDRR rY, rZ @@ -590,7 +620,7 @@ return hasProperty(MCID::RegSequence, Type); } - /// \brief Return true if this instruction behaves + /// Return true if this instruction behaves /// the same way as the generic EXTRACT_SUBREG instructions. /// E.g., on ARM, /// rX, rY VMOVRRD dZ @@ -605,7 +635,7 @@ return hasProperty(MCID::ExtractSubreg, Type); } - /// \brief Return true if this instruction behaves + /// Return true if this instruction behaves /// the same way as the generic INSERT_SUBREG instructions. /// E.g., on ARM, /// dX = VSETLNi32 dY, rZ, Imm @@ -817,6 +847,8 @@ public: bool isPosition() const { return isLabel() || isCFIInstruction(); } bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; } + bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; } + bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); } /// A DBG_VALUE is indirect iff the first operand is a register and /// the second operand is an immediate. 
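Since defs()/uses() are now delimited by getNumExplicitDefs() and isDebugInstr() covers both DBG_VALUE and DBG_LABEL, a typical operand walk looks like the following sketch; visitDefs is an illustrative name, not part of this patch:

// Sketch: visit the explicit defs of every non-debug instruction in a block.
void visitDefs(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineInstr &MI : MBB) {
    if (MI.isDebugInstr()) // skips DBG_VALUE and DBG_LABEL alike
      continue;
    for (llvm::MachineOperand &MO : MI.defs())
      if (MO.isReg())
        (void)MO.getReg(); // inspect the defined register here
  }
}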
@@ -893,6 +925,9 @@ public: case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: case TargetOpcode::DBG_VALUE: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::LIFETIME_START: + case TargetOpcode::LIFETIME_END: return true; } } @@ -1047,7 +1082,7 @@ public: const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; - /// \brief Applies the constraints (def/use) implied by this MI on \p Reg to + /// Applies the constraints (def/use) implied by this MI on \p Reg to /// the given \p CurRC. /// If \p ExploreBundle is set and MI is part of a bundle, all the /// instructions inside the bundle will be taken into account. In other words, @@ -1064,7 +1099,7 @@ public: const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle = false) const; - /// \brief Applies the constraints (def/use) implied by the \p OpIdx operand + /// Applies the constraints (def/use) implied by the \p OpIdx operand /// to the given \p CurRC. /// /// Returns the register class that satisfies both \p CurRC and the @@ -1233,15 +1268,20 @@ public: bool hasComplexRegisterTies() const; /// Print this MI to \p OS. + /// Don't print information that can be inferred from other instructions if + /// \p IsStandalone is false. It is usually true when only a fragment of the + /// function is printed. /// If \p SkipOpers is true, only print the defs and the opcode. /// Otherwise, if \p SkipDebugLoc is true, also print the operands. /// Otherwise, also print the debug loc, with a terminating newline. /// \p TII is used to print the opcode name. If it's not present, but the /// MI is in a function, the opcode will be printed using the function's TII. - void print(raw_ostream &OS, bool SkipOpers = false, bool SkipDebugLoc = false, + void print(raw_ostream &OS, bool IsStandalone = true, bool SkipOpers = false, + bool SkipDebugLoc = false, bool AddNewLine = true, const TargetInstrInfo *TII = nullptr) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST, bool SkipOpers = false, - bool SkipDebugLoc = false, + void print(raw_ostream &OS, ModuleSlotTracker &MST, bool IsStandalone = true, + bool SkipOpers = false, bool SkipDebugLoc = false, + bool AddNewLine = true, const TargetInstrInfo *TII = nullptr) const; void dump() const; /// @} @@ -1281,7 +1321,7 @@ public: /// Erase an operand from an instruction, leaving it with one /// fewer operand than it started with. - void RemoveOperand(unsigned i); + void RemoveOperand(unsigned OpNo); /// Add a MachineMemOperand to the machine instruction. /// This function should be used only occasionally. The setMemRefs function @@ -1311,6 +1351,11 @@ public: /// modify the memrefs of this MachineInstr. std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other); + /// Return the MIFlags which represent both MachineInstrs. This + /// should be used when merging two MachineInstrs into one. This routine does + /// not modify the MIFlags of this MachineInstr. + uint16_t mergeFlagsWith(const MachineInstr& Other) const; + /// Clear this MachineInstr's memory reference descriptor list. This resets /// the memrefs to their most conservative state. This should be used only /// as a last resort since it greatly pessimizes our knowledge of the memory @@ -1351,7 +1396,7 @@ private: /// Slow path for hasProperty when we're dealing with a bundle. 
bool hasPropertyInBundle(unsigned Mask, QueryType Type) const; - /// \brief Implements the logic of getRegClassConstraintEffectForVReg for the + /// Implements the logic of getRegClassConstraintEffectForVReg for /// this MI and the given operand index \p OpIdx. /// If the related operand does not constrain Reg, this returns CurRC. const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl( diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index e4f3976ec950..665608755741 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -20,6 +20,7 @@ #define LLVM_CODEGEN_MACHINEINSTRBUILDER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -219,6 +220,9 @@ public: assert((MI->isDebugValue() ? static_cast<bool>(MI->getDebugVariable()) : true) && "first MDNode argument of a DBG_VALUE not a variable"); + assert((MI->isDebugLabel() ? static_cast<bool>(MI->getDebugLabel()) + : true) && + "first MDNode argument of a DBG_LABEL not a label"); return *this; } @@ -283,6 +287,12 @@ public: MI->copyImplicitOps(*MF, OtherMI); return *this; } + + bool constrainAllUses(const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + } }; /// Builder interface. Specify how to create the initial instruction itself. @@ -408,6 +418,13 @@ MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic +/// for a MachineOperand. +MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + MachineOperand &MO, const MDNode *Variable, + const MDNode *Expr); + +/// This version of the builder builds a DBG_VALUE intrinsic /// for either a value in a register or a register-indirect /// address and inserts it at position I. MachineInstrBuilder BuildMI(MachineBasicBlock &BB, @@ -416,6 +433,14 @@ MachineInstrBuilder BuildMI(MachineBasicBlock &BB, unsigned Reg, const MDNode *Variable, const MDNode *Expr); +/// This version of the builder builds a DBG_VALUE intrinsic +/// for a machine operand and inserts it at position I. +MachineInstrBuilder BuildMI(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + const MCInstrDesc &MCID, bool IsIndirect, + MachineOperand &MO, const MDNode *Variable, + const MDNode *Expr); + /// Clone a DBG_VALUE whose value has been spilled to FrameIndex. MachineInstr *buildDbgValueForSpill(MachineBasicBlock &BB, MachineBasicBlock::iterator I, diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 104655e45524..917fb90380f5 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -54,7 +54,7 @@ public: /// that contains the header. MachineBasicBlock *getBottomBlock(); - /// \brief Find the block that contains the loop control variable and the + /// Find the block that contains the loop control variable and the /// loop test. This will return the latch block if it's one of the exiting /// blocks. Otherwise, return the exiting block. Return 'null' when /// multiple exiting blocks are present. 
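The constrainAllUses helper added to MachineInstrBuilder above simply forwards to GlobalISel's constrainSelectedInstRegOperands. A hedged sketch of how an instruction selector might use it; Opcode, DstReg, SrcReg, and emitAndConstrain stand in for target-specific values and are not part of this patch:

// Sketch: build one selected instruction, then constrain its operands.
bool emitAndConstrain(llvm::MachineBasicBlock &MBB,
                      llvm::MachineBasicBlock::iterator InsertPt,
                      const llvm::DebugLoc &DL, unsigned Opcode,
                      unsigned DstReg, unsigned SrcReg,
                      const llvm::TargetInstrInfo &TII,
                      const llvm::TargetRegisterInfo &TRI,
                      const llvm::RegisterBankInfo &RBI) {
  llvm::MachineInstrBuilder MIB =
      BuildMI(MBB, InsertPt, DL, TII.get(Opcode), DstReg).addReg(SrcReg);
  // Returns false if an operand cannot be constrained to a register class.
  return MIB.constrainAllUses(TII, TRI, RBI);
}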
@@ -97,7 +97,7 @@ public: LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; } - /// \brief Find the block that either is the loop preheader, or could + /// Find the block that either is the loop preheader, or could /// speculatively be used as the preheader. This is e.g. useful to place /// loop setup code. Code that cannot be speculated should not be placed /// here. SpeculativePreheader controls whether it also tries to diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index c5b204a79f04..078ef7ca510c 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -184,7 +184,7 @@ public: /// atomic operations the atomic ordering requirements when store does not /// occur must also be specified. MachineMemOperand(MachinePointerInfo PtrInfo, Flags flags, uint64_t s, - unsigned base_alignment, + uint64_t a, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr, SyncScope::ID SSID = SyncScope::System, @@ -295,6 +295,9 @@ public: /// @{ void print(raw_ostream &OS) const; void print(raw_ostream &OS, ModuleSlotTracker &MST) const; + void print(raw_ostream &OS, ModuleSlotTracker &MST, + SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context, + const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const; /// @} friend bool operator==(const MachineMemOperand &LHS, diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 4be7942c2c64..53e8889d118a 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -74,7 +74,7 @@ public: private: /// OpKind - Specify what kind of operand this is. This discriminates the /// union. - MachineOperandType OpKind : 8; + unsigned OpKind : 8; /// Subregister number for MO_Register. A value of 0 indicates the /// MO_Register has no subReg. @@ -85,17 +85,17 @@ private: /// TiedTo - Non-zero when this register operand is tied to another register /// operand. The encoding of this field is described in the block comment /// before MachineInstr::tieOperands(). - unsigned char TiedTo : 4; + unsigned TiedTo : 4; /// IsDef - True if this is a def, false if this is a use of the register. /// This is only valid on register operands. /// - bool IsDef : 1; + unsigned IsDef : 1; /// IsImp - True if this is an implicit def or use, false if it is explicit. /// This is only valid on register operands. /// - bool IsImp : 1; + unsigned IsImp : 1; /// IsDeadOrKill /// For uses: IsKill - True if this instruction is the last use of the /// For defs: IsDead - True if this register is never used by a subsequent /// instruction. /// This is only valid on register operands. - bool IsDeadOrKill : 1; + unsigned IsDeadOrKill : 1; - /// IsRenamable - True if this register may be renamed, i.e. it does not - /// generate a value that is somehow read in a way that is not represented by - /// the Machine IR (e.g. to meet an ABI or ISA requirement). This is only - /// valid on physical register operands. Virtual registers are assumed to - /// always be renamable regardless of the value of this field. - bool IsRenamable : 1; + /// See isRenamable(). + unsigned IsRenamable : 1; /// IsUndef - True if this register operand reads an "undef" value, i.e. the /// read value doesn't matter. 
This flag can be set on both use and def @@ -129,7 +125,7 @@ private: /// Any register can be used for %2, and its value doesn't matter, but /// the two operands must be the same register. /// - bool IsUndef : 1; + unsigned IsUndef : 1; /// IsInternalRead - True if this operand reads a value that was defined /// inside the same instruction or bundle. This flag can be set on both use /// and def operands. /// /// When this flag is set, the instruction bundle must contain at least one /// other def of the register. If multiple instructions in the bundle define /// the register, the meaning is target-defined. - bool IsInternalRead : 1; + unsigned IsInternalRead : 1; /// IsEarlyClobber - True if this MO_Register 'def' operand is written to /// by the MachineInstr before all input registers are read. This is used to /// model the GCC inline asm '&' constraint modifier. - bool IsEarlyClobber : 1; + unsigned IsEarlyClobber : 1; /// IsDebug - True if this MO_Register 'use' operand is in a debug pseudo, /// not a real instruction. Such uses should be ignored during codegen. - bool IsDebug : 1; + unsigned IsDebug : 1; /// SmallContents - This really should be part of the Contents union, but /// lives out here so we can get a better packed struct. @@ -198,7 +194,19 @@ private: } Contents; explicit MachineOperand(MachineOperandType K) - : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) {} + : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) { + // Assert that the layout is what we expect. It's easy to grow this object. + static_assert(alignof(MachineOperand) <= alignof(int64_t), + "MachineOperand shouldn't be more than 8 byte aligned"); + static_assert(sizeof(Contents) <= 2 * sizeof(void *), + "Contents should be at most two pointers"); + static_assert(sizeof(MachineOperand) <= + alignTo<alignof(int64_t)>(2 * sizeof(unsigned) + + 3 * sizeof(void *)), + "MachineOperand too big. Should be Kind, SmallContents, " + "ParentMI, and Contents"); + } + public: /// getType - Returns the MachineOperandType for this operand. /// @@ -238,7 +246,7 @@ public: /// MO_Immediate operands can also be subreg indices. If that is the case, the /// subreg index name will be printed. MachineInstr::isOperandSubregIdx can be /// called to check this. - static void printSubregIdx(raw_ostream &OS, uint64_t Index, + static void printSubRegIdx(raw_ostream &OS, uint64_t Index, const TargetRegisterInfo *TRI); /// Print operand target flags. @@ -270,6 +278,9 @@ public: /// \param PrintDef - whether we want to print `def` on an operand which /// isDef. Sometimes, if the operand is printed before '=', we don't print /// `def`. + /// \param IsStandalone - whether we want a verbose output of the MO. This + /// prints extra information that can be easily inferred when printing the + /// whole function, but not when printing only a fragment of it. /// \param ShouldPrintRegisterTies - whether we want to print register ties. /// Sometimes they are easily determined by the instruction's descriptor /// (MachineInstr::hasComplexRegisterTies can determine if it's needed). /// \param TiedOperandIdx - if we need to print register ties this needs to /// provide the index of the tied register. If not, it will be ignored. /// \param TRI - provide more target-specific information to the printer. /// Unlike the previous function, this one will not try to get the /// information from its parent. /// \param IntrinsicInfo - same as \p TRI. 
void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, - bool PrintDef, bool ShouldPrintRegisterTies, + bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const; + /// Same as print(os, TRI, IntrinsicInfo), but allows specifying the low-level + /// type to be printed the same way the full version of print(...) does it. + void print(raw_ostream &os, LLT TypeToPrint, + const TargetRegisterInfo *TRI = nullptr, + const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; + void dump() const; //===--------------------------------------------------------------------===// @@ -369,6 +386,35 @@ public: return IsUndef; } + /// isRenamable - Returns true if this register may be renamed, i.e. it does + /// not generate a value that is somehow read in a way that is not represented + /// by the Machine IR (e.g. to meet an ABI or ISA requirement). This is only + /// valid on physical register operands. Virtual registers are assumed to + /// always be renamable regardless of the value of this field. + /// + /// Operands that are renamable can freely be changed to any other register + /// that is a member of the register class returned by + /// MI->getRegClassConstraint(). + /// + /// isRenamable can return false for several different reasons: + /// + /// - ABI constraints (since liveness is not always precisely modeled). We + /// conservatively handle these cases by setting all physical register + /// operands that didn’t start out as virtual regs to not be renamable. + /// Also any physical register operands created after register allocation or + /// whose register is changed after register allocation will not be + /// renamable. This state is tracked in the MachineOperand::IsRenamable + /// bit. + /// + /// - Opcode/target constraints: for opcodes that have complex register class + /// requirements (e.g. that depend on other operands/instructions), we set + /// hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq in the machine opcode + /// description. Operands belonging to instructions with opcodes that are + /// marked hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq return false from + /// isRenamable(). Additionally, the AllowRegisterRenaming target property + /// prevents any operands from being marked renamable for targets that don't + /// have detailed opcode hasExtraSrcRegAllocReq/hasExtraDstRegAllocReq + /// values. bool isRenamable() const; bool isInternalRead() const { @@ -458,10 +504,6 @@ public: void setIsRenamable(bool Val = true); - /// Set IsRenamable to true if there are no extra register allocation - /// requirements placed on this operand by the parent instruction's opcode. - void setIsRenamableIfNoExtraRegAllocReq(); - void setIsInternalRead(bool Val = true) { assert(isReg() && "Wrong MachineOperand mutator"); IsInternalRead = Val; @@ -574,6 +616,11 @@ public: return Contents.RegMask; } + /// Returns the number of elements needed for a regmask array. + static unsigned getRegMaskSize(unsigned NumRegs) { + return (NumRegs + 31) / 32; + } + /// getRegLiveOut - Returns a bit mask of live-out registers. 
const uint32_t *getRegLiveOut() const { assert(isRegLiveOut() && "Wrong MachineOperand accessor"); @@ -594,6 +641,11 @@ public: Contents.ImmVal = immVal; } + void setCImm(const ConstantInt *CI) { + assert(isCImm() && "Wrong MachineOperand mutator"); + Contents.CI = CI; + } + void setFPImm(const ConstantFP *CFP) { assert(isFPImm() && "Wrong MachineOperand mutator"); Contents.CFP = CFP; @@ -641,7 +693,7 @@ public: /// should stay in sync with the hash_value overload below. bool isIdenticalTo(const MachineOperand &Other) const; - /// \brief MachineOperand hash_value overload. + /// MachineOperand hash_value overload. /// /// Note that this includes the same information in the hash that /// isIdenticalTo uses for comparison. It is thus suited for use in hash diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 2fdefbed37ce..a7ce870400c2 100644 --- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -24,7 +24,7 @@ class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineInstr; -/// \brief Common features for diagnostics dealing with optimization remarks +/// Common features for diagnostics dealing with optimization remarks /// that are used by machine passes. class DiagnosticInfoMIROptimization : public DiagnosticInfoOptimizationBase { public: @@ -151,7 +151,7 @@ public: /// Emit an optimization remark. void emit(DiagnosticInfoOptimizationBase &OptDiag); - /// \brief Whether we allow for extra compile-time budget to perform more + /// Whether we allow for extra compile-time budget to perform more /// analysis to be more informative. /// /// This is useful to enable additional missed optimizations to be reported @@ -164,7 +164,7 @@ public: .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); } - /// \brief Take a lambda that returns a remark which will be emitted. Second + /// Take a lambda that returns a remark which will be emitted. Second /// argument is only used to restrict this to functions. template <typename T> void emit(T RemarkBuilder, decltype(RemarkBuilder()) * = nullptr) { @@ -192,7 +192,7 @@ private: /// Similar but use value from \p OptDiag and update hotness there. void computeHotness(DiagnosticInfoMIROptimization &Remark); - /// \brief Only allow verbose messages if we know we're filtering by hotness + /// Only allow verbose messages if we know we're filtering by hotness /// (BFI is only set in this case). bool shouldEmitVerbose() { return MBFI != nullptr; } }; diff --git a/include/llvm/CodeGen/MachineOutliner.h b/include/llvm/CodeGen/MachineOutliner.h new file mode 100644 index 000000000000..4249a99a891b --- /dev/null +++ b/include/llvm/CodeGen/MachineOutliner.h @@ -0,0 +1,226 @@ +//===---- MachineOutliner.h - Outliner data structures ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains all data structures shared between the outliner implemented in +/// MachineOutliner.cpp and target implementations of the outliner. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MACHINEOUTLINER_H +#define LLVM_MACHINEOUTLINER_H + +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +namespace llvm { +namespace outliner { + +/// Represents how an instruction should be mapped by the outliner. +/// \p Legal instructions are those which are safe to outline. +/// \p LegalTerminator instructions are safe to outline, but only as the +/// last instruction in a sequence. +/// \p Illegal instructions are those which cannot be outlined. +/// \p Invisible instructions are instructions which can be outlined, but +/// shouldn't actually impact the outlining result. +enum InstrType { Legal, LegalTerminator, Illegal, Invisible }; + +/// An individual sequence of instructions to be replaced with a call to +/// an outlined function. +struct Candidate { +private: + /// The start index of this \p Candidate in the instruction list. + unsigned StartIdx; + + /// The number of instructions in this \p Candidate. + unsigned Len; + + /// The first instruction in this \p Candidate. + MachineBasicBlock::iterator FirstInst; + + /// The last instruction in this \p Candidate. + MachineBasicBlock::iterator LastInst; + + /// The basic block that contains this Candidate. + MachineBasicBlock *MBB; + + /// Cost of calling an outlined function from this point as defined by the + /// target. + unsigned CallOverhead; + +public: + /// The index of this \p Candidate's \p OutlinedFunction in the list of + /// \p OutlinedFunctions. + unsigned FunctionIdx; + + /// Set to false if the candidate overlapped with another candidate. + bool InCandidateList = true; + + /// Identifier denoting the instructions to emit to call an outlined function + /// from this point. Defined by the target. + unsigned CallConstructionID; + + /// Contains physical register liveness information for the MBB containing + /// this \p Candidate. + /// + /// This is optionally used by the target to calculate more fine-grained + /// cost model information. + LiveRegUnits LRU; + + /// Return the number of instructions in this Candidate. + unsigned getLength() const { return Len; } + + /// Return the start index of this candidate. + unsigned getStartIdx() const { return StartIdx; } + + /// Return the end index of this candidate. + unsigned getEndIdx() const { return StartIdx + Len - 1; } + + /// Set the CallConstructionID and CallOverhead of this candidate to CID and + /// CO respectively. + void setCallInfo(unsigned CID, unsigned CO) { + CallConstructionID = CID; + CallOverhead = CO; + } + + /// Returns the call overhead of this candidate if it is in the list. + unsigned getCallOverhead() const { + return InCandidateList ? CallOverhead : 0; + } + + MachineBasicBlock::iterator &front() { return FirstInst; } + MachineBasicBlock::iterator &back() { return LastInst; } + MachineFunction *getMF() const { return MBB->getParent(); } + MachineBasicBlock *getMBB() const { return MBB; } + + /// The number of bytes that would be saved by outlining every + /// candidate of this type. + /// + /// This is a fixed value which is not updated during the candidate pruning + /// process. It is only used for deciding which candidate to keep if two + /// candidates overlap. The true benefit is stored in the OutlinedFunction + /// for some given candidate. 
+ unsigned Benefit = 0; + + Candidate(unsigned StartIdx, unsigned Len, + MachineBasicBlock::iterator &FirstInst, + MachineBasicBlock::iterator &LastInst, MachineBasicBlock *MBB, + unsigned FunctionIdx) + : StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst), + MBB(MBB), FunctionIdx(FunctionIdx) {} + Candidate() {} + + /// Used to ensure that \p Candidates are outlined in an order that + /// preserves the start and end indices of other \p Candidates. + bool operator<(const Candidate &RHS) const { + return getStartIdx() > RHS.getStartIdx(); + } + + /// Compute the registers that are live across this Candidate. + /// Used by targets that need this information for cost model calculation. + /// If a target does not need this information, then this should not be + /// called. + void initLRU(const TargetRegisterInfo &TRI) { + assert(MBB->getParent()->getRegInfo().tracksLiveness() && + "Candidate's Machine Function must track liveness"); + LRU.init(TRI); + LRU.addLiveOuts(*MBB); + + // Compute liveness from the end of the block up to the beginning of the + // outlining candidate. + std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(), + [this](MachineInstr &MI) { LRU.stepBackward(MI); }); + } +}; + +/// The information necessary to create an outlined function for some +/// class of candidate. +struct OutlinedFunction { + +private: + /// The number of candidates for this \p OutlinedFunction. + unsigned OccurrenceCount = 0; + +public: + std::vector<std::shared_ptr<Candidate>> Candidates; + + /// The actual outlined function created. + /// This is initialized after we go through and create the actual function. + MachineFunction *MF = nullptr; + + /// A number assigned to this function which appears at the end of its name. + unsigned Name; + + /// The sequence of integers corresponding to the instructions in this + /// function. + std::vector<unsigned> Sequence; + + /// Represents the size of a sequence in bytes. (Some instructions vary + /// widely in size, so just counting the instructions isn't very useful.) + unsigned SequenceSize; + + /// Target-defined overhead of constructing a frame for this function. + unsigned FrameOverhead; + + /// Target-defined identifier for constructing a frame for this function. + unsigned FrameConstructionID; + + /// Return the number of candidates for this \p OutlinedFunction. + unsigned getOccurrenceCount() { return OccurrenceCount; } + + /// Decrement the occurrence count of this OutlinedFunction and return the + /// new count. + unsigned decrement() { + assert(OccurrenceCount > 0 && "Can't decrement an empty function!"); + OccurrenceCount--; + return getOccurrenceCount(); + } + + /// Return the number of bytes it would take to outline this + /// function. + unsigned getOutliningCost() { + unsigned CallOverhead = 0; + for (std::shared_ptr<Candidate> &C : Candidates) + CallOverhead += C->getCallOverhead(); + return CallOverhead + SequenceSize + FrameOverhead; + } + + /// Return the size in bytes of the unoutlined sequences. + unsigned getNotOutlinedCost() { return OccurrenceCount * SequenceSize; } + + /// Return the number of instructions that would be saved by outlining + /// this function. + unsigned getBenefit() { + unsigned NotOutlinedCost = getNotOutlinedCost(); + unsigned OutlinedCost = getOutliningCost(); + return (NotOutlinedCost < OutlinedCost) ? 
0
+                                            : NotOutlinedCost - OutlinedCost;
+  }
+
+  OutlinedFunction(std::vector<Candidate> &Cands,
+                   unsigned SequenceSize, unsigned FrameOverhead,
+                   unsigned FrameConstructionID)
+      : SequenceSize(SequenceSize), FrameOverhead(FrameOverhead),
+        FrameConstructionID(FrameConstructionID) {
+    OccurrenceCount = Cands.size();
+    for (Candidate &C : Cands)
+      Candidates.push_back(std::make_shared<outliner::Candidate>(C));
+
+    unsigned B = getBenefit();
+    for (std::shared_ptr<Candidate> &C : Candidates)
+      C->Benefit = B;
+  }
+
+  OutlinedFunction() {}
+};
+} // namespace outliner
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 3be94f802170..5bf4a49c8b3b 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -75,6 +76,13 @@ private:
              VirtReg2IndexFunctor> VRegInfo;
+  /// Map for recovering vreg name from vreg number.
+  /// This map is used by the MIR Printer.
+  IndexedMap<std::string, VirtReg2IndexFunctor> VReg2Name;
+
+  /// StringSet that is used to unique vreg names.
+  StringSet<> VRegNames;
+
   /// The flag is true upon \p UpdatedCSRs initialization
   /// and false otherwise.
   bool IsUpdatedCSRsInitialized;
@@ -128,9 +136,9 @@ private:
   /// started.
   BitVector ReservedRegs;
-  using VRegToTypeMap = DenseMap<unsigned, LLT>;
-  /// Map generic virtual registers to their actual size.
-  mutable std::unique_ptr<VRegToTypeMap> VRegToType;
+  using VRegToTypeMap = IndexedMap<LLT, VirtReg2IndexFunctor>;
+  /// Map generic virtual registers to their low-level type.
+  VRegToTypeMap VRegToType;
   /// Keep track of the physical registers that are live in to the function.
   /// Live in values are typically arguments in registers. LiveIn values are
@@ -418,6 +426,20 @@ public:
   /// specified register (it may be live-in).
   bool def_empty(unsigned RegNo) const { return def_begin(RegNo) == def_end(); }
+  StringRef getVRegName(unsigned Reg) const {
+    return VReg2Name.inBounds(Reg) ? StringRef(VReg2Name[Reg]) : "";
+  }
+
+  void insertVRegByName(StringRef Name, unsigned Reg) {
+    assert((Name.empty() || VRegNames.find(Name) == VRegNames.end()) &&
+           "Named VRegs Must be Unique.");
+    if (!Name.empty()) {
+      VRegNames.insert(Name);
+      VReg2Name.grow(Reg);
+      VReg2Name[Reg] = Name.str();
+    }
+  }
+
   /// Return true if there is exactly one operand defining the specified
   /// register.
   bool hasOneDef(unsigned RegNo) const {
@@ -548,12 +570,16 @@ public:
   /// except that it also changes any definitions of the register as well.
   ///
   /// Note that it is usually necessary to first constrain ToReg's register
-  /// class to match the FromReg constraints using:
+  /// class and register bank to match the FromReg constraints using one of the
+  /// methods:
   ///
   ///   constrainRegClass(ToReg, getRegClass(FromReg))
+  ///   constrainRegAttrs(ToReg, FromReg)
+  ///   RegisterBankInfo::constrainGenericRegister(ToReg,
+  ///       *MRI.getRegClass(FromReg), MRI)
   ///
-  /// That function will return NULL if the virtual registers have incompatible
-  /// constraints.
+  /// These functions will return a null pointer (or false, in the case of
+  /// constrainRegAttrs) if the virtual registers have incompatible
+  /// constraints.
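+  ///
+  /// A hypothetical use, as a sketch only (MRI here names a
+  /// MachineRegisterInfo instance; this is not code from this header):
+  ///
+  /// \code
+  ///   if (MRI.constrainRegAttrs(ToReg, FromReg))
+  ///     MRI.replaceRegWith(FromReg, ToReg);
+  /// \endcode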
/// /// Note that if ToReg is a physical register the function will replace and /// apply sub registers to ToReg in order to obtain a final/proper physical @@ -653,10 +679,30 @@ public: /// new register class, or NULL if no such class exists. /// This should only be used when the constraint is known to be trivial, like /// GR32 -> GR32_NOSP. Beware of increasing register pressure. + /// + /// \note Assumes that the register has a register class assigned. + /// Use RegisterBankInfo::constrainGenericRegister in GlobalISel's + /// InstructionSelect pass and constrainRegAttrs in every other pass, + /// including non-select passes of GlobalISel, instead. const TargetRegisterClass *constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs = 0); + /// Constrain the register class or the register bank of the virtual register + /// \p Reg to be a common subclass and a common bank of both registers + /// provided respectively. Do nothing if any of the attributes (classes, + /// banks, or low-level types) of the registers are deemed incompatible, or if + /// the resulting register will have a class smaller than before and of size + /// less than \p MinNumRegs. Return true if such register attributes exist, + /// false otherwise. + /// + /// \note Assumes that each register has either a low-level type or a class + /// assigned, but not both. Use this method instead of constrainRegClass and + /// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG + /// ISel / FastISel and GlobalISel's InstructionSelect pass respectively. + bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, + unsigned MinNumRegs = 0); + /// recomputeRegClass - Try to find a legal super-class of Reg's register /// class that still satisfies the constraints from the instructions using /// Reg. Returns true if Reg was upgraded. @@ -668,26 +714,23 @@ public: /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. - unsigned createVirtualRegister(const TargetRegisterClass *RegClass); + unsigned createVirtualRegister(const TargetRegisterClass *RegClass, + StringRef Name = ""); - /// Accessor for VRegToType. This accessor should only be used - /// by global-isel related work. - VRegToTypeMap &getVRegToType() const { - if (!VRegToType) - VRegToType.reset(new VRegToTypeMap); - return *VRegToType.get(); - } - - /// Get the low-level type of \p VReg or LLT{} if VReg is not a generic + /// Get the low-level type of \p Reg or LLT{} if Reg is not a generic /// (target independent) virtual register. - LLT getType(unsigned VReg) const; + LLT getType(unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) + return VRegToType[Reg]; + return LLT{}; + } /// Set the low-level type of \p VReg to \p Ty. void setType(unsigned VReg, LLT Ty); /// Create and return a new generic virtual register with low-level /// type \p Ty. - unsigned createGenericVirtualRegister(LLT Ty); + unsigned createGenericVirtualRegister(LLT Ty, StringRef Name = ""); /// Remove all types associated to virtual registers (after instruction /// selection and constraining of all generic virtual registers). @@ -698,7 +741,7 @@ public: /// temporarily while constructing machine instructions. Most operations are /// undefined on an incomplete register until one of setRegClass(), /// setRegBank() or setSize() has been called on it. 
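  ///
  /// A minimal sketch of the intended flow (illustrative only; "tmp" is a
  /// made-up name, and the register must still be completed, e.g. via
  /// setType(), before it is used):
  ///
  /// \code
  ///   unsigned Reg = MRI.createIncompleteVirtualRegister("tmp");
  ///   MRI.setType(Reg, LLT::scalar(32));
  /// \endcode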
- unsigned createIncompleteVirtualRegister(); + unsigned createIncompleteVirtualRegister(StringRef Name = ""); /// getNumVirtRegs - Return the number of virtual registers created. unsigned getNumVirtRegs() const { return VRegInfo.size(); } diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h index b5ea2080444d..5e91246b402c 100644 --- a/include/llvm/CodeGen/MachineSSAUpdater.h +++ b/include/llvm/CodeGen/MachineSSAUpdater.h @@ -56,7 +56,7 @@ public: /// MachineSSAUpdater constructor. If InsertedPHIs is specified, it will be /// filled in with all PHI Nodes created by rewriting. explicit MachineSSAUpdater(MachineFunction &MF, - SmallVectorImpl<MachineInstr*> *InsertedPHIs = nullptr); + SmallVectorImpl<MachineInstr*> *NewPHI = nullptr); MachineSSAUpdater(const MachineSSAUpdater &) = delete; MachineSSAUpdater &operator=(const MachineSSAUpdater &) = delete; ~MachineSSAUpdater(); diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index e327881de13a..85ffa4eda2b8 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -237,7 +237,7 @@ public: /// be scheduled at the bottom. virtual SUnit *pickNode(bool &IsTopNode) = 0; - /// \brief Scheduler callback to notify that a new subtree is scheduled. + /// Scheduler callback to notify that a new subtree is scheduled. virtual void scheduleTree(unsigned SubtreeID) {} /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an @@ -318,11 +318,11 @@ public: Mutations.push_back(std::move(Mutation)); } - /// \brief True if an edge can be added from PredSU to SuccSU without creating + /// True if an edge can be added from PredSU to SuccSU without creating /// a cycle. bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); - /// \brief Add a DAG edge to the given SU with the given predecessor + /// Add a DAG edge to the given SU with the given predecessor /// dependence data. /// /// \returns true if the edge may be added without creating a cycle OR if an @@ -374,7 +374,7 @@ protected: /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. void placeDebugValues(); - /// \brief dump the scheduled Sequence. + /// dump the scheduled Sequence. void dumpSchedule() const; // Lesser helpers... @@ -445,7 +445,7 @@ public: /// Return true if this DAG supports VReg liveness and RegPressure. bool hasVRegLiveness() const override { return true; } - /// \brief Return true if register pressure tracking is enabled. + /// Return true if register pressure tracking is enabled. bool isTrackingPressure() const { return ShouldTrackPressure; } /// Get current register pressure for the top scheduled instructions. @@ -897,6 +897,28 @@ protected: #endif }; +// Utility functions used by heuristics in tryCandidate(). 
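+//
+// For instance, an overridden tryCandidate() might use them like this (an
+// illustrative sketch mirroring the shape of GenericScheduler::tryCandidate;
+// TryCand, Cand, TRI and DAG are the names used in that context):
+//
+//   if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand,
+//                   Cand, GenericSchedulerBase::RegExcess, TRI, DAG->MF))
+//     return;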
+bool tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason); +bool tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason); +bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone); +bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF); +unsigned getWeakLeft(const SUnit *SU, bool isTop); +int biasPhysRegCopy(const SUnit *SU, bool isTop); + /// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. class GenericScheduler : public GenericSchedulerBase { @@ -963,9 +985,8 @@ protected: const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone); + virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const; SUnit *pickNodeBidirectional(bool &IsTopNode); diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h deleted file mode 100644 index b452684757f6..000000000000 --- a/include/llvm/CodeGen/MachineValueType.h +++ /dev/null @@ -1,1055 +0,0 @@ -//===- CodeGen/MachineValueType.h - Machine-Level types ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the set of machine-level target independent types which -// legal values in the code generator use. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINEVALUETYPE_H -#define LLVM_CODEGEN_MACHINEVALUETYPE_H - -#include "llvm/ADT/iterator_range.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> - -namespace llvm { - - class Type; - - /// Machine Value Type. Every type that is supported natively by some - /// processor targeted by LLVM occurs here. This means that any legal value - /// type can be represented by an MVT. - class MVT { - public: - enum SimpleValueType : uint8_t { - // Simple value types that aren't explicitly part of this enumeration - // are considered extended value types. - INVALID_SIMPLE_VALUE_TYPE = 0, - - // If you change this numbering, you must change the values in - // ValueTypes.td as well! 
- Other = 1, // This is a non-standard value - i1 = 2, // This is a 1 bit integer value - i8 = 3, // This is an 8 bit integer value - i16 = 4, // This is a 16 bit integer value - i32 = 5, // This is a 32 bit integer value - i64 = 6, // This is a 64 bit integer value - i128 = 7, // This is a 128 bit integer value - - FIRST_INTEGER_VALUETYPE = i1, - LAST_INTEGER_VALUETYPE = i128, - - f16 = 8, // This is a 16 bit floating point value - f32 = 9, // This is a 32 bit floating point value - f64 = 10, // This is a 64 bit floating point value - f80 = 11, // This is a 80 bit floating point value - f128 = 12, // This is a 128 bit floating point value - ppcf128 = 13, // This is a PPC 128-bit floating point value - - FIRST_FP_VALUETYPE = f16, - LAST_FP_VALUETYPE = ppcf128, - - v1i1 = 14, // 1 x i1 - v2i1 = 15, // 2 x i1 - v4i1 = 16, // 4 x i1 - v8i1 = 17, // 8 x i1 - v16i1 = 18, // 16 x i1 - v32i1 = 19, // 32 x i1 - v64i1 = 20, // 64 x i1 - v128i1 = 21, // 128 x i1 - v512i1 = 22, // 512 x i1 - v1024i1 = 23, // 1024 x i1 - - v1i8 = 24, // 1 x i8 - v2i8 = 25, // 2 x i8 - v4i8 = 26, // 4 x i8 - v8i8 = 27, // 8 x i8 - v16i8 = 28, // 16 x i8 - v32i8 = 29, // 32 x i8 - v64i8 = 30, // 64 x i8 - v128i8 = 31, //128 x i8 - v256i8 = 32, //256 x i8 - - v1i16 = 33, // 1 x i16 - v2i16 = 34, // 2 x i16 - v4i16 = 35, // 4 x i16 - v8i16 = 36, // 8 x i16 - v16i16 = 37, // 16 x i16 - v32i16 = 38, // 32 x i16 - v64i16 = 39, // 64 x i16 - v128i16 = 40, //128 x i16 - - v1i32 = 41, // 1 x i32 - v2i32 = 42, // 2 x i32 - v4i32 = 43, // 4 x i32 - v8i32 = 44, // 8 x i32 - v16i32 = 45, // 16 x i32 - v32i32 = 46, // 32 x i32 - v64i32 = 47, // 64 x i32 - - v1i64 = 48, // 1 x i64 - v2i64 = 49, // 2 x i64 - v4i64 = 50, // 4 x i64 - v8i64 = 51, // 8 x i64 - v16i64 = 52, // 16 x i64 - v32i64 = 53, // 32 x i64 - - v1i128 = 54, // 1 x i128 - - // Scalable integer types - nxv1i1 = 55, // n x 1 x i1 - nxv2i1 = 56, // n x 2 x i1 - nxv4i1 = 57, // n x 4 x i1 - nxv8i1 = 58, // n x 8 x i1 - nxv16i1 = 59, // n x 16 x i1 - nxv32i1 = 60, // n x 32 x i1 - - nxv1i8 = 61, // n x 1 x i8 - nxv2i8 = 62, // n x 2 x i8 - nxv4i8 = 63, // n x 4 x i8 - nxv8i8 = 64, // n x 8 x i8 - nxv16i8 = 65, // n x 16 x i8 - nxv32i8 = 66, // n x 32 x i8 - - nxv1i16 = 67, // n x 1 x i16 - nxv2i16 = 68, // n x 2 x i16 - nxv4i16 = 69, // n x 4 x i16 - nxv8i16 = 70, // n x 8 x i16 - nxv16i16 = 71, // n x 16 x i16 - nxv32i16 = 72, // n x 32 x i16 - - nxv1i32 = 73, // n x 1 x i32 - nxv2i32 = 74, // n x 2 x i32 - nxv4i32 = 75, // n x 4 x i32 - nxv8i32 = 76, // n x 8 x i32 - nxv16i32 = 77, // n x 16 x i32 - nxv32i32 = 78, // n x 32 x i32 - - nxv1i64 = 79, // n x 1 x i64 - nxv2i64 = 80, // n x 2 x i64 - nxv4i64 = 81, // n x 4 x i64 - nxv8i64 = 82, // n x 8 x i64 - nxv16i64 = 83, // n x 16 x i64 - nxv32i64 = 84, // n x 32 x i64 - - FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, - LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, - - FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, - LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - - v2f16 = 85, // 2 x f16 - v4f16 = 86, // 4 x f16 - v8f16 = 87, // 8 x f16 - v1f32 = 88, // 1 x f32 - v2f32 = 89, // 2 x f32 - v4f32 = 90, // 4 x f32 - v8f32 = 91, // 8 x f32 - v16f32 = 92, // 16 x f32 - v1f64 = 93, // 1 x f64 - v2f64 = 94, // 2 x f64 - v4f64 = 95, // 4 x f64 - v8f64 = 96, // 8 x f64 - - nxv2f16 = 97, // n x 2 x f16 - nxv4f16 = 98, // n x 4 x f16 - nxv8f16 = 99, // n x 8 x f16 - nxv1f32 = 100, // n x 1 x f32 - nxv2f32 = 101, // n x 2 x f32 - nxv4f32 = 102, // n x 4 x f32 - nxv8f32 = 103, // n x 8 x f32 - nxv16f32 = 104, // n x 16 x f32 - nxv1f64 = 105, // n x 1 
x f64 - nxv2f64 = 106, // n x 2 x f64 - nxv4f64 = 107, // n x 4 x f64 - nxv8f64 = 108, // n x 8 x f64 - - FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = nxv8f64, - - FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, - LAST_FP_SCALABLE_VALUETYPE = nxv8f64, - - FIRST_VECTOR_VALUETYPE = v1i1, - LAST_VECTOR_VALUETYPE = nxv8f64, - - x86mmx = 109, // This is an X86 MMX value - - Glue = 110, // This glues nodes together during pre-RA sched - - isVoid = 111, // This has no value - - Untyped = 112, // This value takes a register, but has - // unspecified type. The register class - // will be determined by the opcode. - - FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 113, // This always remains at the end of the list. - - // This is the current maximum for LAST_VALUETYPE. - // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors - // This value must be a multiple of 32. - MAX_ALLOWED_VALUETYPE = 128, - - // A value of type llvm::TokenTy - token = 248, - - // This is MDNode or MDString. - Metadata = 249, - - // An int value the size of the pointer of the current - // target to any address space. This must only be used internal to - // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. - iPTRAny = 250, - - // A vector with any length and element size. This is used - // for intrinsics that have overloadings based on vector types. - // This is only for tblgen's consumption! - vAny = 251, - - // Any floating-point or vector floating-point value. This is used - // for intrinsics that have overloadings based on floating-point types. - // This is only for tblgen's consumption! - fAny = 252, - - // An integer or vector integer value of any bit width. This is - // used for intrinsics that have overloadings based on integer bit widths. - // This is only for tblgen's consumption! - iAny = 253, - - // An int value the size of the pointer of the current - // target. This should only be used internal to tblgen! - iPTR = 254, - - // Any type. This is used for intrinsics that have overloadings. - // This is only for tblgen's consumption! 
- Any = 255 - }; - - SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE; - - // A class to represent the number of elements in a vector - // - // For fixed-length vectors, the total number of elements is equal to 'Min' - // For scalable vectors, the total number of elements is a multiple of 'Min' - class ElementCount { - public: - unsigned Min; - bool Scalable; - - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} - - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; - } - - ElementCount& operator*=(unsigned RHS) { - Min *= RHS; - return *this; - } - - ElementCount operator/(unsigned RHS) { - return { Min / RHS, Scalable }; - } - - ElementCount& operator/=(unsigned RHS) { - Min /= RHS; - return *this; - } - - bool operator==(const ElementCount& RHS) { - return Min == RHS.Min && Scalable == RHS.Scalable; - } - }; - - constexpr MVT() = default; - constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} - - bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; } - bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; } - bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; } - bool operator!=(const MVT& S) const { return SimpleTy != S.SimpleTy; } - bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; } - bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; } - - /// Return true if this is a valid simple valuetype. - bool isValid() const { - return (SimpleTy >= MVT::FIRST_VALUETYPE && - SimpleTy < MVT::LAST_VALUETYPE); - } - - /// Return true if this is a FP or a vector FP type. - bool isFloatingPoint() const { - return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE && - SimpleTy <= MVT::LAST_FP_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE)); - } - - /// Return true if this is an integer or a vector integer type. - bool isInteger() const { - return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || - (SimpleTy >= MVT::FIRST_INTEGER_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE)); - } - - /// Return true if this is an integer, not including vectors. - bool isScalarInteger() const { - return (SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_VALUETYPE); - } - - /// Return true if this is a vector value type. - bool isVector() const { - return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE && - SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); - } - - /// Return true if this is a vector value type where the - /// runtime length is machine dependent - bool isScalableVector() const { - return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) || - (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE && - SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE)); - } - - /// Return true if this is a 16-bit vector type. - bool is16BitVector() const { - return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || - SimpleTy == MVT::v16i1); - } - - /// Return true if this is a 32-bit vector type. - bool is32BitVector() const { - return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 || - SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 || - SimpleTy == MVT::v2f16 || SimpleTy == MVT::v1f32); - } - - /// Return true if this is a 64-bit vector type. 
- bool is64BitVector() const { - return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 || - SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 || - SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 || - SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64); - } - - /// Return true if this is a 128-bit vector type. - bool is128BitVector() const { - return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 || - SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 || - SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 || - SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 || - SimpleTy == MVT::v2f64); - } - - /// Return true if this is a 256-bit vector type. - bool is256BitVector() const { - return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 || - SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 || - SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64); - } - - /// Return true if this is a 512-bit vector type. - bool is512BitVector() const { - return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 || - SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 || - SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 || - SimpleTy == MVT::v8i64); - } - - /// Return true if this is a 1024-bit vector type. - bool is1024BitVector() const { - return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 || - SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 || - SimpleTy == MVT::v16i64); - } - - /// Return true if this is a 1024-bit vector type. - bool is2048BitVector() const { - return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 || - SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64); - } - - /// Return true if this is an overloaded type for TableGen. - bool isOverloaded() const { - return (SimpleTy==MVT::Any || - SimpleTy==MVT::iAny || SimpleTy==MVT::fAny || - SimpleTy==MVT::vAny || SimpleTy==MVT::iPTRAny); - } - - /// Returns true if the given vector is a power of 2. - bool isPow2VectorType() const { - unsigned NElts = getVectorNumElements(); - return !(NElts & (NElts - 1)); - } - - /// Widens the length of the given vector MVT up to the nearest power of 2 - /// and returns that type. - MVT getPow2VectorType() const { - if (isPow2VectorType()) - return *this; - - unsigned NElts = getVectorNumElements(); - unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); - return MVT::getVectorVT(getVectorElementType(), Pow2NElts); - } - - /// If this is a vector, return the element type, otherwise return this. - MVT getScalarType() const { - return isVector() ? 
getVectorElementType() : *this; - } - - MVT getVectorElementType() const { - switch (SimpleTy) { - default: - llvm_unreachable("Not a vector MVT!"); - case v1i1: - case v2i1: - case v4i1: - case v8i1: - case v16i1: - case v32i1: - case v64i1: - case v128i1: - case v512i1: - case v1024i1: - case nxv1i1: - case nxv2i1: - case nxv4i1: - case nxv8i1: - case nxv16i1: - case nxv32i1: return i1; - case v1i8: - case v2i8: - case v4i8: - case v8i8: - case v16i8: - case v32i8: - case v64i8: - case v128i8: - case v256i8: - case nxv1i8: - case nxv2i8: - case nxv4i8: - case nxv8i8: - case nxv16i8: - case nxv32i8: return i8; - case v1i16: - case v2i16: - case v4i16: - case v8i16: - case v16i16: - case v32i16: - case v64i16: - case v128i16: - case nxv1i16: - case nxv2i16: - case nxv4i16: - case nxv8i16: - case nxv16i16: - case nxv32i16: return i16; - case v1i32: - case v2i32: - case v4i32: - case v8i32: - case v16i32: - case v32i32: - case v64i32: - case nxv1i32: - case nxv2i32: - case nxv4i32: - case nxv8i32: - case nxv16i32: - case nxv32i32: return i32; - case v1i64: - case v2i64: - case v4i64: - case v8i64: - case v16i64: - case v32i64: - case nxv1i64: - case nxv2i64: - case nxv4i64: - case nxv8i64: - case nxv16i64: - case nxv32i64: return i64; - case v1i128: return i128; - case v2f16: - case v4f16: - case v8f16: - case nxv2f16: - case nxv4f16: - case nxv8f16: return f16; - case v1f32: - case v2f32: - case v4f32: - case v8f32: - case v16f32: - case nxv1f32: - case nxv2f32: - case nxv4f32: - case nxv8f32: - case nxv16f32: return f32; - case v1f64: - case v2f64: - case v4f64: - case v8f64: - case nxv1f64: - case nxv2f64: - case nxv4f64: - case nxv8f64: return f64; - } - } - - unsigned getVectorNumElements() const { - switch (SimpleTy) { - default: - llvm_unreachable("Not a vector MVT!"); - case v1024i1: return 1024; - case v512i1: return 512; - case v256i8: return 256; - case v128i1: - case v128i8: - case v128i16: return 128; - case v64i1: - case v64i8: - case v64i16: - case v64i32: return 64; - case v32i1: - case v32i8: - case v32i16: - case v32i32: - case v32i64: - case nxv32i1: - case nxv32i8: - case nxv32i16: - case nxv32i32: - case nxv32i64: return 32; - case v16i1: - case v16i8: - case v16i16: - case v16i32: - case v16i64: - case v16f32: - case nxv16i1: - case nxv16i8: - case nxv16i16: - case nxv16i32: - case nxv16i64: - case nxv16f32: return 16; - case v8i1: - case v8i8: - case v8i16: - case v8i32: - case v8i64: - case v8f16: - case v8f32: - case v8f64: - case nxv8i1: - case nxv8i8: - case nxv8i16: - case nxv8i32: - case nxv8i64: - case nxv8f16: - case nxv8f32: - case nxv8f64: return 8; - case v4i1: - case v4i8: - case v4i16: - case v4i32: - case v4i64: - case v4f16: - case v4f32: - case v4f64: - case nxv4i1: - case nxv4i8: - case nxv4i16: - case nxv4i32: - case nxv4i64: - case nxv4f16: - case nxv4f32: - case nxv4f64: return 4; - case v2i1: - case v2i8: - case v2i16: - case v2i32: - case v2i64: - case v2f16: - case v2f32: - case v2f64: - case nxv2i1: - case nxv2i8: - case nxv2i16: - case nxv2i32: - case nxv2i64: - case nxv2f16: - case nxv2f32: - case nxv2f64: return 2; - case v1i1: - case v1i8: - case v1i16: - case v1i32: - case v1i64: - case v1i128: - case v1f32: - case v1f64: - case nxv1i1: - case nxv1i8: - case nxv1i16: - case nxv1i32: - case nxv1i64: - case nxv1f32: - case nxv1f64: return 1; - } - } - - MVT::ElementCount getVectorElementCount() const { - return { getVectorNumElements(), isScalableVector() }; - } - - unsigned getSizeInBits() const { - switch (SimpleTy) { - default: - 
llvm_unreachable("getSizeInBits called on extended MVT."); - case Other: - llvm_unreachable("Value type is non-standard value, Other."); - case iPTR: - llvm_unreachable("Value type size is target-dependent. Ask TLI."); - case iPTRAny: - case iAny: - case fAny: - case vAny: - case Any: - llvm_unreachable("Value type is overloaded."); - case token: - llvm_unreachable("Token type is a sentinel that cannot be used " - "in codegen and has no size"); - case Metadata: - llvm_unreachable("Value type is metadata."); - case i1: - case v1i1: - case nxv1i1: return 1; - case v2i1: - case nxv2i1: return 2; - case v4i1: - case nxv4i1: return 4; - case i8 : - case v1i8: - case v8i1: - case nxv1i8: - case nxv8i1: return 8; - case i16 : - case f16: - case v16i1: - case v2i8: - case v1i16: - case nxv16i1: - case nxv2i8: - case nxv1i16: return 16; - case f32 : - case i32 : - case v32i1: - case v4i8: - case v2i16: - case v2f16: - case v1f32: - case v1i32: - case nxv32i1: - case nxv4i8: - case nxv2i16: - case nxv1i32: - case nxv2f16: - case nxv1f32: return 32; - case x86mmx: - case f64 : - case i64 : - case v64i1: - case v8i8: - case v4i16: - case v2i32: - case v1i64: - case v4f16: - case v2f32: - case v1f64: - case nxv8i8: - case nxv4i16: - case nxv2i32: - case nxv1i64: - case nxv4f16: - case nxv2f32: - case nxv1f64: return 64; - case f80 : return 80; - case f128: - case ppcf128: - case i128: - case v128i1: - case v16i8: - case v8i16: - case v4i32: - case v2i64: - case v1i128: - case v8f16: - case v4f32: - case v2f64: - case nxv16i8: - case nxv8i16: - case nxv4i32: - case nxv2i64: - case nxv8f16: - case nxv4f32: - case nxv2f64: return 128; - case v32i8: - case v16i16: - case v8i32: - case v4i64: - case v8f32: - case v4f64: - case nxv32i8: - case nxv16i16: - case nxv8i32: - case nxv4i64: - case nxv8f32: - case nxv4f64: return 256; - case v512i1: - case v64i8: - case v32i16: - case v16i32: - case v8i64: - case v16f32: - case v8f64: - case nxv32i16: - case nxv16i32: - case nxv8i64: - case nxv16f32: - case nxv8f64: return 512; - case v1024i1: - case v128i8: - case v64i16: - case v32i32: - case v16i64: - case nxv32i32: - case nxv16i64: return 1024; - case v256i8: - case v128i16: - case v64i32: - case v32i64: - case nxv32i64: return 2048; - } - } - - unsigned getScalarSizeInBits() const { - return getScalarType().getSizeInBits(); - } - - /// Return the number of bytes overwritten by a store of the specified value - /// type. - unsigned getStoreSize() const { - return (getSizeInBits() + 7) / 8; - } - - /// Return the number of bits overwritten by a store of the specified value - /// type. - unsigned getStoreSizeInBits() const { - return getStoreSize() * 8; - } - - /// Return true if this has more bits than VT. - bool bitsGT(MVT VT) const { - return getSizeInBits() > VT.getSizeInBits(); - } - - /// Return true if this has no less bits than VT. - bool bitsGE(MVT VT) const { - return getSizeInBits() >= VT.getSizeInBits(); - } - - /// Return true if this has less bits than VT. - bool bitsLT(MVT VT) const { - return getSizeInBits() < VT.getSizeInBits(); - } - - /// Return true if this has no more bits than VT. 
- bool bitsLE(MVT VT) const { - return getSizeInBits() <= VT.getSizeInBits(); - } - - static MVT getFloatingPointVT(unsigned BitWidth) { - switch (BitWidth) { - default: - llvm_unreachable("Bad bit width!"); - case 16: - return MVT::f16; - case 32: - return MVT::f32; - case 64: - return MVT::f64; - case 80: - return MVT::f80; - case 128: - return MVT::f128; - } - } - - static MVT getIntegerVT(unsigned BitWidth) { - switch (BitWidth) { - default: - return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); - case 1: - return MVT::i1; - case 8: - return MVT::i8; - case 16: - return MVT::i16; - case 32: - return MVT::i32; - case 64: - return MVT::i64; - case 128: - return MVT::i128; - } - } - - static MVT getVectorVT(MVT VT, unsigned NumElements) { - switch (VT.SimpleTy) { - default: - break; - case MVT::i1: - if (NumElements == 1) return MVT::v1i1; - if (NumElements == 2) return MVT::v2i1; - if (NumElements == 4) return MVT::v4i1; - if (NumElements == 8) return MVT::v8i1; - if (NumElements == 16) return MVT::v16i1; - if (NumElements == 32) return MVT::v32i1; - if (NumElements == 64) return MVT::v64i1; - if (NumElements == 128) return MVT::v128i1; - if (NumElements == 512) return MVT::v512i1; - if (NumElements == 1024) return MVT::v1024i1; - break; - case MVT::i8: - if (NumElements == 1) return MVT::v1i8; - if (NumElements == 2) return MVT::v2i8; - if (NumElements == 4) return MVT::v4i8; - if (NumElements == 8) return MVT::v8i8; - if (NumElements == 16) return MVT::v16i8; - if (NumElements == 32) return MVT::v32i8; - if (NumElements == 64) return MVT::v64i8; - if (NumElements == 128) return MVT::v128i8; - if (NumElements == 256) return MVT::v256i8; - break; - case MVT::i16: - if (NumElements == 1) return MVT::v1i16; - if (NumElements == 2) return MVT::v2i16; - if (NumElements == 4) return MVT::v4i16; - if (NumElements == 8) return MVT::v8i16; - if (NumElements == 16) return MVT::v16i16; - if (NumElements == 32) return MVT::v32i16; - if (NumElements == 64) return MVT::v64i16; - if (NumElements == 128) return MVT::v128i16; - break; - case MVT::i32: - if (NumElements == 1) return MVT::v1i32; - if (NumElements == 2) return MVT::v2i32; - if (NumElements == 4) return MVT::v4i32; - if (NumElements == 8) return MVT::v8i32; - if (NumElements == 16) return MVT::v16i32; - if (NumElements == 32) return MVT::v32i32; - if (NumElements == 64) return MVT::v64i32; - break; - case MVT::i64: - if (NumElements == 1) return MVT::v1i64; - if (NumElements == 2) return MVT::v2i64; - if (NumElements == 4) return MVT::v4i64; - if (NumElements == 8) return MVT::v8i64; - if (NumElements == 16) return MVT::v16i64; - if (NumElements == 32) return MVT::v32i64; - break; - case MVT::i128: - if (NumElements == 1) return MVT::v1i128; - break; - case MVT::f16: - if (NumElements == 2) return MVT::v2f16; - if (NumElements == 4) return MVT::v4f16; - if (NumElements == 8) return MVT::v8f16; - break; - case MVT::f32: - if (NumElements == 1) return MVT::v1f32; - if (NumElements == 2) return MVT::v2f32; - if (NumElements == 4) return MVT::v4f32; - if (NumElements == 8) return MVT::v8f32; - if (NumElements == 16) return MVT::v16f32; - break; - case MVT::f64: - if (NumElements == 1) return MVT::v1f64; - if (NumElements == 2) return MVT::v2f64; - if (NumElements == 4) return MVT::v4f64; - if (NumElements == 8) return MVT::v8f64; - break; - } - return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); - } - - static MVT getScalableVectorVT(MVT VT, unsigned NumElements) { - switch(VT.SimpleTy) { - default: - break; - case MVT::i1: 
- if (NumElements == 1) return MVT::nxv1i1; - if (NumElements == 2) return MVT::nxv2i1; - if (NumElements == 4) return MVT::nxv4i1; - if (NumElements == 8) return MVT::nxv8i1; - if (NumElements == 16) return MVT::nxv16i1; - if (NumElements == 32) return MVT::nxv32i1; - break; - case MVT::i8: - if (NumElements == 1) return MVT::nxv1i8; - if (NumElements == 2) return MVT::nxv2i8; - if (NumElements == 4) return MVT::nxv4i8; - if (NumElements == 8) return MVT::nxv8i8; - if (NumElements == 16) return MVT::nxv16i8; - if (NumElements == 32) return MVT::nxv32i8; - break; - case MVT::i16: - if (NumElements == 1) return MVT::nxv1i16; - if (NumElements == 2) return MVT::nxv2i16; - if (NumElements == 4) return MVT::nxv4i16; - if (NumElements == 8) return MVT::nxv8i16; - if (NumElements == 16) return MVT::nxv16i16; - if (NumElements == 32) return MVT::nxv32i16; - break; - case MVT::i32: - if (NumElements == 1) return MVT::nxv1i32; - if (NumElements == 2) return MVT::nxv2i32; - if (NumElements == 4) return MVT::nxv4i32; - if (NumElements == 8) return MVT::nxv8i32; - if (NumElements == 16) return MVT::nxv16i32; - if (NumElements == 32) return MVT::nxv32i32; - break; - case MVT::i64: - if (NumElements == 1) return MVT::nxv1i64; - if (NumElements == 2) return MVT::nxv2i64; - if (NumElements == 4) return MVT::nxv4i64; - if (NumElements == 8) return MVT::nxv8i64; - if (NumElements == 16) return MVT::nxv16i64; - if (NumElements == 32) return MVT::nxv32i64; - break; - case MVT::f16: - if (NumElements == 2) return MVT::nxv2f16; - if (NumElements == 4) return MVT::nxv4f16; - if (NumElements == 8) return MVT::nxv8f16; - break; - case MVT::f32: - if (NumElements == 1) return MVT::nxv1f32; - if (NumElements == 2) return MVT::nxv2f32; - if (NumElements == 4) return MVT::nxv4f32; - if (NumElements == 8) return MVT::nxv8f32; - if (NumElements == 16) return MVT::nxv16f32; - break; - case MVT::f64: - if (NumElements == 1) return MVT::nxv1f64; - if (NumElements == 2) return MVT::nxv2f64; - if (NumElements == 4) return MVT::nxv4f64; - if (NumElements == 8) return MVT::nxv8f64; - break; - } - return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); - } - - static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) { - if (IsScalable) - return getScalableVectorVT(VT, NumElements); - return getVectorVT(VT, NumElements); - } - - static MVT getVectorVT(MVT VT, MVT::ElementCount EC) { - if (EC.Scalable) - return getScalableVectorVT(VT, EC.Min); - return getVectorVT(VT, EC.Min); - } - - /// Return the value type corresponding to the specified type. This returns - /// all pointers as iPTR. If HandleUnknown is true, unknown types are - /// returned as Other, otherwise they are invalid. - static MVT getVT(Type *Ty, bool HandleUnknown = false); - - private: - /// A simple iterator over the MVT::SimpleValueType enum. - struct mvt_iterator { - SimpleValueType VT; - - mvt_iterator(SimpleValueType VT) : VT(VT) {} - - MVT operator*() const { return VT; } - bool operator!=(const mvt_iterator &LHS) const { return VT != LHS.VT; } - - mvt_iterator& operator++() { - VT = (MVT::SimpleValueType)((int)VT + 1); - assert((int)VT <= MVT::MAX_ALLOWED_VALUETYPE && - "MVT iterator overflowed."); - return *this; - } - }; - - /// A range of the MVT::SimpleValueType enum. 
- using mvt_range = iterator_range<mvt_iterator>; - - public: - /// SimpleValueType Iteration - /// @{ - static mvt_range all_valuetypes() { - return mvt_range(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE); - } - - static mvt_range integer_valuetypes() { - return mvt_range(MVT::FIRST_INTEGER_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_VALUETYPE + 1)); - } - - static mvt_range fp_valuetypes() { - return mvt_range(MVT::FIRST_FP_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_VALUETYPE + 1)); - } - - static mvt_range vector_valuetypes() { - return mvt_range(MVT::FIRST_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1)); - } - - static mvt_range integer_vector_valuetypes() { - return mvt_range( - MVT::FIRST_INTEGER_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1)); - } - - static mvt_range fp_vector_valuetypes() { - return mvt_range( - MVT::FIRST_FP_VECTOR_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); - } - - static mvt_range integer_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); - } - - static mvt_range fp_scalable_vector_valuetypes() { - return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, - (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); - } - /// @} - }; - -} // end namespace llvm - -#endif // LLVM_CODEGEN_MACHINEVALUETYPE_H diff --git a/include/llvm/CodeGen/MacroFusion.h b/include/llvm/CodeGen/MacroFusion.h index dc105fdc68fd..a77226ddaf33 100644 --- a/include/llvm/CodeGen/MacroFusion.h +++ b/include/llvm/CodeGen/MacroFusion.h @@ -25,7 +25,7 @@ class ScheduleDAGMutation; class TargetInstrInfo; class TargetSubtargetInfo; -/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// Check if the instr pair, FirstMI and SecondMI, should be fused /// together. Given SecondMI, when FirstMI is unspecified, then check if /// SecondMI may be part of a fused pair at all. using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII, @@ -33,13 +33,13 @@ using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII, const MachineInstr *FirstMI, const MachineInstr &SecondMI)>; -/// \brief Create a DAG scheduling mutation to pair instructions back to back +/// Create a DAG scheduling mutation to pair instructions back to back /// for instructions that benefit according to the target-specific /// shouldScheduleAdjacent predicate function. std::unique_ptr<ScheduleDAGMutation> createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent); -/// \brief Create a DAG scheduling mutation to pair branch instructions with one +/// Create a DAG scheduling mutation to pair branch instructions with one /// of their predecessors back to back for instructions that benefit according /// to the target-specific shouldScheduleAdjacent predicate function. std::unique_ptr<ScheduleDAGMutation> diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h index e94878ced10d..a6d88b057dcb 100644 --- a/include/llvm/CodeGen/PBQP/Graph.h +++ b/include/llvm/CodeGen/PBQP/Graph.h @@ -29,12 +29,12 @@ namespace PBQP { using NodeId = unsigned; using EdgeId = unsigned; - /// @brief Returns a value representing an invalid (non-existent) node. + /// Returns a value representing an invalid (non-existent) node. 
static NodeId invalidNodeId() { return std::numeric_limits<NodeId>::max(); } - /// @brief Returns a value representing an invalid (non-existent) edge. + /// Returns a value representing an invalid (non-existent) edge. static EdgeId invalidEdgeId() { return std::numeric_limits<EdgeId>::max(); } @@ -338,19 +338,19 @@ namespace PBQP { const NodeEntry &NE; }; - /// @brief Construct an empty PBQP graph. + /// Construct an empty PBQP graph. Graph() = default; - /// @brief Construct an empty PBQP graph with the given graph metadata. + /// Construct an empty PBQP graph with the given graph metadata. Graph(GraphMetadata Metadata) : Metadata(std::move(Metadata)) {} - /// @brief Get a reference to the graph metadata. + /// Get a reference to the graph metadata. GraphMetadata& getMetadata() { return Metadata; } - /// @brief Get a const-reference to the graph metadata. + /// Get a const-reference to the graph metadata. const GraphMetadata& getMetadata() const { return Metadata; } - /// @brief Lock this graph to the given solver instance in preparation + /// Lock this graph to the given solver instance in preparation /// for running the solver. This method will call solver.handleAddNode for /// each node in the graph, and handleAddEdge for each edge, to give the /// solver an opportunity to set up any requried metadata. @@ -363,13 +363,13 @@ namespace PBQP { Solver->handleAddEdge(EId); } - /// @brief Release from solver instance. + /// Release from solver instance. void unsetSolver() { assert(Solver && "Solver not set."); Solver = nullptr; } - /// @brief Add a node with the given costs. + /// Add a node with the given costs. /// @param Costs Cost vector for the new node. /// @return Node iterator for the added node. template <typename OtherVectorT> @@ -382,7 +382,7 @@ namespace PBQP { return NId; } - /// @brief Add a node bypassing the cost allocator. + /// Add a node bypassing the cost allocator. /// @param Costs Cost vector ptr for the new node (must be convertible to /// VectorPtr). /// @return Node iterator for the added node. @@ -401,7 +401,7 @@ namespace PBQP { return NId; } - /// @brief Add an edge between the given nodes with the given costs. + /// Add an edge between the given nodes with the given costs. /// @param N1Id First node. /// @param N2Id Second node. /// @param Costs Cost matrix for new edge. @@ -419,7 +419,7 @@ namespace PBQP { return EId; } - /// @brief Add an edge bypassing the cost allocator. + /// Add an edge bypassing the cost allocator. /// @param N1Id First node. /// @param N2Id Second node. /// @param Costs Cost matrix for new edge. @@ -444,7 +444,7 @@ namespace PBQP { return EId; } - /// @brief Returns true if the graph is empty. + /// Returns true if the graph is empty. bool empty() const { return NodeIdSet(*this).empty(); } NodeIdSet nodeIds() const { return NodeIdSet(*this); } @@ -452,15 +452,15 @@ namespace PBQP { AdjEdgeIdSet adjEdgeIds(NodeId NId) { return AdjEdgeIdSet(getNode(NId)); } - /// @brief Get the number of nodes in the graph. + /// Get the number of nodes in the graph. /// @return Number of nodes in the graph. unsigned getNumNodes() const { return NodeIdSet(*this).size(); } - /// @brief Get the number of edges in the graph. + /// Get the number of edges in the graph. /// @return Number of edges in the graph. unsigned getNumEdges() const { return EdgeIdSet(*this).size(); } - /// @brief Set a node's cost vector. + /// Set a node's cost vector. /// @param NId Node to update. /// @param Costs New costs to set. 
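    ///
    /// For example (a sketch assuming a graph G and a valid node id NId):
    ///
    /// \code
    ///   G.setNodeCosts(NId, Vector(2, 0.0));
    /// \endcode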
template <typename OtherVectorT> @@ -471,7 +471,7 @@ namespace PBQP { getNode(NId).Costs = AllocatedCosts; } - /// @brief Get a VectorPtr to a node's cost vector. Rarely useful - use + /// Get a VectorPtr to a node's cost vector. Rarely useful - use /// getNodeCosts where possible. /// @param NId Node id. /// @return VectorPtr to node cost vector. @@ -483,7 +483,7 @@ namespace PBQP { return getNode(NId).Costs; } - /// @brief Get a node's cost vector. + /// Get a node's cost vector. /// @param NId Node id. /// @return Node cost vector. const Vector& getNodeCosts(NodeId NId) const { @@ -502,7 +502,7 @@ namespace PBQP { return getNode(NId).getAdjEdgeIds().size(); } - /// @brief Update an edge's cost matrix. + /// Update an edge's cost matrix. /// @param EId Edge id. /// @param Costs New cost matrix. template <typename OtherMatrixT> @@ -513,7 +513,7 @@ namespace PBQP { getEdge(EId).Costs = AllocatedCosts; } - /// @brief Get a MatrixPtr to a node's cost matrix. Rarely useful - use + /// Get a MatrixPtr to a node's cost matrix. Rarely useful - use /// getEdgeCosts where possible. /// @param EId Edge id. /// @return MatrixPtr to edge cost matrix. @@ -525,7 +525,7 @@ namespace PBQP { return getEdge(EId).Costs; } - /// @brief Get an edge's cost matrix. + /// Get an edge's cost matrix. /// @param EId Edge id. /// @return Edge cost matrix. const Matrix& getEdgeCosts(EdgeId EId) const { @@ -540,21 +540,21 @@ namespace PBQP { return getEdge(EId).Metadata; } - /// @brief Get the first node connected to this edge. + /// Get the first node connected to this edge. /// @param EId Edge id. /// @return The first node connected to the given edge. NodeId getEdgeNode1Id(EdgeId EId) const { return getEdge(EId).getN1Id(); } - /// @brief Get the second node connected to this edge. + /// Get the second node connected to this edge. /// @param EId Edge id. /// @return The second node connected to the given edge. NodeId getEdgeNode2Id(EdgeId EId) const { return getEdge(EId).getN2Id(); } - /// @brief Get the "other" node connected to this edge. + /// Get the "other" node connected to this edge. /// @param EId Edge id. /// @param NId Node id for the "given" node. /// @return The iterator for the "other" node connected to this edge. @@ -566,7 +566,7 @@ namespace PBQP { return E.getN1Id(); } - /// @brief Get the edge connecting two nodes. + /// Get the edge connecting two nodes. /// @param N1Id First node id. /// @param N2Id Second node id. /// @return An id for edge (N1Id, N2Id) if such an edge exists, @@ -581,7 +581,7 @@ namespace PBQP { return invalidEdgeId(); } - /// @brief Remove a node from the graph. + /// Remove a node from the graph. /// @param NId Node id. void removeNode(NodeId NId) { if (Solver) @@ -598,7 +598,7 @@ namespace PBQP { FreeNodeIds.push_back(NId); } - /// @brief Disconnect an edge from the given node. + /// Disconnect an edge from the given node. /// /// Removes the given edge from the adjacency list of the given node. /// This operation leaves the edge in an 'asymmetric' state: It will no @@ -631,14 +631,14 @@ namespace PBQP { E.disconnectFrom(*this, NId); } - /// @brief Convenience method to disconnect all neighbours from the given + /// Convenience method to disconnect all neighbours from the given /// node. void disconnectAllNeighborsFromNode(NodeId NId) { for (auto AEId : adjEdgeIds(NId)) disconnectEdge(AEId, getEdgeOtherNodeId(AEId, NId)); } - /// @brief Re-attach an edge to its nodes. + /// Re-attach an edge to its nodes. 
/// /// Adds an edge that had been previously disconnected back into the /// adjacency set of the nodes that the edge connects. @@ -649,7 +649,7 @@ namespace PBQP { Solver->handleReconnectEdge(EId, NId); } - /// @brief Remove an edge from the graph. + /// Remove an edge from the graph. /// @param EId Edge id. void removeEdge(EdgeId EId) { if (Solver) @@ -660,7 +660,7 @@ namespace PBQP { Edges[EId].invalidate(); } - /// @brief Remove all nodes and edges from the graph. + /// Remove all nodes and edges from the graph. void clear() { Nodes.clear(); FreeNodeIds.clear(); diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h index ba405e816d10..d1432a3053c4 100644 --- a/include/llvm/CodeGen/PBQP/Math.h +++ b/include/llvm/CodeGen/PBQP/Math.h @@ -22,34 +22,34 @@ namespace PBQP { using PBQPNum = float; -/// \brief PBQP Vector class. +/// PBQP Vector class. class Vector { friend hash_code hash_value(const Vector &); public: - /// \brief Construct a PBQP vector of the given size. + /// Construct a PBQP vector of the given size. explicit Vector(unsigned Length) : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {} - /// \brief Construct a PBQP vector with initializer. + /// Construct a PBQP vector with initializer. Vector(unsigned Length, PBQPNum InitVal) : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) { std::fill(Data.get(), Data.get() + Length, InitVal); } - /// \brief Copy construct a PBQP vector. + /// Copy construct a PBQP vector. Vector(const Vector &V) : Length(V.Length), Data(llvm::make_unique<PBQPNum []>(Length)) { std::copy(V.Data.get(), V.Data.get() + Length, Data.get()); } - /// \brief Move construct a PBQP vector. + /// Move construct a PBQP vector. Vector(Vector &&V) : Length(V.Length), Data(std::move(V.Data)) { V.Length = 0; } - /// \brief Comparison operator. + /// Comparison operator. bool operator==(const Vector &V) const { assert(Length != 0 && Data && "Invalid vector"); if (Length != V.Length) @@ -57,27 +57,27 @@ public: return std::equal(Data.get(), Data.get() + Length, V.Data.get()); } - /// \brief Return the length of the vector + /// Return the length of the vector unsigned getLength() const { assert(Length != 0 && Data && "Invalid vector"); return Length; } - /// \brief Element access. + /// Element access. PBQPNum& operator[](unsigned Index) { assert(Length != 0 && Data && "Invalid vector"); assert(Index < Length && "Vector element access out of bounds."); return Data[Index]; } - /// \brief Const element access. + /// Const element access. const PBQPNum& operator[](unsigned Index) const { assert(Length != 0 && Data && "Invalid vector"); assert(Index < Length && "Vector element access out of bounds."); return Data[Index]; } - /// \brief Add another vector to this one. + /// Add another vector to this one. Vector& operator+=(const Vector &V) { assert(Length != 0 && Data && "Invalid vector"); assert(Length == V.Length && "Vector length mismatch."); @@ -86,7 +86,7 @@ public: return *this; } - /// \brief Returns the index of the minimum value in this vector + /// Returns the index of the minimum value in this vector unsigned minIndex() const { assert(Length != 0 && Data && "Invalid vector"); return std::min_element(Data.get(), Data.get() + Length) - Data.get(); @@ -97,14 +97,14 @@ private: std::unique_ptr<PBQPNum []> Data; }; -/// \brief Return a hash_value for the given vector. +/// Return a hash_value for the given vector. 
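///
/// For example (sketch):
///
/// \code
///   Vector V(3, 1.0);
///   hash_code H = hash_value(V);
/// \endcode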
inline hash_code hash_value(const Vector &V) { unsigned *VBegin = reinterpret_cast<unsigned*>(V.Data.get()); unsigned *VEnd = reinterpret_cast<unsigned*>(V.Data.get() + V.Length); return hash_combine(V.Length, hash_combine_range(VBegin, VEnd)); } -/// \brief Output a textual representation of the given vector on the given +/// Output a textual representation of the given vector on the given /// output stream. template <typename OStream> OStream& operator<<(OStream &OS, const Vector &V) { @@ -118,18 +118,18 @@ OStream& operator<<(OStream &OS, const Vector &V) { return OS; } -/// \brief PBQP Matrix class +/// PBQP Matrix class class Matrix { private: friend hash_code hash_value(const Matrix &); public: - /// \brief Construct a PBQP Matrix with the given dimensions. + /// Construct a PBQP Matrix with the given dimensions. Matrix(unsigned Rows, unsigned Cols) : Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) { } - /// \brief Construct a PBQP Matrix with the given dimensions and initial + /// Construct a PBQP Matrix with the given dimensions and initial /// value. Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal) : Rows(Rows), Cols(Cols), @@ -137,20 +137,20 @@ public: std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal); } - /// \brief Copy construct a PBQP matrix. + /// Copy construct a PBQP matrix. Matrix(const Matrix &M) : Rows(M.Rows), Cols(M.Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) { std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get()); } - /// \brief Move construct a PBQP matrix. + /// Move construct a PBQP matrix. Matrix(Matrix &&M) : Rows(M.Rows), Cols(M.Cols), Data(std::move(M.Data)) { M.Rows = M.Cols = 0; } - /// \brief Comparison operator. + /// Comparison operator. bool operator==(const Matrix &M) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); if (Rows != M.Rows || Cols != M.Cols) @@ -158,33 +158,33 @@ public: return std::equal(Data.get(), Data.get() + (Rows * Cols), M.Data.get()); } - /// \brief Return the number of rows in this matrix. + /// Return the number of rows in this matrix. unsigned getRows() const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); return Rows; } - /// \brief Return the number of cols in this matrix. + /// Return the number of cols in this matrix. unsigned getCols() const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); return Cols; } - /// \brief Matrix element access. + /// Matrix element access. PBQPNum* operator[](unsigned R) { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); assert(R < Rows && "Row out of bounds."); return Data.get() + (R * Cols); } - /// \brief Matrix element access. + /// Matrix element access. const PBQPNum* operator[](unsigned R) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); assert(R < Rows && "Row out of bounds."); return Data.get() + (R * Cols); } - /// \brief Returns the given row as a vector. + /// Returns the given row as a vector. Vector getRowAsVector(unsigned R) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); Vector V(Cols); @@ -193,7 +193,7 @@ public: return V; } - /// \brief Returns the given column as a vector. + /// Returns the given column as a vector. Vector getColAsVector(unsigned C) const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); Vector V(Rows); @@ -202,7 +202,7 @@ public: return V; } - /// \brief Matrix transpose. + /// Matrix transpose. 
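  ///
  /// For example (sketch):
  ///
  /// \code
  ///   Matrix M(2, 3, 1.0);
  ///   Matrix T = M.transpose(); // T.getRows() == 3, T.getCols() == 2
  /// \endcode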
Matrix transpose() const { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); Matrix M(Cols, Rows); @@ -212,7 +212,7 @@ public: return M; } - /// \brief Add the given matrix to this one. + /// Add the given matrix to this one. Matrix& operator+=(const Matrix &M) { assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix"); assert(Rows == M.Rows && Cols == M.Cols && @@ -234,7 +234,7 @@ private: std::unique_ptr<PBQPNum []> Data; }; -/// \brief Return a hash_code for the given matrix. +/// Return a hash_code for the given matrix. inline hash_code hash_value(const Matrix &M) { unsigned *MBegin = reinterpret_cast<unsigned*>(M.Data.get()); unsigned *MEnd = @@ -242,7 +242,7 @@ inline hash_code hash_value(const Matrix &M) { return hash_combine(M.Rows, M.Cols, hash_combine_range(MBegin, MEnd)); } -/// \brief Output a textual representation of the given matrix on the given +/// Output a textual representation of the given matrix on the given /// output stream. template <typename OStream> OStream& operator<<(OStream &OS, const Matrix &M) { diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h index 8aeb51936760..21b99027970d 100644 --- a/include/llvm/CodeGen/PBQP/ReductionRules.h +++ b/include/llvm/CodeGen/PBQP/ReductionRules.h @@ -23,7 +23,7 @@ namespace llvm { namespace PBQP { - /// \brief Reduce a node of degree one. + /// Reduce a node of degree one. /// /// Propagate costs from the given node, which must be of degree one, to its /// neighbor. Notify the problem domain. @@ -166,7 +166,7 @@ namespace PBQP { } #endif - // \brief Find a solution to a fully reduced graph by backpropagation. + // Find a solution to a fully reduced graph by backpropagation. // // Given a graph and a reduction order, pop each node from the reduction // order and greedily compute a minimum solution based on the node costs, and diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h index 6a247277fdfa..4d4379fbc2c2 100644 --- a/include/llvm/CodeGen/PBQP/Solution.h +++ b/include/llvm/CodeGen/PBQP/Solution.h @@ -21,7 +21,7 @@ namespace llvm { namespace PBQP { - /// \brief Represents a solution to a PBQP problem. + /// Represents a solution to a PBQP problem. /// /// To get the selection for each node in the problem use the getSelection method. class Solution { @@ -30,17 +30,17 @@ namespace PBQP { SelectionsMap selections; public: - /// \brief Initialise an empty solution. + /// Initialise an empty solution. Solution() = default; - /// \brief Set the selection for a given node. + /// Set the selection for a given node. /// @param nodeId Node id. /// @param selection Selection for nodeId. void setSelection(GraphBase::NodeId nodeId, unsigned selection) { selections[nodeId] = selection; } - /// \brief Get a node's selection. + /// Get a node's selection. /// @param nodeId Node id. /// @return The selection for nodeId; unsigned getSelection(GraphBase::NodeId nodeId) const { diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h index 269b7a7b3a35..995467dc56d8 100644 --- a/include/llvm/CodeGen/PBQPRAConstraint.h +++ b/include/llvm/CodeGen/PBQPRAConstraint.h @@ -33,7 +33,7 @@ class PBQPRAGraph; using PBQPRAGraph = PBQP::RegAlloc::PBQPRAGraph; -/// @brief Abstract base for classes implementing PBQP register allocation +/// Abstract base for classes implementing PBQP register allocation /// constraints (e.g. Spill-costs, interference, coalescing). 
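// (Illustrative aside, not part of the patch.) A concrete constraint subclasses the interface declared below and rewrites costs in its graph hook; the hook itself is elided from this hunk, so the apply() signature here is an assumption, and the subclass is hypothetical:
//
//   struct ZeroCostConstraint : PBQPRAConstraint {
//     void apply(PBQPRAGraph &G) override {
//       // A real constraint would adjust node cost Vectors and edge cost
//       // Matrices in G, e.g. to model spill costs or interference.
//     }
//   };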
class PBQPRAConstraint { public: @@ -44,7 +44,7 @@ private: virtual void anchor(); }; -/// @brief PBQP register allocation constraint composer. +/// PBQP register allocation constraint composer. /// /// Constraints added to this list will be applied, in the order that they are /// added, to the PBQP graph. diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h index 14ef0ec408ba..dbf09ea31e20 100644 --- a/include/llvm/CodeGen/ParallelCG.h +++ b/include/llvm/CodeGen/ParallelCG.h @@ -40,7 +40,7 @@ std::unique_ptr<Module> splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs, ArrayRef<llvm::raw_pwrite_stream *> BCOSs, const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, - TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile, + TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile, bool PreserveLocals = false); } // namespace llvm diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 4370d116e08c..cb12b14f4435 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -154,6 +154,9 @@ namespace llvm { /// This pass adds dead/undef flags after analyzing subregister lanes. extern char &DetectDeadLanesID; + /// This pass performs post-RA machine sinking for COPY instructions. + extern char &PostRAMachineSinkingID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// @@ -212,6 +215,10 @@ namespace llvm { /// into tails of their predecessors. extern char &TailDuplicateID; + /// Duplicate blocks with unconditional branches into tails of their + /// predecessors. Variant that works before register allocation. + extern char &EarlyTailDuplicateID; + /// MachineTraceMetrics - This pass computes critical path and CPU resource /// usage in an ensemble of traces. extern char &MachineTraceMetricsID; @@ -269,9 +276,13 @@ namespace llvm { /// memory operations. extern char &ImplicitNullChecksID; - /// MachineLICM - This pass performs LICM on machine instructions. + /// This pass performs loop invariant code motion on machine instructions. extern char &MachineLICMID; + /// This pass performs loop invariant code motion on machine instructions. + /// This variant works before register allocation. \see MachineLICMID. + extern char &EarlyMachineLICMID; + /// MachineSinking - This pass performs sinking on machine instructions. extern char &MachineSinkingID; @@ -290,7 +301,7 @@ namespace llvm { /// StackSlotColoring - This pass performs stack slot coloring. extern char &StackSlotColoringID; - /// \brief This pass lays out funclets contiguously. + /// This pass lays out funclets contiguously. extern char &FuncletLayoutID; /// This pass inserts the XRay instrumentation sleds if they are supported by @@ -300,7 +311,7 @@ namespace llvm { /// This pass inserts FEntry calls extern char &FEntryInserterID; - /// \brief This pass implements the "patchable-function" attribute. + /// This pass implements the "patchable-function" attribute. extern char &PatchableFunctionID; /// createStackProtectorPass - This pass adds stack protectors to functions. @@ -318,13 +329,17 @@ namespace llvm { /// createWinEHPass - Prepares personality functions used by MSVC on Windows, /// in addition to the Itanium LSDA based personalities.
- FunctionPass *createWinEHPass(); + FunctionPass *createWinEHPass(bool DemoteCatchSwitchPHIOnly = false); /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handle EH control flow. /// FunctionPass *createSjLjEHPreparePass(); + /// createWasmEHPass - This pass adapts exception handling code to use + /// WebAssembly's exception handling scheme. + FunctionPass *createWasmEHPass(); + /// LocalStackSlotAllocation - This pass assigns local frame indices to stack /// slots relative to one another and allocates base registers to access them /// when it is estimated by the target to be out of range of normal frame @@ -369,7 +384,7 @@ namespace llvm { /// ModulePass *createLowerEmuTLSPass(); - /// This pass lowers the @llvm.load.relative intrinsic to instructions. + /// This pass lowers the \@llvm.load.relative intrinsic to instructions. /// This is unsafe to do earlier because a pass may combine the constant /// initializer into the load, which may result in an overflowing evaluation. ModulePass *createPreISelIntrinsicLoweringPass(); @@ -408,7 +423,7 @@ namespace llvm { /// This pass performs outlining on machine instructions directly before /// printing assembly. - ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs = false); + ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true); /// This pass expands the experimental reduction intrinsics into sequences of /// shuffles. @@ -417,6 +432,15 @@ namespace llvm { // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); + /// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp + FunctionPass *createBreakFalseDeps(); + + // This pass expands indirectbr instructions. + FunctionPass *createIndirectBrExpandPass(); + + /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp + FunctionPass *createCFIInstrInserter(); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/ReachingDefAnalysis.h b/include/llvm/CodeGen/ReachingDefAnalysis.h new file mode 100644 index 000000000000..b21b745c8fd1 --- /dev/null +++ b/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -0,0 +1,118 @@ +//==--- llvm/CodeGen/ReachingDefAnalysis.h - Reaching Def Analysis -*- C++ -*---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Reaching Defs Analysis pass. +/// +/// This pass tracks, for each instruction, the "closest" reaching def of +/// a given register. It is used by BreakFalseDeps (for clearance calculation) +/// and ExecutionDomainFix (for arbitrating conflicting domains). +/// +/// Note that this is different from the usual notion of liveness. +/// The CPU doesn't care whether or not we consider a register killed. +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REACHINGDEFSANALYSIS_H +#define LLVM_CODEGEN_REACHINGDEFSANALYSIS_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LoopTraversal.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineInstr; + +/// This class provides the reaching def analysis.
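// (Illustrative aside, not part of the patch.) A client such as BreakFalseDeps might query the analysis like this; the helper function is hypothetical, but the two getters match the declarations just below:
//
//   void rateDependency(ReachingDefAnalysis &RDA, MachineInstr &MI,
//                       MCPhysReg PhysReg) {
//     // Block-relative instruction id of the closest def reaching MI.
//     int DefId = RDA.getReachingDef(&MI, PhysReg);
//     // Instructions executed since that def; a large clearance means a
//     // false dependency on PhysReg is cheap to break.
//     int Clearance = RDA.getClearance(&MI, PhysReg);
//     (void)DefId; (void)Clearance;
//   }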
+class ReachingDefAnalysis : public MachineFunctionPass { +private: + MachineFunction *MF; + const TargetRegisterInfo *TRI; + unsigned NumRegUnits; + /// Instruction that defined each register, relative to the beginning of the + /// current basic block. When a LiveRegsDefInfo is used to represent a + /// live-out register, this value is relative to the end of the basic block, + /// so it will be a negative number. + using LiveRegsDefInfo = std::vector<int>; + LiveRegsDefInfo LiveRegs; + + /// Keeps clearance information for all registers. Note that this + /// is different from the usual notion of liveness. The CPU + /// doesn't care whether or not we consider a register killed. + using OutRegsInfoMap = SmallVector<LiveRegsDefInfo, 4>; + OutRegsInfoMap MBBOutRegsInfos; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// Maps instructions to their instruction Ids, relative to the beginning of + /// their basic blocks. + DenseMap<MachineInstr *, int> InstIds; + + /// All reaching defs of a given RegUnit for a given MBB. + using MBBRegUnitDefs = SmallVector<int, 1>; + /// All reaching defs of all reg units for a given MBB. + using MBBDefsInfo = std::vector<MBBRegUnitDefs>; + /// All reaching defs of all reg units for all MBBs. + using MBBReachingDefsInfo = SmallVector<MBBDefsInfo, 4>; + MBBReachingDefsInfo MBBReachingDefs; + + /// Default values are 'nothing happened a long time ago'. + const int ReachingDefDefaultVal = -(1 << 20); + +public: + static char ID; // Pass identification, replacement for typeid + + ReachingDefAnalysis() : MachineFunctionPass(ID) { + initializeReachingDefAnalysisPass(*PassRegistry::getPassRegistry()); + } + void releaseMemory() override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + /// Provides the instruction id of the closest reaching def instruction of + /// PhysReg that reaches MI, relative to the beginning of MI's basic block. + int getReachingDef(MachineInstr *MI, int PhysReg); + + /// Provides the clearance - the number of instructions since the closest + /// reaching def instruction of PhysReg that reaches MI. + int getClearance(MachineInstr *MI, MCPhysReg PhysReg); + +private: + /// Set up LiveRegs by merging predecessor live-out values. + void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Update live-out values. + void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Process the given basic block. + void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Update def-ages for registers defined by MI. + /// Also break dependencies on partial defs and undef uses. + void processDefs(MachineInstr *); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_REACHINGDEFSANALYSIS_H diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index 5b342863eb50..ba9763077d09 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -43,10 +43,10 @@ class raw_ostream; namespace PBQP { namespace RegAlloc { -/// @brief Spill option index. +/// Spill option index.
inline unsigned getSpillOptionIdx() { return 0; } -/// \brief Metadata to speed allocatability test. +/// Metadata to speed allocatability test. /// /// Keeps track of the number of infinities in each row and column. class MatrixMetadata { @@ -89,7 +89,7 @@ private: std::unique_ptr<bool[]> UnsafeCols; }; -/// \brief Holds a vector of the allowed physical regs for a vreg. +/// Holds a vector of the allowed physical regs for a vreg. class AllowedRegVector { friend hash_code hash_value(const AllowedRegVector &); @@ -127,7 +127,7 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) { hash_combine_range(OStart, OEnd)); } -/// \brief Holds graph-level metadata relevant to PBQP RA problems. +/// Holds graph-level metadata relevant to PBQP RA problems. class GraphMetadata { private: using AllowedRegVecPool = ValuePool<AllowedRegVector>; @@ -164,7 +164,7 @@ private: AllowedRegVecPool AllowedRegVecs; }; -/// \brief Holds solver state and other metadata relevant to each PBQP RA node. +/// Holds solver state and other metadata relevant to each PBQP RA node. class NodeMetadata { public: using AllowedRegVector = RegAlloc::AllowedRegVector; @@ -505,14 +505,14 @@ private: public: PBQPRAGraph(GraphMetadata Metadata) : BaseT(std::move(Metadata)) {} - /// @brief Dump this graph to dbgs(). + /// Dump this graph to dbgs(). void dump() const; - /// @brief Dump this graph to an output stream. + /// Dump this graph to an output stream. /// @param OS Output stream to print on. void dump(raw_ostream &OS) const; - /// @brief Print a representation of this graph in DOT format. + /// Print a representation of this graph in DOT format. /// @param OS Output stream to print on. void printDot(raw_ostream &OS) const; }; @@ -527,7 +527,7 @@ inline Solution solve(PBQPRAGraph& G) { } // end namespace RegAlloc } // end namespace PBQP -/// @brief Create a PBQP register allocator instance. +/// Create a PBQP register allocator instance. FunctionPass * createPBQPRegisterAllocator(char *customPassID = nullptr); diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 2b14b78d621d..79054b9e33b7 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -171,10 +171,10 @@ class RegisterOperands { public: /// List of virtual registers and register units read by the instruction. SmallVector<RegisterMaskPair, 8> Uses; - /// \brief List of virtual registers and register units defined by the + /// List of virtual registers and register units defined by the /// instruction which are not dead. SmallVector<RegisterMaskPair, 8> Defs; - /// \brief List of virtual registers and register units defined by the + /// List of virtual registers and register units defined by the /// instruction but dead. SmallVector<RegisterMaskPair, 8> DeadDefs; @@ -219,7 +219,7 @@ public: return const_cast<PressureDiffs*>(this)->operator[](Idx); } - /// \brief Record pressure difference induced by the given operand list to + /// Record pressure difference induced by the given operand list to /// node with index \p Idx. void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI); @@ -546,7 +546,7 @@ protected: /// Add Reg to the live in set and increase max pressure. void discoverLiveIn(RegisterMaskPair Pair); - /// \brief Get the SlotIndex for the first nondebug instruction including or + /// Get the SlotIndex for the first nondebug instruction including or /// after the current position. 
SlotIndex getCurrSlot() const; diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 489c72b81a98..b6bd028a8cac 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -127,7 +127,7 @@ public: /// Find an unused register of the specified register class. /// Return 0 if none is found. - unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const; + unsigned FindUnusedReg(const TargetRegisterClass *RC) const; /// Add a scavenging frame index. void addScavengingFrameIndex(int FI) { @@ -158,7 +158,7 @@ public: /// Returns the scavenged register. /// This is deprecated as it depends on the quality of the kill flags being /// present; use scavengeRegisterBackwards() instead! - unsigned scavengeRegister(const TargetRegisterClass *RegClass, + unsigned scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj); unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj) { return scavengeRegister(RegClass, MBBI, SPAdj); @@ -218,7 +218,7 @@ private: /// Spill a register after position \p After and reload it before position /// \p UseMI. ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, - MachineBasicBlock::iterator After, + MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI); }; diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index eabadd8d784a..efd175eeed30 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -19,6 +19,7 @@ #ifndef LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" @@ -31,8 +32,6 @@ class Function; class TargetMachine; class PhysicalRegisterUsageInfo : public ImmutablePass { - virtual void anchor(); - public: static char ID; @@ -41,25 +40,20 @@ public: initializePhysicalRegisterUsageInfoPass(Registry); } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - - /// To set TargetMachine *, which is used to print - /// analysis when command line option -print-regusage is used. - void setTargetMachine(const TargetMachine *TM_) { TM = TM_; } + /// Set TargetMachine which is used to print analysis. + void setTargetMachine(const TargetMachine &TM); bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; /// To store RegMask for given Function *. - void storeUpdateRegUsageInfo(const Function *FP, - std::vector<uint32_t> RegMask); + void storeUpdateRegUsageInfo(const Function &FP, + ArrayRef<uint32_t> RegMask); - /// To query stored RegMask for given Function *, it will return nullptr if - /// function is not known. - const std::vector<uint32_t> *getRegUsageInfo(const Function *FP); + /// To query stored RegMask for given Function *, it will return an empty + /// array if function is not known.
+ ArrayRef<uint32_t> getRegUsageInfo(const Function &FP); void print(raw_ostream &OS, const Module *M = nullptr) const override; diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h index 03166ccdfe38..8d582ee298b6 100644 --- a/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -32,7 +32,7 @@ namespace llvm { ResourcePriorityQueue *PQ; explicit resource_sort(ResourcePriorityQueue *pq) : PQ(pq) {} - bool operator()(const SUnit* left, const SUnit* right) const; + bool operator()(const SUnit* LHS, const SUnit* RHS) const; }; class ResourcePriorityQueue : public SchedulingPriorityQueue { @@ -121,7 +121,7 @@ namespace llvm { void remove(SUnit *SU) override; /// scheduledNode - Main resource tracking point. - void scheduledNode(SUnit *Node) override; + void scheduledNode(SUnit *SU) override; bool isResourceAvailable(SUnit *SU); void reserveResources(SUnit *SU); diff --git a/include/llvm/CodeGen/RuntimeLibcalls.def b/include/llvm/CodeGen/RuntimeLibcalls.def deleted file mode 100644 index 7695e9d782ef..000000000000 --- a/include/llvm/CodeGen/RuntimeLibcalls.def +++ /dev/null @@ -1,495 +0,0 @@ -//===-- llvm/RuntimeLibcalls.def - File that describes libcalls -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the runtime library calls the backend can emit. -// The various long double types cannot be merged, because 80-bit library -// functions use "xf" and 128-bit use "tf". -// -// When adding PPCF128 functions here, note that their names generally need -// to be overridden for Darwin with the xxx$LDBL128 form. See -// PPCISelLowering.cpp. -// -//===----------------------------------------------------------------------===// - -// NOTE: NO INCLUDE GUARD DESIRED! - -// Provide definitions of macros so that users of this file do not have to -// define everything to use it... - -// Declare the enumerator for each libcall, along with its default name. Some -// libcalls have different names on particular OSes or architectures. 
These -// are set in InitLibcallNames() in TargetLoweringBase.cpp and/or by targets -// using TargetLoweringBase::setLibcallName() -#ifndef HANDLE_LIBCALL -#error "HANDLE_LIBCALL must be defined" -#endif - -// Integer -HANDLE_LIBCALL(SHL_I16, "__ashlhi3") -HANDLE_LIBCALL(SHL_I32, "__ashlsi3") -HANDLE_LIBCALL(SHL_I64, "__ashldi3") -HANDLE_LIBCALL(SHL_I128, "__ashlti3") -HANDLE_LIBCALL(SRL_I16, "__lshrhi3") -HANDLE_LIBCALL(SRL_I32, "__lshrsi3") -HANDLE_LIBCALL(SRL_I64, "__lshrdi3") -HANDLE_LIBCALL(SRL_I128, "__lshrti3") -HANDLE_LIBCALL(SRA_I16, "__ashrhi3") -HANDLE_LIBCALL(SRA_I32, "__ashrsi3") -HANDLE_LIBCALL(SRA_I64, "__ashrdi3") -HANDLE_LIBCALL(SRA_I128, "__ashrti3") -HANDLE_LIBCALL(MUL_I8, "__mulqi3") -HANDLE_LIBCALL(MUL_I16, "__mulhi3") -HANDLE_LIBCALL(MUL_I32, "__mulsi3") -HANDLE_LIBCALL(MUL_I64, "__muldi3") -HANDLE_LIBCALL(MUL_I128, "__multi3") -HANDLE_LIBCALL(MULO_I32, "__mulosi4") -HANDLE_LIBCALL(MULO_I64, "__mulodi4") -HANDLE_LIBCALL(MULO_I128, "__muloti4") -HANDLE_LIBCALL(SDIV_I8, "__divqi3") -HANDLE_LIBCALL(SDIV_I16, "__divhi3") -HANDLE_LIBCALL(SDIV_I32, "__divsi3") -HANDLE_LIBCALL(SDIV_I64, "__divdi3") -HANDLE_LIBCALL(SDIV_I128, "__divti3") -HANDLE_LIBCALL(UDIV_I8, "__udivqi3") -HANDLE_LIBCALL(UDIV_I16, "__udivhi3") -HANDLE_LIBCALL(UDIV_I32, "__udivsi3") -HANDLE_LIBCALL(UDIV_I64, "__udivdi3") -HANDLE_LIBCALL(UDIV_I128, "__udivti3") -HANDLE_LIBCALL(SREM_I8, "__modqi3") -HANDLE_LIBCALL(SREM_I16, "__modhi3") -HANDLE_LIBCALL(SREM_I32, "__modsi3") -HANDLE_LIBCALL(SREM_I64, "__moddi3") -HANDLE_LIBCALL(SREM_I128, "__modti3") -HANDLE_LIBCALL(UREM_I8, "__umodqi3") -HANDLE_LIBCALL(UREM_I16, "__umodhi3") -HANDLE_LIBCALL(UREM_I32, "__umodsi3") -HANDLE_LIBCALL(UREM_I64, "__umoddi3") -HANDLE_LIBCALL(UREM_I128, "__umodti3") -HANDLE_LIBCALL(SDIVREM_I8, nullptr) -HANDLE_LIBCALL(SDIVREM_I16, nullptr) -HANDLE_LIBCALL(SDIVREM_I32, nullptr) -HANDLE_LIBCALL(SDIVREM_I64, nullptr) -HANDLE_LIBCALL(SDIVREM_I128, nullptr) -HANDLE_LIBCALL(UDIVREM_I8, nullptr) -HANDLE_LIBCALL(UDIVREM_I16, nullptr) -HANDLE_LIBCALL(UDIVREM_I32, nullptr) -HANDLE_LIBCALL(UDIVREM_I64, nullptr) -HANDLE_LIBCALL(UDIVREM_I128, nullptr) -HANDLE_LIBCALL(NEG_I32, "__negsi2") -HANDLE_LIBCALL(NEG_I64, "__negdi2") - -// Floating-point -HANDLE_LIBCALL(ADD_F32, "__addsf3") -HANDLE_LIBCALL(ADD_F64, "__adddf3") -HANDLE_LIBCALL(ADD_F80, "__addxf3") -HANDLE_LIBCALL(ADD_F128, "__addtf3") -HANDLE_LIBCALL(ADD_PPCF128, "__gcc_qadd") -HANDLE_LIBCALL(SUB_F32, "__subsf3") -HANDLE_LIBCALL(SUB_F64, "__subdf3") -HANDLE_LIBCALL(SUB_F80, "__subxf3") -HANDLE_LIBCALL(SUB_F128, "__subtf3") -HANDLE_LIBCALL(SUB_PPCF128, "__gcc_qsub") -HANDLE_LIBCALL(MUL_F32, "__mulsf3") -HANDLE_LIBCALL(MUL_F64, "__muldf3") -HANDLE_LIBCALL(MUL_F80, "__mulxf3") -HANDLE_LIBCALL(MUL_F128, "__multf3") -HANDLE_LIBCALL(MUL_PPCF128, "__gcc_qmul") -HANDLE_LIBCALL(DIV_F32, "__divsf3") -HANDLE_LIBCALL(DIV_F64, "__divdf3") -HANDLE_LIBCALL(DIV_F80, "__divxf3") -HANDLE_LIBCALL(DIV_F128, "__divtf3") -HANDLE_LIBCALL(DIV_PPCF128, "__gcc_qdiv") -HANDLE_LIBCALL(REM_F32, "fmodf") -HANDLE_LIBCALL(REM_F64, "fmod") -HANDLE_LIBCALL(REM_F80, "fmodl") -HANDLE_LIBCALL(REM_F128, "fmodl") -HANDLE_LIBCALL(REM_PPCF128, "fmodl") -HANDLE_LIBCALL(FMA_F32, "fmaf") -HANDLE_LIBCALL(FMA_F64, "fma") -HANDLE_LIBCALL(FMA_F80, "fmal") -HANDLE_LIBCALL(FMA_F128, "fmal") -HANDLE_LIBCALL(FMA_PPCF128, "fmal") -HANDLE_LIBCALL(POWI_F32, "__powisf2") -HANDLE_LIBCALL(POWI_F64, "__powidf2") -HANDLE_LIBCALL(POWI_F80, "__powixf2") -HANDLE_LIBCALL(POWI_F128, "__powitf2") -HANDLE_LIBCALL(POWI_PPCF128, "__powitf2") 
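// (Illustrative aside, not part of the patch.) Rows in a .def file like this one are consumed via the X-macro pattern: a client defines HANDLE_LIBCALL, includes the file, and gets one expansion per row. The RuntimeLibcalls.h hunk further down expands rows into enum values; a hypothetical name table would be built the same way:
//
//   static const char *const LibcallNames[] = {
//   #define HANDLE_LIBCALL(code, name) name,
//   #include "llvm/IR/RuntimeLibcalls.def"
//   #undef HANDLE_LIBCALL
//   };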
-HANDLE_LIBCALL(SQRT_F32, "sqrtf") -HANDLE_LIBCALL(SQRT_F64, "sqrt") -HANDLE_LIBCALL(SQRT_F80, "sqrtl") -HANDLE_LIBCALL(SQRT_F128, "sqrtl") -HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl") -HANDLE_LIBCALL(LOG_F32, "logf") -HANDLE_LIBCALL(LOG_F64, "log") -HANDLE_LIBCALL(LOG_F80, "logl") -HANDLE_LIBCALL(LOG_F128, "logl") -HANDLE_LIBCALL(LOG_PPCF128, "logl") -HANDLE_LIBCALL(LOG2_F32, "log2f") -HANDLE_LIBCALL(LOG2_F64, "log2") -HANDLE_LIBCALL(LOG2_F80, "log2l") -HANDLE_LIBCALL(LOG2_F128, "log2l") -HANDLE_LIBCALL(LOG2_PPCF128, "log2l") -HANDLE_LIBCALL(LOG10_F32, "log10f") -HANDLE_LIBCALL(LOG10_F64, "log10") -HANDLE_LIBCALL(LOG10_F80, "log10l") -HANDLE_LIBCALL(LOG10_F128, "log10l") -HANDLE_LIBCALL(LOG10_PPCF128, "log10l") -HANDLE_LIBCALL(EXP_F32, "expf") -HANDLE_LIBCALL(EXP_F64, "exp") -HANDLE_LIBCALL(EXP_F80, "expl") -HANDLE_LIBCALL(EXP_F128, "expl") -HANDLE_LIBCALL(EXP_PPCF128, "expl") -HANDLE_LIBCALL(EXP2_F32, "exp2f") -HANDLE_LIBCALL(EXP2_F64, "exp2") -HANDLE_LIBCALL(EXP2_F80, "exp2l") -HANDLE_LIBCALL(EXP2_F128, "exp2l") -HANDLE_LIBCALL(EXP2_PPCF128, "exp2l") -HANDLE_LIBCALL(SIN_F32, "sinf") -HANDLE_LIBCALL(SIN_F64, "sin") -HANDLE_LIBCALL(SIN_F80, "sinl") -HANDLE_LIBCALL(SIN_F128, "sinl") -HANDLE_LIBCALL(SIN_PPCF128, "sinl") -HANDLE_LIBCALL(COS_F32, "cosf") -HANDLE_LIBCALL(COS_F64, "cos") -HANDLE_LIBCALL(COS_F80, "cosl") -HANDLE_LIBCALL(COS_F128, "cosl") -HANDLE_LIBCALL(COS_PPCF128, "cosl") -HANDLE_LIBCALL(SINCOS_F32, nullptr) -HANDLE_LIBCALL(SINCOS_F64, nullptr) -HANDLE_LIBCALL(SINCOS_F80, nullptr) -HANDLE_LIBCALL(SINCOS_F128, nullptr) -HANDLE_LIBCALL(SINCOS_PPCF128, nullptr) -HANDLE_LIBCALL(SINCOS_STRET_F32, nullptr) -HANDLE_LIBCALL(SINCOS_STRET_F64, nullptr) -HANDLE_LIBCALL(POW_F32, "powf") -HANDLE_LIBCALL(POW_F64, "pow") -HANDLE_LIBCALL(POW_F80, "powl") -HANDLE_LIBCALL(POW_F128, "powl") -HANDLE_LIBCALL(POW_PPCF128, "powl") -HANDLE_LIBCALL(CEIL_F32, "ceilf") -HANDLE_LIBCALL(CEIL_F64, "ceil") -HANDLE_LIBCALL(CEIL_F80, "ceill") -HANDLE_LIBCALL(CEIL_F128, "ceill") -HANDLE_LIBCALL(CEIL_PPCF128, "ceill") -HANDLE_LIBCALL(TRUNC_F32, "truncf") -HANDLE_LIBCALL(TRUNC_F64, "trunc") -HANDLE_LIBCALL(TRUNC_F80, "truncl") -HANDLE_LIBCALL(TRUNC_F128, "truncl") -HANDLE_LIBCALL(TRUNC_PPCF128, "truncl") -HANDLE_LIBCALL(RINT_F32, "rintf") -HANDLE_LIBCALL(RINT_F64, "rint") -HANDLE_LIBCALL(RINT_F80, "rintl") -HANDLE_LIBCALL(RINT_F128, "rintl") -HANDLE_LIBCALL(RINT_PPCF128, "rintl") -HANDLE_LIBCALL(NEARBYINT_F32, "nearbyintf") -HANDLE_LIBCALL(NEARBYINT_F64, "nearbyint") -HANDLE_LIBCALL(NEARBYINT_F80, "nearbyintl") -HANDLE_LIBCALL(NEARBYINT_F128, "nearbyintl") -HANDLE_LIBCALL(NEARBYINT_PPCF128, "nearbyintl") -HANDLE_LIBCALL(ROUND_F32, "roundf") -HANDLE_LIBCALL(ROUND_F64, "round") -HANDLE_LIBCALL(ROUND_F80, "roundl") -HANDLE_LIBCALL(ROUND_F128, "roundl") -HANDLE_LIBCALL(ROUND_PPCF128, "roundl") -HANDLE_LIBCALL(FLOOR_F32, "floorf") -HANDLE_LIBCALL(FLOOR_F64, "floor") -HANDLE_LIBCALL(FLOOR_F80, "floorl") -HANDLE_LIBCALL(FLOOR_F128, "floorl") -HANDLE_LIBCALL(FLOOR_PPCF128, "floorl") -HANDLE_LIBCALL(COPYSIGN_F32, "copysignf") -HANDLE_LIBCALL(COPYSIGN_F64, "copysign") -HANDLE_LIBCALL(COPYSIGN_F80, "copysignl") -HANDLE_LIBCALL(COPYSIGN_F128, "copysignl") -HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl") -HANDLE_LIBCALL(FMIN_F32, "fminf") -HANDLE_LIBCALL(FMIN_F64, "fmin") -HANDLE_LIBCALL(FMIN_F80, "fminl") -HANDLE_LIBCALL(FMIN_F128, "fminl") -HANDLE_LIBCALL(FMIN_PPCF128, "fminl") -HANDLE_LIBCALL(FMAX_F32, "fmaxf") -HANDLE_LIBCALL(FMAX_F64, "fmax") -HANDLE_LIBCALL(FMAX_F80, "fmaxl") -HANDLE_LIBCALL(FMAX_F128, "fmaxl") 
-HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") - -// Conversion -HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") -HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") -HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2") -HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2") -HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") -HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee") -HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee") -HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2") -HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2") -HANDLE_LIBCALL(FPROUND_F128_F16, "__trunctfhf2") -HANDLE_LIBCALL(FPROUND_PPCF128_F16, "__trunctfhf2") -HANDLE_LIBCALL(FPROUND_F64_F32, "__truncdfsf2") -HANDLE_LIBCALL(FPROUND_F80_F32, "__truncxfsf2") -HANDLE_LIBCALL(FPROUND_F128_F32, "__trunctfsf2") -HANDLE_LIBCALL(FPROUND_PPCF128_F32, "__gcc_qtos") -HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2") -HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2") -HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod") -HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi") -HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi") -HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti") -HANDLE_LIBCALL(FPTOSINT_F64_I32, "__fixdfsi") -HANDLE_LIBCALL(FPTOSINT_F64_I64, "__fixdfdi") -HANDLE_LIBCALL(FPTOSINT_F64_I128, "__fixdfti") -HANDLE_LIBCALL(FPTOSINT_F80_I32, "__fixxfsi") -HANDLE_LIBCALL(FPTOSINT_F80_I64, "__fixxfdi") -HANDLE_LIBCALL(FPTOSINT_F80_I128, "__fixxfti") -HANDLE_LIBCALL(FPTOSINT_F128_I32, "__fixtfsi") -HANDLE_LIBCALL(FPTOSINT_F128_I64, "__fixtfdi") -HANDLE_LIBCALL(FPTOSINT_F128_I128, "__fixtfti") -HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou") -HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi") -HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti") -HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi") -HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi") -HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti") -HANDLE_LIBCALL(FPTOUINT_F64_I32, "__fixunsdfsi") -HANDLE_LIBCALL(FPTOUINT_F64_I64, "__fixunsdfdi") -HANDLE_LIBCALL(FPTOUINT_F64_I128, "__fixunsdfti") -HANDLE_LIBCALL(FPTOUINT_F80_I32, "__fixunsxfsi") -HANDLE_LIBCALL(FPTOUINT_F80_I64, "__fixunsxfdi") -HANDLE_LIBCALL(FPTOUINT_F80_I128, "__fixunsxfti") -HANDLE_LIBCALL(FPTOUINT_F128_I32, "__fixunstfsi") -HANDLE_LIBCALL(FPTOUINT_F128_I64, "__fixunstfdi") -HANDLE_LIBCALL(FPTOUINT_F128_I128, "__fixunstfti") -HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi") -HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi") -HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti") -HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf") -HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf") -HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf") -HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf") -HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq") -HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf") -HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf") -HANDLE_LIBCALL(SINTTOFP_I64_F80, "__floatdixf") -HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf") -HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf") -HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf") -HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf") -HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf") -HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf") -HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf") -HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf") -HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf") -HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf") -HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf") -HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq") -HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf") -HANDLE_LIBCALL(UINTTOFP_I64_F64, 
"__floatundidf") -HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf") -HANDLE_LIBCALL(UINTTOFP_I64_F128, "__floatunditf") -HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf") -HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf") -HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf") -HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf") -HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf") -HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf") - -// Comparison -HANDLE_LIBCALL(OEQ_F32, "__eqsf2") -HANDLE_LIBCALL(OEQ_F64, "__eqdf2") -HANDLE_LIBCALL(OEQ_F128, "__eqtf2") -HANDLE_LIBCALL(OEQ_PPCF128, "__gcc_qeq") -HANDLE_LIBCALL(UNE_F32, "__nesf2") -HANDLE_LIBCALL(UNE_F64, "__nedf2") -HANDLE_LIBCALL(UNE_F128, "__netf2") -HANDLE_LIBCALL(UNE_PPCF128, "__gcc_qne") -HANDLE_LIBCALL(OGE_F32, "__gesf2") -HANDLE_LIBCALL(OGE_F64, "__gedf2") -HANDLE_LIBCALL(OGE_F128, "__getf2") -HANDLE_LIBCALL(OGE_PPCF128, "__gcc_qge") -HANDLE_LIBCALL(OLT_F32, "__ltsf2") -HANDLE_LIBCALL(OLT_F64, "__ltdf2") -HANDLE_LIBCALL(OLT_F128, "__lttf2") -HANDLE_LIBCALL(OLT_PPCF128, "__gcc_qlt") -HANDLE_LIBCALL(OLE_F32, "__lesf2") -HANDLE_LIBCALL(OLE_F64, "__ledf2") -HANDLE_LIBCALL(OLE_F128, "__letf2") -HANDLE_LIBCALL(OLE_PPCF128, "__gcc_qle") -HANDLE_LIBCALL(OGT_F32, "__gtsf2") -HANDLE_LIBCALL(OGT_F64, "__gtdf2") -HANDLE_LIBCALL(OGT_F128, "__gttf2") -HANDLE_LIBCALL(OGT_PPCF128, "__gcc_qgt") -HANDLE_LIBCALL(UO_F32, "__unordsf2") -HANDLE_LIBCALL(UO_F64, "__unorddf2") -HANDLE_LIBCALL(UO_F128, "__unordtf2") -HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord") -HANDLE_LIBCALL(O_F32, "__unordsf2") -HANDLE_LIBCALL(O_F64, "__unorddf2") -HANDLE_LIBCALL(O_F128, "__unordtf2") -HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord") - -// Memory -HANDLE_LIBCALL(MEMCPY, "memcpy") -HANDLE_LIBCALL(MEMMOVE, "memmove") -HANDLE_LIBCALL(MEMSET, "memset") -HANDLE_LIBCALL(BZERO, nullptr) - -// Element-wise unordered-atomic memory of different sizes -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memcpy_element_unordered_atomic_1") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memcpy_element_unordered_atomic_2") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memcpy_element_unordered_atomic_4") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memcpy_element_unordered_atomic_8") -HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memcpy_element_unordered_atomic_16") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memmove_element_unordered_atomic_1") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memmove_element_unordered_atomic_2") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memmove_element_unordered_atomic_4") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memmove_element_unordered_atomic_8") -HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memmove_element_unordered_atomic_16") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memset_element_unordered_atomic_1") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memset_element_unordered_atomic_2") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memset_element_unordered_atomic_4") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memset_element_unordered_atomic_8") -HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unordered_atomic_16") - -// Exception handling -HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume") - -// Note: there are two sets of atomics libcalls; see -// <https://llvm.org/docs/Atomics.html> for more info on the -// difference between them. 
- -// Atomic '__sync_*' libcalls. -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_1, "__sync_val_compare_and_swap_1") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_2, "__sync_val_compare_and_swap_2") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_4, "__sync_val_compare_and_swap_4") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_8, "__sync_val_compare_and_swap_8") -HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_16, "__sync_val_compare_and_swap_16") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_1, "__sync_lock_test_and_set_1") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_2, "__sync_lock_test_and_set_2") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_4, "__sync_lock_test_and_set_4") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_8, "__sync_lock_test_and_set_8") -HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_16, "__sync_lock_test_and_set_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_1, "__sync_fetch_and_add_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_2, "__sync_fetch_and_add_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_4, "__sync_fetch_and_add_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_8, "__sync_fetch_and_add_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_16, "__sync_fetch_and_add_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_1, "__sync_fetch_and_sub_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_2, "__sync_fetch_and_sub_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_4, "__sync_fetch_and_sub_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_8, "__sync_fetch_and_sub_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_16, "__sync_fetch_and_sub_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_1, "__sync_fetch_and_and_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_2, "__sync_fetch_and_and_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_4, "__sync_fetch_and_and_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_8, "__sync_fetch_and_and_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_AND_16, "__sync_fetch_and_and_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_1, "__sync_fetch_and_or_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_2, "__sync_fetch_and_or_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_4, "__sync_fetch_and_or_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_8, "__sync_fetch_and_or_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_OR_16, "__sync_fetch_and_or_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_1, "__sync_fetch_and_xor_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_2, "__sync_fetch_and_xor_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_4, "__sync_fetch_and_xor_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_8, "__sync_fetch_and_xor_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_16, "__sync_fetch_and_xor_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_1, "__sync_fetch_and_nand_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_2, "__sync_fetch_and_nand_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_4, "__sync_fetch_and_nand_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_8, "__sync_fetch_and_nand_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_16, "__sync_fetch_and_nand_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_1, "__sync_fetch_and_max_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_2, "__sync_fetch_and_max_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_4, "__sync_fetch_and_max_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_8, "__sync_fetch_and_max_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_16, "__sync_fetch_and_max_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_1, "__sync_fetch_and_umax_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_2, "__sync_fetch_and_umax_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_4, "__sync_fetch_and_umax_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_8, "__sync_fetch_and_umax_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_16, "__sync_fetch_and_umax_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_1, "__sync_fetch_and_min_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_2, "__sync_fetch_and_min_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_4, 
"__sync_fetch_and_min_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_8, "__sync_fetch_and_min_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_16, "__sync_fetch_and_min_16") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_1, "__sync_fetch_and_umin_1") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_2, "__sync_fetch_and_umin_2") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_4, "__sync_fetch_and_umin_4") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_8, "__sync_fetch_and_umin_8") -HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_16, "__sync_fetch_and_umin_16") - -// Atomic `__atomic_*' libcalls. -HANDLE_LIBCALL(ATOMIC_LOAD, "__atomic_load") -HANDLE_LIBCALL(ATOMIC_LOAD_1, "__atomic_load_1") -HANDLE_LIBCALL(ATOMIC_LOAD_2, "__atomic_load_2") -HANDLE_LIBCALL(ATOMIC_LOAD_4, "__atomic_load_4") -HANDLE_LIBCALL(ATOMIC_LOAD_8, "__atomic_load_8") -HANDLE_LIBCALL(ATOMIC_LOAD_16, "__atomic_load_16") - -HANDLE_LIBCALL(ATOMIC_STORE, "__atomic_store") -HANDLE_LIBCALL(ATOMIC_STORE_1, "__atomic_store_1") -HANDLE_LIBCALL(ATOMIC_STORE_2, "__atomic_store_2") -HANDLE_LIBCALL(ATOMIC_STORE_4, "__atomic_store_4") -HANDLE_LIBCALL(ATOMIC_STORE_8, "__atomic_store_8") -HANDLE_LIBCALL(ATOMIC_STORE_16, "__atomic_store_16") - -HANDLE_LIBCALL(ATOMIC_EXCHANGE, "__atomic_exchange") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_1, "__atomic_exchange_1") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_2, "__atomic_exchange_2") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_4, "__atomic_exchange_4") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_8, "__atomic_exchange_8") -HANDLE_LIBCALL(ATOMIC_EXCHANGE_16, "__atomic_exchange_16") - -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE, "__atomic_compare_exchange") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_1, "__atomic_compare_exchange_1") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_2, "__atomic_compare_exchange_2") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_4, "__atomic_compare_exchange_4") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_8, "__atomic_compare_exchange_8") -HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_16, "__atomic_compare_exchange_16") - -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_1, "__atomic_fetch_add_1") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_2, "__atomic_fetch_add_2") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_4, "__atomic_fetch_add_4") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_8, "__atomic_fetch_add_8") -HANDLE_LIBCALL(ATOMIC_FETCH_ADD_16, "__atomic_fetch_add_16") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_1, "__atomic_fetch_sub_1") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_2, "__atomic_fetch_sub_2") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_4, "__atomic_fetch_sub_4") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_8, "__atomic_fetch_sub_8") -HANDLE_LIBCALL(ATOMIC_FETCH_SUB_16, "__atomic_fetch_sub_16") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_1, "__atomic_fetch_and_1") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_2, "__atomic_fetch_and_2") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_4, "__atomic_fetch_and_4") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_8, "__atomic_fetch_and_8") -HANDLE_LIBCALL(ATOMIC_FETCH_AND_16, "__atomic_fetch_and_16") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_1, "__atomic_fetch_or_1") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_2, "__atomic_fetch_or_2") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_4, "__atomic_fetch_or_4") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_8, "__atomic_fetch_or_8") -HANDLE_LIBCALL(ATOMIC_FETCH_OR_16, "__atomic_fetch_or_16") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_1, "__atomic_fetch_xor_1") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_2, "__atomic_fetch_xor_2") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_4, "__atomic_fetch_xor_4") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_8, "__atomic_fetch_xor_8") -HANDLE_LIBCALL(ATOMIC_FETCH_XOR_16, "__atomic_fetch_xor_16") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_1, "__atomic_fetch_nand_1") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_2, "__atomic_fetch_nand_2") 
-HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8") -HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16") - -// Stack Protector Fail -HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail") - -// Deoptimization -HANDLE_LIBCALL(DEOPTIMIZE, "__llvm_deoptimize") - -HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr) - -#undef HANDLE_LIBCALL diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 016bef1702c4..28567a1ce437 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -29,7 +29,7 @@ namespace RTLIB { /// enum Libcall { #define HANDLE_LIBCALL(code, name) code, - #include "RuntimeLibcalls.def" + #include "llvm/IR/RuntimeLibcalls.def" #undef HANDLE_LIBCALL }; diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index f3f2f05b877d..5e7837834ec8 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -76,7 +76,7 @@ class TargetRegisterInfo; }; private: - /// \brief A pointer to the depending/depended-on SUnit, and an enum + /// A pointer to the depending/depended-on SUnit, and an enum /// indicating the kind of the dependency. PointerIntPair<SUnit *, 2, Kind> Dep; @@ -137,7 +137,7 @@ class TargetRegisterInfo; return !operator==(Other); } - /// \brief Returns the latency value for this edge, which roughly means the + /// Returns the latency value for this edge, which roughly means the /// minimum number of cycles that must elapse between the predecessor and /// the successor, given that they have this edge between them. unsigned getLatency() const { @@ -163,7 +163,7 @@ class TargetRegisterInfo; return getKind() != Data; } - /// \brief Tests if this is an Order dependence between two memory accesses + /// Tests if this is an Order dependence between two memory accesses /// where both sides of the dependence access memory in non-volatile and /// fully modeled ways. bool isNormalMemory() const { @@ -181,7 +181,7 @@ class TargetRegisterInfo; return (isNormalMemory() || isBarrier()); } - /// \brief Tests if this is an Order dependence that is marked as + /// Tests if this is an Order dependence that is marked as /// "must alias", meaning that the SUnits at either end of the edge have a /// memory dependence on a known memory location. bool isMustAlias() const { @@ -196,13 +196,13 @@ class TargetRegisterInfo; return getKind() == Order && Contents.OrdKind >= Weak; } - /// \brief Tests if this is an Order dependence that is marked as + /// Tests if this is an Order dependence that is marked as /// "artificial", meaning it isn't necessary for correctness. bool isArtificial() const { return getKind() == Order && Contents.OrdKind == Artificial; } - /// \brief Tests if this is an Order dependence that is marked as "cluster", + /// Tests if this is an Order dependence that is marked as "cluster", /// meaning it is artificial and wants to be adjacent. bool isCluster() const { return getKind() == Order && Contents.OrdKind == Cluster; @@ -308,7 +308,7 @@ class TargetRegisterInfo; nullptr; ///< Is a special copy node if != nullptr. const TargetRegisterClass *CopySrcRC = nullptr; - /// \brief Constructs an SUnit for pre-regalloc scheduling to represent an + /// Constructs an SUnit for pre-regalloc scheduling to represent an /// SDNode and any nodes flagged to it. 
SUnit(SDNode *node, unsigned nodenum) : Node(node), NodeNum(nodenum), isVRegCycle(false), isCall(false), @@ -319,7 +319,7 @@ class TargetRegisterInfo; isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false), isHeightCurrent(false) {} - /// \brief Constructs an SUnit for post-regalloc scheduling to represent a + /// Constructs an SUnit for post-regalloc scheduling to represent a /// MachineInstr. SUnit(MachineInstr *instr, unsigned nodenum) : Instr(instr), NodeNum(nodenum), isVRegCycle(false), isCall(false), @@ -330,7 +330,7 @@ class TargetRegisterInfo; isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false), isHeightCurrent(false) {} - /// \brief Constructs a placeholder SUnit. + /// Constructs a placeholder SUnit. SUnit() : isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), isCommutable(false), hasPhysRegUses(false), hasPhysRegDefs(false), @@ -339,7 +339,7 @@ class TargetRegisterInfo; isCloned(false), isUnbuffered(false), hasReservedResource(false), isDepthCurrent(false), isHeightCurrent(false) {} - /// \brief Boundary nodes are placeholders for the boundary of the + /// Boundary nodes are placeholders for the boundary of the /// scheduling region. /// /// BoundaryNodes can have DAG edges, including Data edges, but they do not @@ -362,7 +362,7 @@ class TargetRegisterInfo; return Node; } - /// \brief Returns true if this SUnit refers to a machine instruction as + /// Returns true if this SUnit refers to a machine instruction as /// opposed to an SDNode. bool isInstr() const { return Instr; } @@ -384,7 +384,7 @@ class TargetRegisterInfo; /// It also adds the current node as a successor of the specified node. bool addPred(const SDep &D, bool Required = true); - /// \brief Adds a barrier edge to SU by calling addPred(), with latency 0 + /// Adds a barrier edge to SU by calling addPred(), with latency 0 /// generally or latency 1 for a store followed by a load. bool addPredBarrier(SUnit *SU) { SDep Dep(SU, SDep::Barrier); @@ -406,7 +406,7 @@ class TargetRegisterInfo; return Depth; } - /// \brief Returns the height of this node, which is the length of the + /// Returns the height of this node, which is the length of the /// maximum path down to any node which has no successors. unsigned getHeight() const { if (!isHeightCurrent) @@ -414,21 +414,21 @@ class TargetRegisterInfo; return Height; } - /// \brief If NewDepth is greater than this node's depth value, sets it to + /// If NewDepth is greater than this node's depth value, sets it to /// be the new depth value. This also recursively marks successor nodes /// dirty. void setDepthToAtLeast(unsigned NewDepth); - /// \brief If NewDepth is greater than this node's depth value, set it to be + /// If NewDepth is greater than this node's depth value, set it to be /// the new height value. This also recursively marks predecessor nodes /// dirty. void setHeightToAtLeast(unsigned NewHeight); - /// \brief Sets a flag in this node to indicate that its stored Depth value + /// Sets a flag in this node to indicate that its stored Depth value /// will require recomputation the next time getDepth() is called. void setDepthDirty(); - /// \brief Sets a flag in this node to indicate that its stored Height value + /// Sets a flag in this node to indicate that its stored Height value /// will require recomputation the next time getHeight() is called. 
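// (Illustrative aside, not part of the patch.) Depth and height are the two lazily computed DAG distances used above: getDepth() is the length of the longest path from a node with no predecessors down to this node, and getHeight() the longest path from this node down to a node with no successors. The dirty-flag setters here only invalidate the cached values, deferring recomputation to the next getter call.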
void setHeightDirty(); @@ -455,15 +455,15 @@ class TargetRegisterInfo; return NumSuccsLeft == 0; } - /// \brief Orders this node's predecessor edges such that the critical path + /// Orders this node's predecessor edges such that the critical path /// edge occurs first. void biasCriticalPath(); void dump(const ScheduleDAG *G) const; void dumpAll(const ScheduleDAG *G) const; raw_ostream &print(raw_ostream &O, - const SUnit *N = nullptr, - const SUnit *X = nullptr) const; + const SUnit *Entry = nullptr, + const SUnit *Exit = nullptr) const; raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const; private: @@ -497,7 +497,7 @@ class TargetRegisterInfo; //===--------------------------------------------------------------------===// - /// \brief This interface is used to plug different priorities computation + /// This interface is used to plug different priorities computation /// algorithms into the list scheduler. It implements the interface of a /// standard priority queue, where nodes are inserted in arbitrary order and /// returned in priority order. The computation of the priority and the @@ -609,7 +609,7 @@ class TargetRegisterInfo; virtual void addCustomGraphFeatures(GraphWriter<ScheduleDAG*> &) const {} #ifndef NDEBUG - /// \brief Verifies that all SUnits were scheduled and that their state is + /// Verifies that all SUnits were scheduled and that their state is /// consistent. Returns the number of scheduled SUnits. unsigned VerifyScheduledDAG(bool isBottomUp); #endif @@ -708,7 +708,7 @@ class TargetRegisterInfo; /// method. void DFS(const SUnit *SU, int UpperBound, bool& HasLoop); - /// \brief Reassigns topological indexes for the nodes in the DAG to + /// Reassigns topological indexes for the nodes in the DAG to /// preserve the topological ordering. void Shift(BitVector& Visited, int LowerBound, int UpperBound); @@ -735,11 +735,11 @@ class TargetRegisterInfo; /// Returns true if addPred(TargetSU, SU) creates a cycle. bool WillCreateCycle(SUnit *TargetSU, SUnit *SU); - /// \brief Updates the topological ordering to accommodate an edge to be + /// Updates the topological ordering to accommodate an edge to be /// added from SUnit \p X to SUnit \p Y. void AddPred(SUnit *Y, SUnit *X); - /// \brief Updates the topological ordering to accommodate an an edge to be + /// Updates the topological ordering to accommodate an edge to be /// removed from the specified node \p N from the predecessors of the /// current node \p M. void RemovePred(SUnit *M, SUnit *N); diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 14882205584e..520a23846f6e 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -190,7 +190,7 @@ namespace llvm { using SUList = std::list<SUnit *>; protected: - /// \brief A map from ValueType to SUList, used during DAG construction, as + /// A map from ValueType to SUList, used during DAG construction, as /// a means of remembering which SUs depend on which memory locations. class Value2SUsMap; @@ -201,7 +201,7 @@ namespace llvm { void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N); - /// \brief Adds a chain edge between SUa and SUb, but only if both + /// Adds a chain edge between SUa and SUb, but only if both /// AliasAnalysis and Target fail to deny the dependency. void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency = 0); @@ -286,7 +286,7 @@ namespace llvm { /// Cleans up after scheduling in the given block.
virtual void finishBlock(); - /// \brief Initialize the DAG and common scheduler state for a new + /// Initialize the DAG and common scheduler state for a new /// scheduling region. This does not actually create the DAG, only clears /// it. The scheduling driver may call BuildSchedGraph multiple times per /// scheduling region. @@ -308,7 +308,7 @@ namespace llvm { LiveIntervals *LIS = nullptr, bool TrackLaneMasks = false); - /// \brief Adds dependencies from instructions in the current list of + /// Adds dependencies from instructions in the current list of /// instructions being scheduled to scheduling barrier. We want to make sure /// instructions which define registers that are either used by the /// terminator or are live-out are properly scheduled. This is especially diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h index d6a8c791392c..3ecc033ac35a 100644 --- a/include/llvm/CodeGen/ScheduleDFS.h +++ b/include/llvm/CodeGen/ScheduleDFS.h @@ -25,7 +25,7 @@ namespace llvm { class raw_ostream; -/// \brief Represent the ILP of the subDAG rooted at a DAG node. +/// Represent the ILP of the subDAG rooted at a DAG node. /// /// ILPValues summarize the DAG subtree rooted at each node. ILPValues are /// valid for all nodes regardless of their subtree membership. @@ -62,13 +62,13 @@ struct ILPValue { void dump() const; }; -/// \brief Compute the values of each DAG node for various metrics during DFS. +/// Compute the values of each DAG node for various metrics during DFS. class SchedDFSResult { friend class SchedDFSImpl; static const unsigned InvalidSubtreeID = ~0u; - /// \brief Per-SUnit data computed during DFS for various metrics. + /// Per-SUnit data computed during DFS for various metrics. /// /// A node's SubtreeID is set to itself when it is visited to indicate that it /// is the root of a subtree. Later it is set to its parent to indicate an @@ -81,7 +81,7 @@ class SchedDFSResult { NodeData() = default; }; - /// \brief Per-Subtree data computed during DFS. + /// Per-Subtree data computed during DFS. struct TreeData { unsigned ParentTreeID = InvalidSubtreeID; unsigned SubInstrCount = 0; @@ -89,7 +89,7 @@ class SchedDFSResult { TreeData() = default; }; - /// \brief Record a connection between subtrees and the connection level. + /// Record a connection between subtrees and the connection level. struct Connection { unsigned TreeID; unsigned Level; @@ -117,15 +117,15 @@ public: SchedDFSResult(bool IsBU, unsigned lim) : IsBottomUp(IsBU), SubtreeLimit(lim) {} - /// \brief Get the node cutoff before subtrees are considered significant. + /// Get the node cutoff before subtrees are considered significant. unsigned getSubtreeLimit() const { return SubtreeLimit; } - /// \brief Return true if this DFSResult is uninitialized. + /// Return true if this DFSResult is uninitialized. /// /// resize() initializes DFSResult, while compute() populates it. bool empty() const { return DFSNodeData.empty(); } - /// \brief Clear the results. + /// Clear the results. void clear() { DFSNodeData.clear(); DFSTreeData.clear(); @@ -133,37 +133,37 @@ public: SubtreeConnectLevels.clear(); } - /// \brief Initialize the result data with the size of the DAG. + /// Initialize the result data with the size of the DAG. void resize(unsigned NumSUnits) { DFSNodeData.resize(NumSUnits); } - /// \brief Compute various metrics for the DAG with given roots. + /// Compute various metrics for the DAG with given roots. 
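// (Illustrative aside, not part of the patch.) Pieced together from the comments above, the intended call sequence is resize() with the DAG size, then compute(), then per-node queries:
//
//   SchedDFSResult DFS(/*IsBU=*/true, /*lim=*/8);   // ctor shown above
//   DFS.resize(SUnits.size());
//   DFS.compute(SUnits);
//   ILPValue ILP = DFS.getILP(&SUnits.front());     // a leaf reports 1/1
//
// where SUnits is the scheduler's SUnit vector and the limit value is a placeholder.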
void compute(ArrayRef<SUnit> SUnits); - /// \brief Get the number of instructions in the given subtree and its + /// Get the number of instructions in the given subtree and its /// children. unsigned getNumInstrs(const SUnit *SU) const { return DFSNodeData[SU->NodeNum].InstrCount; } - /// \brief Get the number of instructions in the given subtree not including + /// Get the number of instructions in the given subtree not including /// children. unsigned getNumSubInstrs(unsigned SubtreeID) const { return DFSTreeData[SubtreeID].SubInstrCount; } - /// \brief Get the ILP value for a DAG node. + /// Get the ILP value for a DAG node. /// /// A leaf node has an ILP of 1/1. ILPValue getILP(const SUnit *SU) const { return ILPValue(DFSNodeData[SU->NodeNum].InstrCount, 1 + SU->getDepth()); } - /// \brief The number of subtrees detected in this DAG. + /// The number of subtrees detected in this DAG. unsigned getNumSubtrees() const { return SubtreeConnectLevels.size(); } - /// \brief Get the ID of the subtree the given DAG node belongs to. + /// Get the ID of the subtree the given DAG node belongs to. /// /// For convenience, if DFSResults have not been computed yet, give everything /// tree ID 0. @@ -174,7 +174,7 @@ public: return DFSNodeData[SU->NodeNum].SubtreeID; } - /// \brief Get the connection level of a subtree. + /// Get the connection level of a subtree. /// /// For bottom-up trees, the connection level is the latency depth (in cycles) /// of the deepest connection to another subtree. @@ -182,7 +182,7 @@ public: return SubtreeConnectLevels[SubtreeID]; } - /// \brief Scheduler callback to update SubtreeConnectLevels when a tree is + /// Scheduler callback to update SubtreeConnectLevels when a tree is /// initially scheduled. void scheduleTree(unsigned SubtreeID); }; diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h index 466ab532030c..3f75d108f282 100644 --- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h +++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h @@ -106,7 +106,7 @@ class ScoreboardHazardRecognizer : public ScheduleHazardRecognizer { Scoreboard RequiredScoreboard; public: - ScoreboardHazardRecognizer(const InstrItineraryData *ItinData, + ScoreboardHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG, const char *ParentDebugType = ""); diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 6a5c2db34bb1..888f9425ff90 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -28,11 +28,12 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DebugLoc.h" @@ -44,6 +45,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/RecyclingAllocator.h" #include <algorithm> #include <cassert> @@ -71,8 +73,10 @@ class MachineConstantPoolValue; class MCSymbol; class OptimizationRemarkEmitter; class SDDbgValue; +class SDDbgLabel; class SelectionDAG; class 
SelectionDAGTargetInfo; +class TargetLibraryInfo; class TargetLowering; class TargetMachine; class TargetSubtargetInfo; @@ -145,6 +149,7 @@ class SDDbgInfo { BumpPtrAllocator Alloc; SmallVector<SDDbgValue*, 32> DbgValues; SmallVector<SDDbgValue*, 32> ByvalParmDbgValues; + SmallVector<SDDbgLabel*, 4> DbgLabels; using DbgValMapType = DenseMap<const SDNode *, SmallVector<SDDbgValue *, 2>>; DbgValMapType DbgValMap; @@ -161,7 +166,11 @@ public: DbgValMap[Node].push_back(V); } - /// \brief Invalidate all DbgValues attached to the node and remove + void add(SDDbgLabel *L) { + DbgLabels.push_back(L); + } + + /// Invalidate all DbgValues attached to the node and remove /// it from the Node-to-DbgValues map. void erase(const SDNode *Node); @@ -169,13 +178,14 @@ public: DbgValMap.clear(); DbgValues.clear(); ByvalParmDbgValues.clear(); + DbgLabels.clear(); Alloc.Reset(); } BumpPtrAllocator &getAlloc() { return Alloc; } bool empty() const { - return DbgValues.empty() && ByvalParmDbgValues.empty(); + return DbgValues.empty() && ByvalParmDbgValues.empty() && DbgLabels.empty(); } ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) { @@ -186,11 +196,14 @@ public: } using DbgIterator = SmallVectorImpl<SDDbgValue*>::iterator; + using DbgLabelIterator = SmallVectorImpl<SDDbgLabel*>::iterator; DbgIterator DbgBegin() { return DbgValues.begin(); } DbgIterator DbgEnd() { return DbgValues.end(); } DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); } DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); } + DbgLabelIterator DbgLabelBegin() { return DbgLabels.begin(); } + DbgLabelIterator DbgLabelEnd() { return DbgLabels.end(); } }; void checkForCycles(const SelectionDAG *DAG, bool force = false); @@ -210,11 +223,15 @@ class SelectionDAG { const TargetMachine &TM; const SelectionDAGTargetInfo *TSI = nullptr; const TargetLowering *TLI = nullptr; + const TargetLibraryInfo *LibInfo = nullptr; MachineFunction *MF; Pass *SDAGISelPass = nullptr; LLVMContext *Context; CodeGenOpt::Level OptLevel; + DivergenceAnalysis * DA = nullptr; + FunctionLoweringInfo * FLI = nullptr; + /// The function-level optimization remark emitter. Used to emit remarks /// whenever manipulating the DAG. OptimizationRemarkEmitter *ORE; @@ -248,7 +265,7 @@ class SelectionDAG { /// Pool allocation for misc. objects that are created once per SelectionDAG. BumpPtrAllocator Allocator; - /// Tracks dbg_value information through SDISel. + /// Tracks dbg_value and dbg_label information through SDISel. SDDbgInfo *DbgInfo; uint16_t NextPersistentId = 0; @@ -344,19 +361,7 @@ private: .getRawSubclassData(); } - void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { - assert(!Node->OperandList && "Node already has operands"); - SDUse *Ops = OperandRecycler.allocate( - ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); - - for (unsigned I = 0; I != Vals.size(); ++I) { - Ops[I].setUser(Node); - Ops[I].setInitial(Vals[I]); - } - Node->NumOperands = Vals.size(); - Node->OperandList = Ops; - checkForCycles(Node); - } + void createOperands(SDNode *Node, ArrayRef<SDValue> Vals); void removeOperands(SDNode *Node) { if (!Node->OperandList) @@ -367,7 +372,7 @@ private: Node->NumOperands = 0; Node->OperandList = nullptr; } - + void CreateTopologicalOrder(std::vector<SDNode*>& Order); public: explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); SelectionDAG(const SelectionDAG &) = delete; @@ -376,7 +381,12 @@ public: /// Prepare this SelectionDAG to process code in the given MachineFunction. 
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr); + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, + DivergenceAnalysis * Divergence); + + void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) { + FLI = FuncInfo; + } /// Clear state and free memory necessary to make this /// SelectionDAG ready to process a new block. @@ -389,6 +399,7 @@ public: const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } const TargetLowering &getTargetLoweringInfo() const { return *TLI; } + const TargetLibraryInfo &getLibInfo() const { return *LibInfo; } const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } LLVMContext *getContext() const {return Context; } OptimizationRemarkEmitter &getORE() const { return *ORE; } @@ -460,6 +471,8 @@ public: return Root; } + void VerifyDAGDiverence(); + /// This iterates over the nodes in the SelectionDAG, folding /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and @@ -483,7 +496,7 @@ public: /// the graph. void Legalize(); - /// \brief Transforms a SelectionDAG node and any operands to it into a node + /// Transforms a SelectionDAG node and any operands to it into a node /// that is compatible with the target instruction selector, as indicated by /// the TargetLowering object. /// @@ -534,7 +547,7 @@ public: //===--------------------------------------------------------------------===// // Node creation methods. - /// \brief Create a ConstantSDNode wrapping a constant value. + /// Create a ConstantSDNode wrapping a constant value. /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. /// /// If only legal types can be produced, this does the necessary @@ -567,9 +580,13 @@ public: bool isOpaque = false) { return getConstant(Val, DL, VT, true, isOpaque); } + + /// Create a true or false constant of type \p VT using the target's + /// BooleanContent for type \p OpVT. + SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT); /// @} - /// \brief Create a ConstantFPSDNode wrapping a constant value. + /// Create a ConstantFPSDNode wrapping a constant value. /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. /// /// If only legal types can be produced, this does the necessary @@ -581,7 +598,7 @@ public: bool isTarget = false); SDValue getConstantFP(const APFloat &Val, const SDLoc &DL, EVT VT, bool isTarget = false); - SDValue getConstantFP(const ConstantFP &CF, const SDLoc &DL, EVT VT, + SDValue getConstantFP(const ConstantFP &V, const SDLoc &DL, EVT VT, bool isTarget = false); SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT) { return getConstantFP(Val, DL, VT, true); @@ -741,7 +758,7 @@ public: return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } - /// \brief Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to + /// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to /// the shuffle node in input but with swapped operands. /// /// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3> @@ -765,7 +782,7 @@ public: /// Return the expression required to zero extend the Op /// value assuming it was the smaller SrcTy value. 
- SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT SrcTy); + SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT); /// Return an operation which will any-extend the low lanes of the operand /// into the specified vector type. For example, @@ -793,10 +810,10 @@ public: /// Create a bitwise NOT operation as (XOR Val, -1). SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT); - /// \brief Create a logical NOT operation as (XOR Val, BooleanOne). + /// Create a logical NOT operation as (XOR Val, BooleanOne). SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); - /// \brief Create an add instruction with appropriate flags when used for + /// Create an add instruction with appropriate flags when used for /// addressing some offset of an object. i.e. if a load is split into multiple /// components, create an add nuw from the base pointer to the offset. SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset) { @@ -862,17 +879,18 @@ public: ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops); // Specialize based on number of operands. SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT); - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N, + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, SDValue N3); + SDValue N2, SDValue N3, + const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, @@ -880,15 +898,15 @@ public: // Specialize again based on number of operands for nodes with a VTList // rather than a single VT. 
- SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList); + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N); + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4); - SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, SDValue N1, + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); /// Compute a TokenFactor to force all the incoming stack arguments to be @@ -910,6 +928,23 @@ public: SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo); + SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, + unsigned DstAlign, SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo); + + SDValue getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, + unsigned DstAlign, SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo); + + SDValue getAtomicMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, + unsigned DstAlign, SDValue Value, SDValue Size, + Type *SizeTy, unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo); + /// Helper function to make it easier to build SetCC's if you just /// have an ISD::CondCode instead of an SDValue. /// @@ -1050,12 +1085,12 @@ public: MachineMemOperand *MMO); SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment = 0, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()); SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, - SDValue Ptr, EVT TVT, MachineMemOperand *MMO); - SDValue getIndexedStore(SDValue OrigStoe, const SDLoc &dl, SDValue Base, + SDValue Ptr, EVT SVT, MachineMemOperand *MMO); + SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); /// Returns sum of the base pointer and offset. @@ -1121,28 +1156,31 @@ public: SDValue Op3, SDValue Op4, SDValue Op5); SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops); + // Propagates the change in divergence to users + void updateDivergence(SDNode * N); + /// These are used for target selectors to *mutate* the /// specified node to have the specified return type, Target opcode, and /// operands. Note that target opcodes are stored as /// ~TargetOpcode in the node opcode field. The resultant node is returned. 
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, SDValue Op1); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1, SDValue Op2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, ArrayRef<SDValue> Ops); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, EVT VT2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, ArrayRef<SDValue> Ops); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, ArrayRef<SDValue> Ops); SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, EVT VT2, SDValue Op1); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, SDVTList VTs, + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs, ArrayRef<SDValue> Ops); /// This *mutates* the specified node to have the specified @@ -1197,7 +1235,7 @@ public: SDValue Operand, SDValue Subreg); /// Get the specified node if it's already available, or else return NULL. - SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, ArrayRef<SDValue> Ops, + SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags()); /// Creates a SDDbgValue node. @@ -1212,8 +1250,16 @@ public: /// Creates a FrameIndex SDDbgValue node. SDDbgValue *getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr, - unsigned FI, const DebugLoc &DL, - unsigned O); + unsigned FI, bool IsIndirect, + const DebugLoc &DL, unsigned O); + + /// Creates a VReg SDDbgValue node. + SDDbgValue *getVRegDbgValue(DIVariable *Var, DIExpression *Expr, + unsigned VReg, bool IsIndirect, + const DebugLoc &DL, unsigned O); + + /// Creates a SDDbgLabel node. + SDDbgLabel *getDbgLabel(DILabel *Label, const DebugLoc &DL, unsigned O); /// Transfer debug values from one node to another, while optionally /// generating fragment expressions for split-up values. If \p InvalidateDbg @@ -1245,7 +1291,7 @@ public: /// to be given new uses. These new uses of From are left in place, and /// not automatically transferred to To. /// - void ReplaceAllUsesWith(SDValue From, SDValue Op); + void ReplaceAllUsesWith(SDValue From, SDValue To); void ReplaceAllUsesWith(SDNode *From, SDNode *To); void ReplaceAllUsesWith(SDNode *From, const SDValue *To); @@ -1296,6 +1342,9 @@ public: /// value is produced by SD. void AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter); + /// Add a dbg_label SDNode. + void AddDbgLabel(SDDbgLabel *DB); + /// Get the debug values which reference the given SDNode. 
ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) { return DbgInfo->getSDDbgValues(SD); @@ -1317,6 +1366,13 @@ public: return DbgInfo->ByvalParmDbgEnd(); } + SDDbgInfo::DbgLabelIterator DbgLabelBegin() { + return DbgInfo->DbgLabelBegin(); + } + SDDbgInfo::DbgLabelIterator DbgLabelEnd() { + return DbgInfo->DbgLabelEnd(); + } + /// To be invoked on an SDNode that is slated to be erased. This /// function mirrors \c llvm::salvageDebugInfo. void salvageDebugInfo(SDNode &N); @@ -1431,8 +1487,11 @@ public: /// Test whether the given SDValue is known to never be NaN. bool isKnownNeverNaN(SDValue Op) const; - /// Test whether the given SDValue is known to never be positive or negative - /// zero. + /// Test whether the given floating point SDValue is known to never be + /// positive or negative zero. + bool isKnownNeverZeroFloat(SDValue Op) const; + + /// Test whether the given SDValue is known to contain non-zero value(s). bool isKnownNeverZero(SDValue Op) const; /// Test whether two SDValues are known to compare equal. This diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 18e4c7a83def..580606441a9d 100644 --- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -56,7 +56,7 @@ public: int64_t &Off); /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG); + static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index de6849a1eae1..86df0af7303f 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -110,6 +110,11 @@ public: CodeGenOpt::Level OptLevel, bool IgnoreChains = false); + static void InvalidateNodeId(SDNode *N); + static int getUninvalidatedNodeId(SDNode *N); + + static void EnforceNodeIdInvariant(SDNode *N); + // Opcodes used by the DAG state machine: enum BuiltinOpcodes { OPC_Scope, @@ -199,23 +204,28 @@ protected: /// of the new node T. void ReplaceUses(SDValue F, SDValue T) { CurDAG->ReplaceAllUsesOfValueWith(F, T); + EnforceNodeIdInvariant(T.getNode()); } /// ReplaceUses - replace all uses of the old nodes F with the use /// of the new nodes T. void ReplaceUses(const SDValue *F, const SDValue *T, unsigned Num) { CurDAG->ReplaceAllUsesOfValuesWith(F, T, Num); + for (unsigned i = 0; i < Num; ++i) + EnforceNodeIdInvariant(T[i].getNode()); } /// ReplaceUses - replace all uses of the old node F with the use /// of the new node T. void ReplaceUses(SDNode *F, SDNode *T) { CurDAG->ReplaceAllUsesWith(F, T); + EnforceNodeIdInvariant(T); } /// Replace all uses of \c F with \c T, then remove \c F from the DAG. void ReplaceNode(SDNode *F, SDNode *T) { CurDAG->ReplaceAllUsesWith(F, T); + EnforceNodeIdInvariant(T); CurDAG->RemoveDeadNode(F); } @@ -270,7 +280,7 @@ public: void SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned TableSize); - /// \brief Return true if complex patterns for this target can mutate the + /// Return true if complex patterns for this target can mutate the /// DAG. virtual bool ComplexPatternFuncMutatesDAG() const { return false; @@ -282,14 +292,14 @@ private: // Calls to these functions are generated by tblgen. 
void Select_INLINEASM(SDNode *N); - void Select_READ_REGISTER(SDNode *N); - void Select_WRITE_REGISTER(SDNode *N); + void Select_READ_REGISTER(SDNode *Op); + void Select_WRITE_REGISTER(SDNode *Op); void Select_UNDEF(SDNode *N); void CannotYetSelect(SDNode *N); private: void DoInstructionSelection(); - SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTs, + SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, ArrayRef<SDValue> Ops, unsigned EmitNodeInfo); SDNode *MutateStrictFPToFP(SDNode *Node, unsigned NewOpc); @@ -299,10 +309,10 @@ private: /// instruction selected, false if no code should be emitted for it. bool PrepareEHLandingPad(); - /// \brief Perform instruction selection on all basic blocks in the function. + /// Perform instruction selection on all basic blocks in the function. void SelectAllBasicBlocks(const Function &Fn); - /// \brief Perform instruction selection on a single basic block, for + /// Perform instruction selection on a single basic block, for /// instructions between \p Begin and \p End. \p HadTailCall will be set /// to true if a call in the block was translated as a tail call. void SelectBasicBlock(BasicBlock::const_iterator Begin, @@ -312,7 +322,7 @@ private: void CodeGenAndEmitDAG(); - /// \brief Generate instructions for lowering the incoming arguments of the + /// Generate instructions for lowering the incoming arguments of the /// given function. void LowerArguments(const Function &F); diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 522c2f1b2cb2..1af22185d366 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -31,17 +31,18 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include <algorithm> #include <cassert> #include <climits> @@ -189,8 +190,10 @@ public: inline bool isUndef() const; inline unsigned getMachineOpcode() const; inline const DebugLoc &getDebugLoc() const; - inline void dump(const SelectionDAG *G = nullptr) const; - inline void dumpr(const SelectionDAG *G = nullptr) const; + inline void dump() const; + inline void dump(const SelectionDAG *G) const; + inline void dumpr() const; + inline void dumpr(const SelectionDAG *G) const; /// Return true if this operand (which must be a chain) reaches the /// specified operand without crossing any side-effecting instructions. @@ -357,21 +360,34 @@ private: bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; - bool UnsafeAlgebra : 1; bool NoNaNs : 1; bool NoInfs : 1; bool NoSignedZeros : 1; bool AllowReciprocal : 1; bool VectorReduction : 1; bool AllowContract : 1; + bool ApproximateFuncs : 1; + bool AllowReassociation : 1; public: /// Default constructor turns off all optimization flags. 
SDNodeFlags() : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false), + Exact(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), - AllowContract(false) {} + AllowContract(false), ApproximateFuncs(false), + AllowReassociation(false) {} + + /// Propagate the fast-math-flags from an IR FPMathOperator. + void copyFMF(const FPMathOperator &FPMO) { + setNoNaNs(FPMO.hasNoNaNs()); + setNoInfs(FPMO.hasNoInfs()); + setNoSignedZeros(FPMO.hasNoSignedZeros()); + setAllowReciprocal(FPMO.hasAllowReciprocal()); + setAllowContract(FPMO.hasAllowContract()); + setApproximateFuncs(FPMO.hasApproxFunc()); + setAllowReassociation(FPMO.hasAllowReassoc()); + } /// Sets the state of the flags to the defined state. void setDefined() { AnyDefined = true; } @@ -391,10 +407,6 @@ public: setDefined(); Exact = b; } - void setUnsafeAlgebra(bool b) { - setDefined(); - UnsafeAlgebra = b; - } void setNoNaNs(bool b) { setDefined(); NoNaNs = b; @@ -419,18 +431,32 @@ public: setDefined(); AllowContract = b; } + void setApproximateFuncs(bool b) { + setDefined(); + ApproximateFuncs = b; + } + void setAllowReassociation(bool b) { + setDefined(); + AllowReassociation = b; + } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } bool hasNoSignedWrap() const { return NoSignedWrap; } bool hasExact() const { return Exact; } - bool hasUnsafeAlgebra() const { return UnsafeAlgebra; } bool hasNoNaNs() const { return NoNaNs; } bool hasNoInfs() const { return NoInfs; } bool hasNoSignedZeros() const { return NoSignedZeros; } bool hasAllowReciprocal() const { return AllowReciprocal; } bool hasVectorReduction() const { return VectorReduction; } bool hasAllowContract() const { return AllowContract; } + bool hasApproximateFuncs() const { return ApproximateFuncs; } + bool hasAllowReassociation() const { return AllowReassociation; } + + bool isFast() const { + return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && + AllowContract && ApproximateFuncs && AllowReassociation; + } /// Clear any flags in this flag set that aren't also set in Flags. /// If the given Flags are undefined then don't do anything. 
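For illustration only (not part of the patch): a minimal sketch of how a lowering routine might use the new copyFMF/isFast API from the hunk above. The locals I, DAG, DL, VT, LHS, and RHS are assumed for the example.

    SDNodeFlags Flags;
    if (const auto *FPOp = dyn_cast<FPMathOperator>(&I))
      Flags.copyFMF(*FPOp); // Mirror the IR-level fast-math flags.
    SDValue Sum = DAG.getNode(ISD::FADD, DL, VT, LHS, RHS, Flags);
    if (Sum->isFast()) {
      // nnan, ninf, nsz, arcp, contract, afn, and reassoc are all set.
    }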
@@ -440,13 +466,14 @@ public: NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; - UnsafeAlgebra &= Flags.UnsafeAlgebra; NoNaNs &= Flags.NoNaNs; NoInfs &= Flags.NoInfs; NoSignedZeros &= Flags.NoSignedZeros; AllowReciprocal &= Flags.AllowReciprocal; VectorReduction &= Flags.VectorReduction; AllowContract &= Flags.AllowContract; + ApproximateFuncs &= Flags.ApproximateFuncs; + AllowReassociation &= Flags.AllowReassociation; } }; @@ -466,11 +493,13 @@ protected: friend class SDNode; friend class MemIntrinsicSDNode; friend class MemSDNode; + friend class SelectionDAG; uint16_t HasDebugValue : 1; uint16_t IsMemIntrinsic : 1; + uint16_t IsDivergent : 1; }; - enum { NumSDNodeBits = 2 }; + enum { NumSDNodeBits = 3 }; class ConstantSDNodeBitfields { friend class ConstantSDNode; @@ -540,7 +569,7 @@ protected: static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide"); static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide"); static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide"); - static_assert(sizeof(LoadSDNodeBitfields) <= 4, "field too wide"); + static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide"); static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide"); private: @@ -662,6 +691,8 @@ public: bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; } void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; } + bool isDivergent() const { return SDNodeBits.IsDivergent; } + /// Return true if there are no uses of this node. bool use_empty() const { return UseList == nullptr; } @@ -796,16 +827,44 @@ public: /// searches to be performed in parallel, caching of results across /// queries and incremental addition to Worklist. Stops early if N is /// found but will resume. Remember to clear Visited and Worklists - /// if DAG changes. + /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before + /// giving up. The TopologicalPrune flag signals that positive NodeIds are + /// topologically ordered (Operands have strictly smaller node id) and search + /// can be pruned leveraging this. static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl<const SDNode *> &Visited, SmallVectorImpl<const SDNode *> &Worklist, - unsigned int MaxSteps = 0) { + unsigned int MaxSteps = 0, + bool TopologicalPrune = false) { + SmallVector<const SDNode *, 8> DeferredNodes; if (Visited.count(N)) return true; + + // Node Ids are assigned in three places: as a topological + // ordering (> 0), during legalization (results in values set to + // 0), and for new nodes (set to -1). If N has a topological id then we + // know that all nodes with ids smaller than it cannot be + // successors and we need not check them. Filter out all nodes + // that can't be matched. We add them to the worklist before exit + // in case of multiple calls. Note that during selection the topological id + // may be violated if a node's predecessor is selected before it. We mark + // this at selection by negating the id of unselected successors and + // restricting topological pruning to positive ids. + + int NId = N->getNodeId(); + // If we invalidated the Id, reconstruct the original NId.
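+    // (Illustrative note, inferred from the reconstruction below: an
+    // invalidated id X is assumed to be stored as -(X + 1), e.g. 5 becomes
+    // -6, so the check recovers X; a plain -1 still means a new node.)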
+ if (NId < -1) + NId = -(NId + 1); + + bool Found = false; while (!Worklist.empty()) { const SDNode *M = Worklist.pop_back_val(); - bool Found = false; + int MId = M->getNodeId(); + if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) && + (MId > 0) && (MId < NId)) { + DeferredNodes.push_back(M); + continue; + } for (const SDValue &OpV : M->op_values()) { SDNode *Op = OpV.getNode(); if (Visited.insert(Op).second) @@ -814,11 +873,16 @@ public: Found = true; } if (Found) - return true; + break; if (MaxSteps != 0 && Visited.size() >= MaxSteps) - return false; + break; } - return false; + // Push deferred nodes back on worklist. + Worklist.append(DeferredNodes.begin(), DeferredNodes.end()); + // If we bailed early, conservatively return found. + if (MaxSteps != 0 && Visited.size() >= MaxSteps) + return true; + return Found; } /// Return true if all the users of N are contained in Nodes. @@ -884,6 +948,7 @@ public: const SDNodeFlags getFlags() const { return Flags; } void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; } + bool isFast() { return Flags.isFast(); } /// Clear any flags in this node that aren't also set in Flags. /// If Flags is not in a defined state then this has no effect. @@ -1089,10 +1154,18 @@ inline const DebugLoc &SDValue::getDebugLoc() const { return Node->getDebugLoc(); } +inline void SDValue::dump() const { + return Node->dump(); +} + inline void SDValue::dump(const SelectionDAG *G) const { return Node->dump(G); } +inline void SDValue::dumpr() const { + return Node->dumpr(); +} + inline void SDValue::dumpr(const SelectionDAG *G) const { return Node->dumpr(G); } @@ -1173,7 +1246,7 @@ protected: public: MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemoryVT, MachineMemOperand *MMO); + EVT memvt, MachineMemOperand *MMO); bool readMem() const { return MMO->isLoad(); } bool writeMem() const { return MMO->isStore(); } @@ -1190,7 +1263,8 @@ public: /// encoding of the volatile flag, as well as bits used by subclasses. This /// function should only be used to compute a FoldingSetNodeID value. /// The HasDebugValue bit is masked out because CSE map needs to match - /// nodes with debug info with nodes without debug info. + /// nodes with debug info with nodes without debug info. Same is about + /// isDivergent bit. unsigned getRawSubclassData() const { uint16_t Data; union { @@ -1199,6 +1273,7 @@ public: }; memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits)); SDNodeBits.HasDebugValue = 0; + SDNodeBits.IsDivergent = false; memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits)); return Data; } @@ -1267,6 +1342,7 @@ public: N->getOpcode() == ISD::ATOMIC_LOAD_ADD || N->getOpcode() == ISD::ATOMIC_LOAD_SUB || N->getOpcode() == ISD::ATOMIC_LOAD_AND || + N->getOpcode() == ISD::ATOMIC_LOAD_CLR || N->getOpcode() == ISD::ATOMIC_LOAD_OR || N->getOpcode() == ISD::ATOMIC_LOAD_XOR || N->getOpcode() == ISD::ATOMIC_LOAD_NAND || @@ -1318,6 +1394,7 @@ public: N->getOpcode() == ISD::ATOMIC_LOAD_ADD || N->getOpcode() == ISD::ATOMIC_LOAD_SUB || N->getOpcode() == ISD::ATOMIC_LOAD_AND || + N->getOpcode() == ISD::ATOMIC_LOAD_CLR || N->getOpcode() == ISD::ATOMIC_LOAD_OR || N->getOpcode() == ISD::ATOMIC_LOAD_XOR || N->getOpcode() == ISD::ATOMIC_LOAD_NAND || @@ -1421,9 +1498,8 @@ class ConstantSDNode : public SDNode { const ConstantInt *Value; - ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, - const DebugLoc &DL, EVT VT) - : SDNode(isTarget ? 
ISD::TargetConstant : ISD::Constant, 0, DL, + ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT) + : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(), getSDVTList(VT)), Value(val) { ConstantSDNodeBits.IsOpaque = isOpaque; @@ -1459,10 +1535,9 @@ class ConstantFPSDNode : public SDNode { const ConstantFP *Value; - ConstantFPSDNode(bool isTarget, const ConstantFP *val, const DebugLoc &DL, - EVT VT) - : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, DL, - getSDVTList(VT)), + ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) + : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, + DebugLoc(), getSDVTList(VT)), Value(val) {} public: @@ -1519,10 +1594,10 @@ bool isOneConstant(SDValue V); bool isBitwiseNot(SDValue V); /// Returns the SDNode if it is a constant splat BuildVector or constant int. -ConstantSDNode *isConstOrConstSplat(SDValue V); +ConstantSDNode *isConstOrConstSplat(SDValue N); /// Returns the SDNode if it is a constant splat BuildVector or constant float. -ConstantFPSDNode *isConstOrConstSplatFP(SDValue V); +ConstantFPSDNode *isConstOrConstSplatFP(SDValue N); class GlobalAddressSDNode : public SDNode { friend class SelectionDAG; @@ -1533,7 +1608,7 @@ class GlobalAddressSDNode : public SDNode { GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, int64_t o, - unsigned char TargetFlags); + unsigned char TF); public: const GlobalValue *getGlobal() const { return TheGlobal; } @@ -1714,13 +1789,13 @@ public: unsigned MinSplatBits = 0, bool isBigEndian = false) const; - /// \brief Returns the splatted value or a null value if this is not a splat. + /// Returns the splatted value or a null value if this is not a splat. /// /// If passed a non-null UndefElements bitvector, it will resize it to match /// the vector width and set the bits where elements are undef. SDValue getSplatValue(BitVector *UndefElements = nullptr) const; - /// \brief Returns the splatted constant or null if this is not a constant + /// Returns the splatted constant or null if this is not a constant /// splat. /// /// If passed a non-null UndefElements bitvector, it will resize it to match @@ -1728,7 +1803,7 @@ public: ConstantSDNode * getConstantSplatNode(BitVector *UndefElements = nullptr) const; - /// \brief Returns the splatted constant FP or null if this is not a constant + /// Returns the splatted constant FP or null if this is not a constant /// FP splat. /// /// If passed a non-null UndefElements bitvector, it will resize it to match @@ -1736,7 +1811,7 @@ public: ConstantFPSDNode * getConstantFPSplatNode(BitVector *UndefElements = nullptr) const; - /// \brief If this is a constant FP splat and the splatted constant FP is an + /// If this is a constant FP splat and the splatted constant FP is an /// exact power or 2, return the log base 2 integer value. Otherwise, /// return -1. 
/// @@ -2120,13 +2195,14 @@ public: : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} // In the both nodes address is Op1, mask is Op2: - // MaskedGatherSDNode (Chain, src0, mask, base, index), src0 is a passthru value - // MaskedScatterSDNode (Chain, value, mask, base, index) + // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) + // MaskedScatterSDNode (Chain, value, mask, base, index, scale) // Mask is a vector of i1 elements const SDValue &getBasePtr() const { return getOperand(3); } const SDValue &getIndex() const { return getOperand(4); } const SDValue &getMask() const { return getOperand(2); } const SDValue &getValue() const { return getOperand(1); } + const SDValue &getScale() const { return getOperand(5); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MGATHER || @@ -2329,6 +2405,17 @@ namespace ISD { cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; } + /// Attempt to match a unary predicate against a scalar/splat constant or + /// every element of a constant BUILD_VECTOR. + bool matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match); + + /// Attempt to match a binary predicate against a pair of scalar/splat + /// constants or every element of a pair of constant BUILD_VECTORs. + bool matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match); + } // end namespace ISD } // end namespace llvm diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 3a91e363f923..334267d9828b 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -578,9 +578,9 @@ class raw_ostream; assert(!MI.isInsideBundle() && "Instructions inside bundles should use bundle start's slot."); assert(mi2iMap.find(&MI) == mi2iMap.end() && "Instr already indexed."); - // Numbering DBG_VALUE instructions could cause code generation to be + // Numbering debug instructions could cause code generation to be // affected by debug information. - assert(!MI.isDebugValue() && "Cannot number DBG_VALUE instructions."); + assert(!MI.isDebugInstr() && "Cannot number debug instructions."); assert(MI.getParent() != nullptr && "Instr must be added to function."); @@ -674,10 +674,10 @@ class raw_ostream; idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb)); renumberIndexes(newItr); - std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); + llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); } - /// \brief Free the resources that were required to maintain a SlotIndex. + /// Free the resources that were required to maintain a SlotIndex. /// /// Once an index is no longer needed (for instance because the instruction /// at that index has been moved), the resources required to maintain the diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index 4407114d2741..3c9850265737 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -29,7 +29,7 @@ class MCStreamer; class raw_ostream; class TargetRegisterInfo; -/// \brief MI-level stackmap operands. +/// MI-level stackmap operands. /// /// MI stackmap operations take the form: /// <id>, <numBytes>, live args... @@ -60,7 +60,7 @@ public: } }; -/// \brief MI-level patchpoint operands. +/// MI-level patchpoint operands. /// /// MI patchpoint operations take the form: /// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... 
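For illustration only (not part of the patch): a possible use of the ISD::matchUnaryPredicate helper declared in the SelectionDAGNodes.h hunk above; Divisor is an assumed SDValue.

    // Accept either a scalar constant or a constant BUILD_VECTOR (splat or
    // not) in which every defined element is non-zero.
    bool AllNonZero = ISD::matchUnaryPredicate(
        Divisor, [](ConstantSDNode *C) { return !C->isNullValue(); });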
@@ -137,7 +137,7 @@ public: return getVarIdx(); } - /// \brief Get the next scratch register operand index. + /// Get the next scratch register operand index. unsigned getNextScratchIdx(unsigned StartIdx = 0) const; }; @@ -236,15 +236,15 @@ public: FnInfos.clear(); } - /// \brief Generate a stackmap record for a stackmap instruction. + /// Generate a stackmap record for a stackmap instruction. /// /// MI must be a raw STACKMAP, not a PATCHPOINT. void recordStackMap(const MachineInstr &MI); - /// \brief Generate a stackmap record for a patchpoint instruction. + /// Generate a stackmap record for a patchpoint instruction. void recordPatchPoint(const MachineInstr &MI); - /// \brief Generate a stackmap record for a statepoint instruction. + /// Generate a stackmap record for a statepoint instruction. void recordStatepoint(const MachineInstr &MI); /// If there is any stack map data, create a stack map section and serialize @@ -293,11 +293,11 @@ private: MachineInstr::const_mop_iterator MOE, LocationVec &Locs, LiveOutVec &LiveOuts) const; - /// \brief Create a live-out register record for the given register @p Reg. + /// Create a live-out register record for the given register @p Reg. LiveOutReg createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const; - /// \brief Parse the register live-out mask and return a vector of live-out + /// Parse the register live-out mask and return a vector of live-out /// registers that need to be recorded in the stackmap. LiveOutVec parseRegisterLiveOutMask(const uint32_t *Mask) const; @@ -311,16 +311,16 @@ private: MachineInstr::const_mop_iterator MOE, bool recordResult = false); - /// \brief Emit the stackmap header. + /// Emit the stackmap header. void emitStackmapHeader(MCStreamer &OS); - /// \brief Emit the function frame record for each function. + /// Emit the function frame record for each function. void emitFunctionFrameRecords(MCStreamer &OS); - /// \brief Emit the constant pool. + /// Emit the constant pool. void emitConstantPoolEntries(MCStreamer &OS); - /// \brief Emit the callsite info for each stackmap/patchpoint intrinsic call. + /// Emit the callsite info for each stackmap/patchpoint intrinsic call. void emitCallsiteEntries(MCStreamer &OS); void print(raw_ostream &OS); diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index 72de212d0df9..a506ac636a17 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" @@ -35,24 +36,11 @@ class TargetMachine; class Type; class StackProtector : public FunctionPass { -public: - /// SSPLayoutKind. Stack Smashing Protection (SSP) rules require that - /// vulnerable stack allocations are located close the stack protector. - enum SSPLayoutKind { - SSPLK_None, ///< Did not trigger a stack protector. No effect on data - ///< layout. - SSPLK_LargeArray, ///< Array or nested array >= SSP-buffer-size. Closest - ///< to the stack protector. - SSPLK_SmallArray, ///< Array or nested array < SSP-buffer-size. 2nd closest - ///< to the stack protector. - SSPLK_AddrOf ///< The address of this allocation is exposed and - ///< triggered protection. 3rd closest to the protector. - }; - +private: /// A mapping of AllocaInsts to their required SSP layout. 
- using SSPLayoutMap = ValueMap<const AllocaInst *, SSPLayoutKind>; + using SSPLayoutMap = DenseMap<const AllocaInst *, + MachineFrameInfo::SSPLayoutKind>; -private: const TargetMachine *TM = nullptr; /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -70,7 +58,7 @@ private: /// AllocaInst triggers a stack protector. SSPLayoutMap Layout; - /// \brief The minimum size of buffers that will receive stack smashing + /// The minimum size of buffers that will receive stack smashing /// protection when -fstack-protection is used. unsigned SSPBufferSize = 0; @@ -107,7 +95,7 @@ private: bool ContainsProtectableArray(Type *Ty, bool &IsLarge, bool Strong = false, bool InStruct = false) const; - /// \brief Check whether a stack allocation has its address taken. + /// Check whether a stack allocation has its address taken. bool HasAddressTaken(const Instruction *AI); /// RequiresStackProtector - Check whether or not this function needs a @@ -123,14 +111,12 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; - SSPLayoutKind getSSPLayout(const AllocaInst *AI) const; - // Return true if StackProtector is supposed to be handled by SelectionDAG. bool shouldEmitSDCheck(const BasicBlock &BB) const; - void adjustForColoring(const AllocaInst *From, const AllocaInst *To); - bool runOnFunction(Function &Fn) override; + + void copyToMachineFrameInfo(MachineFrameInfo &MFI) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h index 8646a15599cb..7d138f585171 100644 --- a/include/llvm/CodeGen/TargetCallingConv.h +++ b/include/llvm/CodeGen/TargetCallingConv.h @@ -14,8 +14,8 @@ #ifndef LLVM_CODEGEN_TARGETCALLINGCONV_H #define LLVM_CODEGEN_TARGETCALLINGCONV_H -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <climits> diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h index 61f1cf07bcf2..f8effee998e3 100644 --- a/include/llvm/CodeGen/TargetFrameLowering.h +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -158,6 +158,10 @@ public: return false; } + /// Returns true if the target can safely skip saving callee-saved registers + /// for noreturn nounwind functions. + virtual bool enableCalleeSaveSkip(const MachineFunction &MF) const; + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. virtual void emitPrologue(MachineFunction &MF, @@ -341,6 +345,14 @@ public: return false; return true; } + + /// Return initial CFA offset value i.e. the one valid at the beginning of the + /// function (before any stack operations). + virtual int getInitialCFAOffset(const MachineFunction &MF) const; + + /// Return initial CFA register value i.e. the one valid at the beginning of + /// the function (before any stack operations). 
+ virtual unsigned getInitialCFARegister(const MachineFunction &MF) const; }; } // End llvm namespace diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 38a1b33aecad..b5bc561d834c 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -18,12 +18,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" @@ -79,7 +81,7 @@ public: /// Given a machine instruction descriptor, returns the register /// class constraint for OpNum, or NULL. - const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, + const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const; @@ -225,6 +227,17 @@ public: return 0; } + /// Optional extension of isLoadFromStackSlot that returns the number of + /// bytes loaded from the stack. This must be implemented if a backend + /// supports partial stack slot spills/loads to further disambiguate + /// what the load does. + virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex, + unsigned &MemBytes) const { + MemBytes = 0; + return isLoadFromStackSlot(MI, FrameIndex); + } + /// Check for post-frame ptr elimination stack locations as well. /// This uses a heuristic so it isn't reliable for correctness. virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, @@ -252,6 +265,17 @@ public: return 0; } + /// Optional extension of isStoreToStackSlot that returns the number of + /// bytes stored to the stack. This must be implemented if a backend + /// supports partial stack slot spills/loads to further disambiguate + /// what the store does. + virtual unsigned isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex, + unsigned &MemBytes) const { + MemBytes = 0; + return isStoreToStackSlot(MI, FrameIndex); + } + /// Check for post-frame ptr elimination stack locations as well. /// This uses a heuristic, so it isn't reliable for correctness. virtual unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, @@ -325,7 +349,7 @@ public: unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const; - /// \brief Clones instruction or the whole instruction bundle \p Orig and + /// Clones instruction or the whole instruction bundle \p Orig and /// insert into \p MBB before \p InsertBefore. The target may update operands /// that are required to be unique. /// @@ -421,7 +445,8 @@ public: /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI /// and \p DefIdx. /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of - /// the list is modeled as <Reg:SubReg, SubIdx>. + /// the list is modeled as <Reg:SubReg, SubIdx>. Operands with the undef + /// flag are not added to this list. 
/// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce /// two elements: /// - %1:sub1, sub0 @@ -446,7 +471,8 @@ public: /// - %1:sub1, sub0 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). /// @@ -465,7 +491,8 @@ public: /// - InsertedReg: %1:sub1, sub3 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). /// @@ -632,8 +659,8 @@ public: return true; } - /// Generate code to reduce the loop iteration by one and check if the loop is - /// finished. Return the value/register of the the new loop count. We need + /// Generate code to reduce the loop iteration by one and check if the loop + /// is finished. Return the value/register of the new loop count. We need /// this function when peeling off one or more iterations of a loop. This /// function assumes the nth iteration is peeled first. virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, @@ -819,6 +846,15 @@ public: llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!"); } + /// If the specific machine instruction is an instruction that moves/copies + /// a value from one register to another register, return true along with + /// the @Source machine operand and @Destination machine operand. + virtual bool isCopyInstr(const MachineInstr &MI, + const MachineOperand *&SourceOpNum, + const MachineOperand *&Destination) const { + return false; + } + /// Store the specified register of the given register class to the specified /// stack frame index. The store instruction is to be added to the given /// machine basic block before the specified machine instruction. If isKill @@ -873,7 +909,7 @@ public: /// The new instruction is inserted before MI, and the client is responsible /// for removing the old instruction. MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, - int FrameIndex, + int FI, LiveIntervals *LIS = nullptr) const; /// Same as the previous version except it allows folding of any load and @@ -925,13 +961,13 @@ public: /// \param InsInstrs - Vector of new instructions that implement P /// \param DelInstrs - Old instructions, including Root, that could be /// replaced by InsInstr - /// \param InstrIdxForVirtReg - map of virtual register to instruction in + /// \param InstIdxForVirtReg - map of virtual register to instruction in /// InsInstr that defines it virtual void genAlternativeCodeSequence( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; + DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const; /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to /// reduce critical path length. @@ -950,6 +986,10 @@ public: /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + /// Return true if the given SDNode can be copied during scheduling /// even if it has glue.
+ virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; } + protected: /// Target-dependent implementation for foldMemoryOperand. /// Target-independent code in foldMemoryOperand will @@ -976,7 +1016,7 @@ protected: return nullptr; } - /// \brief Target-dependent implementation of getRegSequenceInputs. + /// Target-dependent implementation of getRegSequenceInputs. /// /// \returns true if it is possible to build the equivalent /// REG_SEQUENCE inputs with the pair \p MI, \p DefIdx. False otherwise. @@ -990,7 +1030,7 @@ protected: return false; } - /// \brief Target-dependent implementation of getExtractSubregInputs. + /// Target-dependent implementation of getExtractSubregInputs. /// /// \returns true if it is possible to build the equivalent /// EXTRACT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. @@ -1004,7 +1044,7 @@ protected: return false; } - /// \brief Target-dependent implementation of getInsertSubregInputs. + /// Target-dependent implementation of getInsertSubregInputs. /// /// \returns true if it is possible to build the equivalent /// INSERT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. @@ -1426,7 +1466,7 @@ public: return 0; } - /// \brief Return the minimum clearance before an instruction that reads an + /// Return the minimum clearance before an instruction that reads an /// unused register. /// /// For example, AVX instructions may copy part of a register operand into @@ -1493,7 +1533,7 @@ public: return false; } - /// \brief Return the value to use for the MachineCSE's LookAheadLimit, + /// Return the value to use for the MachineCSE's LookAheadLimit, /// which is a heuristic used for CSE'ing phys reg defs. virtual unsigned getMachineCSELookAheadLimit() const { // The default lookahead is small to prevent unprofitable quadratic @@ -1562,64 +1602,32 @@ public: return false; } - /// \brief Describes the number of instructions that it will take to call and - /// construct a frame for a given outlining candidate. - struct MachineOutlinerInfo { - /// Number of instructions to call an outlined function for this candidate. - unsigned CallOverhead; - - /// \brief Number of instructions to construct an outlined function frame - /// for this candidate. - unsigned FrameOverhead; - - /// \brief Represents the specific instructions that must be emitted to - /// construct a call to this candidate. - unsigned CallConstructionID; - - /// \brief Represents the specific instructions that must be emitted to - /// construct a frame for this candidate's outlined function. - unsigned FrameConstructionID; - - MachineOutlinerInfo() {} - MachineOutlinerInfo(unsigned CallOverhead, unsigned FrameOverhead, - unsigned CallConstructionID, - unsigned FrameConstructionID) - : CallOverhead(CallOverhead), FrameOverhead(FrameOverhead), - CallConstructionID(CallConstructionID), - FrameConstructionID(FrameConstructionID) {} - }; - - /// \brief Returns a \p MachineOutlinerInfo struct containing target-specific + /// Returns a \p outliner::OutlinedFunction struct containing target-specific /// information for a set of outlining candidates. 
- virtual MachineOutlinerInfo getOutlininingCandidateInfo( - std::vector< - std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> - &RepeatedSequenceLocs) const { + virtual outliner::OutlinedFunction getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { llvm_unreachable( - "Target didn't implement TargetInstrInfo::getOutliningOverhead!"); + "Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!"); } - /// Represents how an instruction should be mapped by the outliner. - /// \p Legal instructions are those which are safe to outline. - /// \p Illegal instructions are those which cannot be outlined. - /// \p Invisible instructions are instructions which can be outlined, but - /// shouldn't actually impact the outlining result. - enum MachineOutlinerInstrType { Legal, Illegal, Invisible }; - /// Returns how or if \p MI should be outlined. - virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const { + virtual outliner::InstrType + getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningType!"); } - /// Insert a custom epilogue for outlined functions. - /// This may be empty, in which case no epilogue or return statement will be - /// emitted. - virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB, - MachineFunction &MF, - const MachineOutlinerInfo &MInfo) const { + /// Returns target-defined flags defining properties of the MBB for + /// the outliner. + virtual unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const { + return 0x0; + } + + /// Insert a custom frame for outlined functions. + virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { llvm_unreachable( - "Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!"); + "Target didn't implement TargetInstrInfo::buildOutlinedFrame!"); } /// Insert a call to an outlined function into the program. @@ -1628,20 +1636,11 @@ public: virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - const MachineOutlinerInfo &MInfo) const { + const outliner::Candidate &C) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } - /// Insert a custom prologue for outlined functions. - /// This may be empty, in which case no prologue will be emitted. - virtual void insertOutlinerPrologue(MachineBasicBlock &MBB, - MachineFunction &MF, - const MachineOutlinerInfo &MInfo) const { - llvm_unreachable( - "Target didn't implement TargetInstrInfo::insertOutlinerPrologue!"); - } - /// Return true if the function can safely be outlined from. /// A function \p MF is considered safe for outlining if an outlined function /// produced from instructions in F will produce a program which produces the @@ -1652,13 +1651,18 @@ public: "TargetInstrInfo::isFunctionSafeToOutlineFrom!"); } + /// Return true if the function should be outlined from by default. + virtual bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const { + return false; + } + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; unsigned ReturnOpcode; }; -/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. +/// Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. 
template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> { using RegInfo = DenseMapInfo<unsigned>; @@ -1672,7 +1676,7 @@ template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> { RegInfo::getTombstoneKey()); } - /// \brief Reuse getHashValue implementation from + /// Reuse getHashValue implementation from /// std::pair<unsigned, unsigned>. static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { std::pair<unsigned, unsigned> PairVal = std::make_pair(Val.Reg, Val.SubReg); diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 380e3b19dc80..d5ff71cf9ac2 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -29,9 +29,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -52,6 +52,7 @@ #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> @@ -222,7 +223,7 @@ public: virtual ~TargetLoweringBase() = default; protected: - /// \brief Initialize all of the actions to default values. + /// Initialize all of the actions to default values. void initActions(); public: @@ -253,7 +254,8 @@ public: /// A documentation for this function would be nice... virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; - EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; + EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, + bool LegalTypes = true) const; /// Returns the type to be used for the index operand of: /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, @@ -421,17 +423,17 @@ public: return true; } - /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. + /// Return true if it is cheap to speculate a call to intrinsic cttz. virtual bool isCheapToSpeculateCttz() const { return false; } - /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz. + /// Return true if it is cheap to speculate a call to intrinsic ctlz. virtual bool isCheapToSpeculateCtlz() const { return false; } - /// \brief Return true if ctlz instruction is fast. + /// Return true if ctlz instruction is fast. virtual bool isCtlzFast() const { return false; } @@ -444,13 +446,13 @@ public: return false; } - /// \brief Return true if it is cheaper to split the store of a merged int val + /// Return true if it is cheaper to split the store of a merged int val /// from a pair of smaller values into multiple stores. virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { return false; } - /// \brief Return if the target supports combining a + /// Return if the target supports combining a /// chain like: /// \code /// %andResult = and %val1, #mask @@ -507,7 +509,30 @@ public: return hasAndNotCompare(X); } - /// \brief Return true if the target wants to use the optimization that + /// There are two ways to clear extreme bits (either low or high): + /// Mask: x & (-1 << y) (the instcombine canonical form) + /// Shifts: x >> y << y + /// Return true if the variant with 2 shifts is preferred. + /// Return false if there is no preference. 
+ virtual bool preferShiftsToClearExtremeBits(SDValue X) const { + // By default, let's assume that no one prefers shifts. + return false; + } + + /// Should we transform the IR-optimal check for whether the given truncation + /// down into KeptBits would be truncating or not: + /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) + /// Into its more traditional form: + /// ((%x << C) a>> C) dstcond %x + /// Return true if we should transform. + /// Return false if there is no preference. + virtual bool shouldTransformSignedTruncationCheck(EVT XVT, + unsigned KeptBits) const { + // By default, let's assume that no one prefers shifts. + return false; + } + + /// Return true if the target wants to use the optimization that /// turns ext(promotableInst1(...(promotableInstN(load)))) into /// promotedInst1(...(promotedInstN(ext(load)))). bool enableExtLdPromotion() const { return EnableExtLdPromotion; } @@ -746,10 +771,10 @@ public: /// operations don't trap except for integer divide and remainder. virtual bool canOpTrap(unsigned Op, EVT VT) const; - /// Similar to isShuffleMaskLegal. This is used by Targets can use this to - /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to replace - /// a VAND with a constant pool entry. + /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there + /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a + /// constant pool entry. virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { return false; } @@ -765,6 +790,39 @@ public: return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } + LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { + unsigned EqOpc; + switch (Op) { + default: llvm_unreachable("Unexpected FP pseudo-opcode"); + case ISD::STRICT_FADD: EqOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break; + case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; + case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; + case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; + case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; + case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; + case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; + case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; + case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; + case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; + case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; + case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + } + + auto Action = getOperationAction(EqOpc, VT); + + // We don't currently handle Custom or Promote for strict FP pseudo-ops. + // For now, we just expand for those cases. + if (Action != Legal) + Action = Expand; + + return Action; + } + /// Return true if the specified operation is legal on this target or can be /// made legal with custom lowering. This is used to help guide high-level /// lowering decisions. @@ -800,7 +858,7 @@ public: } /// Return true if lowering to a jump table is allowed.
- bool areJTsAllowed(const Function *Fn) const { + virtual bool areJTsAllowed(const Function *Fn) const { if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") return false; @@ -812,7 +870,7 @@ public: bool rangeFitsInWord(const APInt &Low, const APInt &High, const DataLayout &DL) const { // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DL.getPointerSizeInBits(); + uint64_t BW = DL.getIndexSizeInBits(0u); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } @@ -820,7 +878,7 @@ public: /// Return true if lowering to a jump table is suitable for a set of case /// clusters which may contain \p NumCases cases, \p Range range of values. /// FIXME: This function checks the maximum table size and density, but the - /// minimum size is not checked. It would be nice if the the minimum size is + /// minimum size is not checked. It would be nice if the minimum size were /// also checked within this function. Currently, the minimum size check is /// performed in findJumpTable() in SelectionDAGBuilder and /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl. @@ -986,9 +1044,14 @@ public: /// Return true if the specified condition code is legal on this target. bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { - return - getCondCodeAction(CC, VT) == Legal || - getCondCodeAction(CC, VT) == Custom; + return getCondCodeAction(CC, VT) == Legal; + } + + /// Return true if the specified condition code is legal or custom on this + /// target. + bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { + return getCondCodeAction(CC, VT) == Legal || + getCondCodeAction(CC, VT) == Custom; } /// If the action for this operation is to promote, this method returns the @@ -1110,10 +1173,6 @@ public: /// Certain combinations of ABIs, Targets and features require that types /// are legal for some operations and not for other operations. /// For MIPS all vector types must be passed through the integer register set. - virtual MVT getRegisterTypeForCallingConv(MVT VT) const { - return getRegisterType(VT); - } - virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, EVT VT) const { return getRegisterType(Context, VT); @@ -1172,7 +1231,7 @@ public: return getPointerTy(DL).getSizeInBits(); } - /// \brief Get maximum # of store operations permitted for llvm.memset + /// Get maximum # of store operations permitted for llvm.memset /// /// This function returns the maximum number of store operations permitted /// to replace a call to llvm.memset. The value is set by the target at the @@ -1182,7 +1241,7 @@ public: return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; } - /// \brief Get maximum # of store operations permitted for llvm.memcpy + /// Get maximum # of store operations permitted for llvm.memcpy /// /// This function returns the maximum number of store operations permitted /// to replace a call to llvm.memcpy. The value is set by the target at the @@ -1192,6 +1251,15 @@ public: return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; } + /// Get maximum # of store operations to be glued together + /// + /// This function returns the maximum number of store operations permitted + /// to glue together during lowering of llvm.memcpy. The value is set by + /// the target at the performance threshold for such a replacement.
+ virtual unsigned getMaxGluedStoresPerMemcpy() const { + return MaxGluedStoresPerMemcpy; + } + /// Get maximum # of load operations permitted for memcmp /// /// This function returns the maximum number of load operations permitted @@ -1202,7 +1270,19 @@ public: return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; } - /// \brief Get maximum # of store operations permitted for llvm.memmove + /// For memcmp expansion when the memcmp result is only compared equal or + /// not-equal to 0, allow up to this number of load pairs per block. As an + /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block: + /// a0 = load2bytes &a[0] + /// b0 = load2bytes &b[0] + /// a2 = load1byte &a[2] + /// b2 = load1byte &b[2] + /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 + virtual unsigned getMemcmpEqZeroLoadsPerBlock() const { + return 1; + } + + /// Get maximum # of store operations permitted for llvm.memmove /// /// This function returns the maximum number of store operations permitted /// to replace a call to llvm.memmove. The value is set by the target at the @@ -1212,7 +1292,7 @@ public: return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; } - /// \brief Determine if the target supports unaligned memory accesses. + /// Determine if the target supports unaligned memory accesses. /// /// This function returns true if the target allows unaligned memory accesses /// of the specified type in the given address space. If true, it also returns @@ -1350,7 +1430,7 @@ public: /// If the target has a standard location for the stack protector guard, /// returns the address of that location. Otherwise, returns nullptr. /// DEPRECATED: please override useLoadStackGuardNode and customize - /// LOAD_STACK_GUARD, or customize @llvm.stackguard(). + /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; /// Inserts necessary declarations for SSP (stack protection) purpose. @@ -1905,7 +1985,7 @@ public: Type *Ty, unsigned AddrSpace, Instruction *I = nullptr) const; - /// \brief Return the cost of the scaling factor used in the addressing mode + /// Return the cost of the scaling factor used in the addressing mode /// represented by AM for this target, for a load/store of the specified type. /// /// If the AM is supported, the return value must be >= 0. @@ -2098,11 +2178,14 @@ public: return false; } - /// \brief Get the maximum supported factor for interleaved memory accesses. + /// Return true if the target has a vector blend instruction. + virtual bool hasVectorBlend() const { return false; } + + /// Get the maximum supported factor for interleaved memory accesses. /// Default to be the minimum interleave factor: 2. virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } - /// \brief Lower an interleaved load to target specific intrinsics. Return + /// Lower an interleaved load to target specific intrinsics. Return /// true on success. /// /// \p LI is the vector load instruction. @@ -2116,7 +2199,7 @@ public: return false; } - /// \brief Lower an interleaved store to target specific intrinsics. Return + /// Lower an interleaved store to target specific intrinsics. Return /// true on success. /// /// \p SI is the vector store instruction. @@ -2189,7 +2272,7 @@ public: return false; } - /// \brief Return true if it is beneficial to convert a load of a constant to + /// Return true if it is beneficial to convert a load of a constant to /// just the constant itself. 
/// On some targets it might be more efficient to use a combination of /// arithmetic instructions to materialize the constant instead of loading it @@ -2214,6 +2297,11 @@ public: return false; } + /// Return true if CodeGenPrepare should consider splitting a large offset of + /// a GEP so that the GEP fits into the addressing mode and can be sunk into + /// the same blocks as its users. + virtual bool shouldConsiderGEPOffsetSplit() const { return false; } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -2453,7 +2541,7 @@ protected: /// expected to be merged. unsigned GatherAllAliasesMaxDepth; - /// \brief Specify maximum number of store instructions per memset call. + /// Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of /// store operations that may be substituted for the call to memset. Targets @@ -2469,7 +2557,7 @@ protected: /// to memset, used for functions with OptSize attribute. unsigned MaxStoresPerMemsetOptSize; - /// \brief Specify maximum bytes of store instructions per memcpy call. + /// Specify maximum bytes of store instructions per memcpy call. /// /// When lowering \@llvm.memcpy this field specifies the maximum number of /// store operations that may be substituted for a call to memcpy. Targets @@ -2482,13 +2570,21 @@ protected: /// constant size. unsigned MaxStoresPerMemcpy; + + /// Specify the maximum number of store instructions to glue in inlined memcpy. + /// + /// When memcpy is inlined based on MaxStoresPerMemcpy, specify the maximum + /// number of store instructions to keep together. This helps in pairing and + /// vectorization later on. + unsigned MaxGluedStoresPerMemcpy = 0; + /// Maximum number of store operations that may be substituted for a call to /// memcpy, used for functions with OptSize attribute. unsigned MaxStoresPerMemcpyOptSize; unsigned MaxLoadsPerMemcmp; unsigned MaxLoadsPerMemcmpOptSize; - /// \brief Specify maximum bytes of store instructions per memmove call. + /// Specify maximum bytes of store instructions per memmove call. /// /// When lowering \@llvm.memmove this field specifies the maximum number of /// store instructions that may be substituted for a call to memmove. Targets @@ -2520,6 +2616,16 @@ protected: /// sequence of memory operands that is recognized by PrologEpilogInserter. MachineBasicBlock *emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const; + + /// Replace/modify the XRay custom event operands with target-dependent + /// details. + MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const; + + /// Replace/modify the XRay typed event operands with target-dependent + /// details. + MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const; }; /// This class defines information used to lower LLVM code to legal SelectionDAG @@ -2539,6 +2645,16 @@ public: bool isPositionIndependent() const; + virtual bool isSDNodeSourceOfDivergence(const SDNode *N, + FunctionLoweringInfo *FLI, + DivergenceAnalysis *DA) const { + return false; + } + + virtual bool isSDNodeAlwaysUniform(const SDNode *N) const { + return false; + } + /// Returns true by value, base pointer and offset pointer and addressing mode /// by reference if the node's address can be legally represented as /// pre-indexed load / store address.
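The new TargetLowering hooks above all default to conservative answers, so a backend opts in piecemeal. A minimal sketch of what adopting a few of them could look like for a hypothetical backend (the class name and the chosen threshold values are illustrative assumptions, not part of this patch):

#include "llvm/CodeGen/TargetLowering.h"

namespace {
// Hypothetical backend opting into some of the newly added hooks.
class MyTargetLowering : public llvm::TargetLowering {
public:
  explicit MyTargetLowering(const llvm::TargetMachine &TM)
      : llvm::TargetLowering(TM) {}

  // Let CodeGenPrepare split large GEP offsets so the GEP can be sunk into
  // the blocks of its users (hook added above; defaults to false).
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  // Keep up to four glued stores together when inlining memcpy so that
  // later passes can pair or vectorize them (default is
  // MaxGluedStoresPerMemcpy, i.e. 0).
  unsigned getMaxGluedStoresPerMemcpy() const override { return 4; }

  // Allow two load pairs per block when expanding memcmp()==0
  // (default is 1).
  unsigned getMemcmpEqZeroLoadsPerBlock() const override { return 2; }
};
} // end anonymous namespace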
@@ -2690,6 +2806,30 @@ public: bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const; + /// Look at Vector Op. At this point, we know that only the DemandedElts + /// elements of the result of Op are ever used downstream. If we can use + /// this information to simplify Op, create a new simplified DAG node and + /// return true, storing the original and new nodes in TLO. + /// Otherwise, analyze the expression and return a mask of KnownUndef and + /// KnownZero elements for the expression (used to simplify the caller). + /// The KnownUndef/Zero elements may only be accurate for those elements + /// in the \p DemandedEltMask. + /// \p AssumeSingleUse When this parameter is true, this function will + /// attempt to simplify \p Op even if there are multiple uses. + /// Callers are responsible for correctly updating the DAG based on the + /// results of this function, because simply replacing TLO.Old + /// with TLO.New will be incorrect when this parameter is true and TLO.Old + /// has multiple uses. + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, + APInt &KnownUndef, APInt &KnownZero, + TargetLoweringOpt &TLO, unsigned Depth = 0, + bool AssumeSingleUse = false) const; + + /// Helper wrapper around SimplifyDemandedVectorElts. + bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, + APInt &KnownUndef, APInt &KnownZero, + DAGCombinerInfo &DCI) const; + /// Determine which of the bits specified in Mask are known to be either zero /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts /// argument allows us to only collect the known bits that are shared by the @@ -2718,6 +2858,15 @@ public: const SelectionDAG &DAG, unsigned Depth = 0) const; + /// Attempt to simplify any target nodes based on the demanded vector + /// elements, returning true on success. Otherwise, analyze the expression and + /// return a mask of KnownUndef and KnownZero elements for the expression + /// (used to simplify the caller). The KnownUndef/Zero elements may only be + /// accurate for those elements in \p DemandedElts. + virtual bool SimplifyDemandedVectorEltsForTargetNode( SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, + APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. CombineLevel Level; @@ -2731,7 +2880,7 @@ public: bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } - bool isAfterLegalizeVectorOps() const { + bool isAfterLegalizeDAG() const { return Level == AfterLegalizeDAG; } CombineLevel getDAGCombineLevel() { return Level; } @@ -2753,12 +2902,8 @@ public: /// from getBooleanContents(). bool isConstFalseVal(const SDNode *N) const; - /// Return a constant of type VT that contains a true value that respects - /// getBooleanContents() - SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const; - /// Return if \p N is a True value when extended to \p VT. - bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const; + bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. @@ -3479,7 +3624,7 @@ public: /// bounds the returned pointer is unspecified, but will be within the vector /// bounds.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, - SDValue Idx) const; + SDValue Index) const; //===--------------------------------------------------------------------===// // Instruction Emitting Hooks @@ -3518,6 +3663,13 @@ public: virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; + /// Expands a target-specific indirect branch for the case of JumpTable + /// expansion. + virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, + SelectionDAG &DAG) const { + return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr); + } + // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) // If we're comparing for equality to zero and isCtlzFast is true, expose the // fact that this can be implemented as a ctlz/srl pair, so that the dag @@ -3528,6 +3680,11 @@ private: SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const; + + SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, + SDValue N1, ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const; }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/include/llvm/CodeGen/TargetLoweringObjectFile.h b/include/llvm/CodeGen/TargetLoweringObjectFile.h deleted file mode 100644 index fe77c2954129..000000000000 --- a/include/llvm/CodeGen/TargetLoweringObjectFile.h +++ /dev/null @@ -1,194 +0,0 @@ -//===-- llvm/CodeGen/TargetLoweringObjectFile.h - Object Info ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements classes used to handle lowerings specific to common -// object file formats. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H -#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/SectionKind.h" -#include <cstdint> - -namespace llvm { - -class GlobalValue; -class MachineModuleInfo; -class Mangler; -class MCContext; -class MCExpr; -class MCSection; -class MCSymbol; -class MCSymbolRefExpr; -class MCStreamer; -class MCValue; -class TargetMachine; - -class TargetLoweringObjectFile : public MCObjectFileInfo { - MCContext *Ctx = nullptr; - - /// Name-mangler for global names. - Mangler *Mang = nullptr; - -protected: - bool SupportIndirectSymViaGOTPCRel = false; - bool SupportGOTPCRelWithOffset = true; - - /// This section contains the static constructor pointer list. - MCSection *StaticCtorSection = nullptr; - - /// This section contains the static destructor pointer list. - MCSection *StaticDtorSection = nullptr; - -public: - TargetLoweringObjectFile() = default; - TargetLoweringObjectFile(const TargetLoweringObjectFile &) = delete; - TargetLoweringObjectFile & - operator=(const TargetLoweringObjectFile &) = delete; - virtual ~TargetLoweringObjectFile(); - - MCContext &getContext() const { return *Ctx; } - Mangler &getMangler() const { return *Mang; } - - /// This method must be called before any actual lowering is done.
This - /// specifies the current context for codegen, and gives the lowering - /// implementations a chance to set up their default sections. - virtual void Initialize(MCContext &ctx, const TargetMachine &TM); - - virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, - const MCSymbol *Sym) const; - - /// Emit the module-level metadata that the platform cares about. - virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const {} - - /// Given a constant with the SectionKind, return a section that it should be - /// placed in. - virtual MCSection *getSectionForConstant(const DataLayout &DL, - SectionKind Kind, - const Constant *C, - unsigned &Align) const; - - /// Classify the specified global variable into a set of target independent - /// categories embodied in SectionKind. - static SectionKind getKindForGlobal(const GlobalObject *GO, - const TargetMachine &TM); - - /// This method computes the appropriate section to emit the specified global - /// variable or function definition. This should not be passed external (or - /// available externally) globals. - MCSection *SectionForGlobal(const GlobalObject *GO, SectionKind Kind, - const TargetMachine &TM) const; - - /// This method computes the appropriate section to emit the specified global - /// variable or function definition. This should not be passed external (or - /// available externally) globals. - MCSection *SectionForGlobal(const GlobalObject *GO, - const TargetMachine &TM) const { - return SectionForGlobal(GO, getKindForGlobal(GO, TM), TM); - } - - virtual void getNameWithPrefix(SmallVectorImpl<char> &OutName, - const GlobalValue *GV, - const TargetMachine &TM) const; - - virtual MCSection *getSectionForJumpTable(const Function &F, - const TargetMachine &TM) const; - - virtual bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, - const Function &F) const; - - /// Targets should implement this method to assign a section to globals with - /// an explicit section specfied. The implementation of this method can - /// assume that GO->hasSection() is true. - virtual MCSection * - getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, - const TargetMachine &TM) const = 0; - - /// Return an MCExpr to use for a reference to the specified global variable - /// from exception handling information. - virtual const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, - unsigned Encoding, - const TargetMachine &TM, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const; - - /// Return the MCSymbol for a private symbol with global value name as its - /// base, with the specified suffix. - MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV, - StringRef Suffix, - const TargetMachine &TM) const; - - // The symbol that gets passed to .cfi_personality. - virtual MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, - const TargetMachine &TM, - MachineModuleInfo *MMI) const; - - const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, - MCStreamer &Streamer) const; - - virtual MCSection *getStaticCtorSection(unsigned Priority, - const MCSymbol *KeySym) const { - return StaticCtorSection; - } - - virtual MCSection *getStaticDtorSection(unsigned Priority, - const MCSymbol *KeySym) const { - return StaticDtorSection; - } - - /// \brief Create a symbol reference to describe the given TLS variable when - /// emitting the address in debug info. 
- virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; - - virtual const MCExpr *lowerRelativeReference(const GlobalValue *LHS, - const GlobalValue *RHS, - const TargetMachine &TM) const { - return nullptr; - } - - /// \brief Target supports replacing a data "PC"-relative access to a symbol - /// through another symbol, by accessing the later via a GOT entry instead? - bool supportIndirectSymViaGOTPCRel() const { - return SupportIndirectSymViaGOTPCRel; - } - - /// \brief Target GOT "PC"-relative relocation supports encoding an additional - /// binary expression with an offset? - bool supportGOTPCRelWithOffset() const { - return SupportGOTPCRelWithOffset; - } - - /// \brief Get the target specific PC relative GOT entry relocation - virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, - const MCValue &MV, - int64_t Offset, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const { - return nullptr; - } - - virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, - const GlobalValue *GV) const {} - -protected: - virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO, - SectionKind Kind, - const TargetMachine &TM) const = 0; -}; - -} // end namespace llvm - -#endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 69de9f8cb35d..f5c7fc824ab4 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { @@ -36,16 +36,18 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { protected: MCSymbolRefExpr::VariantKind PLTRelativeVariantKind = MCSymbolRefExpr::VK_None; + const TargetMachine *TM; public: TargetLoweringObjectFileELF() = default; ~TargetLoweringObjectFileELF() override = default; + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + /// Emit Obj-C garbage collection and linker options. - void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; - void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, + void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const override; /// Given a constant with the SectionKind, return a section that it should be @@ -98,8 +100,7 @@ public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; /// Emit the module flags that specify the garbage collection information. - void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; @@ -153,8 +154,7 @@ public: const TargetMachine &TM) const override; /// Emit Obj-C garbage collection and linker options. 
- void emitModuleMetadata(MCStreamer &Streamer, Module &M, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override; MCSection *getStaticCtorSection(unsigned Priority, const MCSymbol *KeySym) const override; @@ -163,6 +163,19 @@ public: void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV) const override; + + void emitLinkerFlagsForUsed(raw_ostream &OS, + const GlobalValue *GV) const override; + + const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, + const TargetMachine &TM) const override; + + /// Given a mergeable constant with the specified size and relocation + /// information, return a section that it should be placed in. + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, + unsigned &Align) const override; }; class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile { diff --git a/include/llvm/CodeGen/TargetOpcodes.def b/include/llvm/CodeGen/TargetOpcodes.def deleted file mode 100644 index d3e8483798a7..000000000000 --- a/include/llvm/CodeGen/TargetOpcodes.def +++ /dev/null @@ -1,461 +0,0 @@ -//===-- llvm/CodeGen/TargetOpcodes.def - Target Indep Opcodes ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the target independent instruction opcodes. -// -//===----------------------------------------------------------------------===// - -// NOTE: NO INCLUDE GUARD DESIRED! - -/// HANDLE_TARGET_OPCODE defines an opcode and its associated enum value. -/// -#ifndef HANDLE_TARGET_OPCODE -#define HANDLE_TARGET_OPCODE(OPC, NUM) -#endif - -/// HANDLE_TARGET_OPCODE_MARKER defines an alternative identifier for an opcode. -/// -#ifndef HANDLE_TARGET_OPCODE_MARKER -#define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) -#endif - -/// Every instruction defined here must also appear in Target.td. -/// -HANDLE_TARGET_OPCODE(PHI) -HANDLE_TARGET_OPCODE(INLINEASM) -HANDLE_TARGET_OPCODE(CFI_INSTRUCTION) -HANDLE_TARGET_OPCODE(EH_LABEL) -HANDLE_TARGET_OPCODE(GC_LABEL) -HANDLE_TARGET_OPCODE(ANNOTATION_LABEL) - -/// KILL - This instruction is a noop that is used only to adjust the -/// liveness of registers. This can be useful when dealing with -/// sub-registers. -HANDLE_TARGET_OPCODE(KILL) - -/// EXTRACT_SUBREG - This instruction takes two operands: a register -/// that has subregisters, and a subregister index. It returns the -/// extracted subregister value. This is commonly used to implement -/// truncation operations on target architectures which support it. -HANDLE_TARGET_OPCODE(EXTRACT_SUBREG) - -/// INSERT_SUBREG - This instruction takes three operands: a register that -/// has subregisters, a register providing an insert value, and a -/// subregister index. It returns the value of the first register with the -/// value of the second register inserted. The first register is often -/// defined by an IMPLICIT_DEF, because it is commonly used to implement -/// anyext operations on target architectures which support it. -HANDLE_TARGET_OPCODE(INSERT_SUBREG) - -/// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef. -HANDLE_TARGET_OPCODE(IMPLICIT_DEF) - -/// SUBREG_TO_REG - Assert the value of bits in a super register. 
-/// The result of this instruction is the value of the second operand inserted -/// into the subregister specified by the third operand. All other bits are -/// assumed to be equal to the bits in the immediate integer constant in the -/// first operand. This instruction just communicates information; No code -/// should be generated. -/// This is typically used after an instruction where the write to a subregister -/// implicitly cleared the bits in the super registers. -HANDLE_TARGET_OPCODE(SUBREG_TO_REG) - -/// COPY_TO_REGCLASS - This instruction is a placeholder for a plain -/// register-to-register copy into a specific register class. This is only -/// used between instruction selection and MachineInstr creation, before -/// virtual registers have been created for all the instructions, and it's -/// only needed in cases where the register classes implied by the -/// instructions are insufficient. It is emitted as a COPY MachineInstr. - HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) - -/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic -HANDLE_TARGET_OPCODE(DBG_VALUE) - -/// REG_SEQUENCE - This variadic instruction is used to form a register that -/// represents a consecutive sequence of sub-registers. It's used as a -/// register coalescing / allocation aid and must be eliminated before code -/// emission. -// In SDNode form, the first operand encodes the register class created by -// the REG_SEQUENCE, while each subsequent pair names a vreg + subreg index -// pair. Once it has been lowered to a MachineInstr, the regclass operand -// is no longer present. -/// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 -/// After register coalescing references of v1024 should be replace with -/// v1027:3, v1025 with v1027:4, etc. - HANDLE_TARGET_OPCODE(REG_SEQUENCE) - -/// COPY - Target-independent register copy. This instruction can also be -/// used to copy between subregisters of virtual registers. - HANDLE_TARGET_OPCODE(COPY) - -/// BUNDLE - This instruction represents an instruction bundle. Instructions -/// which immediately follow a BUNDLE instruction which are marked with -/// 'InsideBundle' flag are inside the bundle. -HANDLE_TARGET_OPCODE(BUNDLE) - -/// Lifetime markers. -HANDLE_TARGET_OPCODE(LIFETIME_START) -HANDLE_TARGET_OPCODE(LIFETIME_END) - -/// A Stackmap instruction captures the location of live variables at its -/// position in the instruction stream. It is followed by a shadow of bytes -/// that must lie within the function and not contain another stackmap. -HANDLE_TARGET_OPCODE(STACKMAP) - -/// FEntry all - This is a marker instruction which gets translated into a raw fentry call. -HANDLE_TARGET_OPCODE(FENTRY_CALL) - -/// Patchable call instruction - this instruction represents a call to a -/// constant address, followed by a series of NOPs. It is intended to -/// support optimizations for dynamic languages (such as javascript) that -/// rewrite calls to runtimes with more efficient code sequences. -/// This also implies a stack map. -HANDLE_TARGET_OPCODE(PATCHPOINT) - -/// This pseudo-instruction loads the stack guard value. Targets which need -/// to prevent the stack guard value or address from being spilled to the -/// stack should override TargetLowering::emitLoadStackGuardNode and -/// additionally expand this pseudo after register allocation. -HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD) - -/// Call instruction with associated vm state for deoptimization and list -/// of live pointers for relocation by the garbage collector. 
It is -/// intended to support garbage collection with fully precise relocating -/// collectors and deoptimizations in either the callee or caller. -HANDLE_TARGET_OPCODE(STATEPOINT) - -/// Instruction that records the offset of a local stack allocation passed to -/// llvm.localescape. It has two arguments: the symbol for the label and the -/// frame index of the local stack allocation. -HANDLE_TARGET_OPCODE(LOCAL_ESCAPE) - -/// Wraps a machine instruction which can fault, bundled with associated -/// information on how to handle such a fault. -/// For example loading instruction that may page fault, bundled with associated -/// information on how to handle such a page fault. It is intended to support -/// "zero cost" null checks in managed languages by allowing LLVM to fold -/// comparisons into existing memory operations. -HANDLE_TARGET_OPCODE(FAULTING_OP) - -/// Wraps a machine instruction to add patchability constraints. An -/// instruction wrapped in PATCHABLE_OP has to either have a minimum -/// size or be preceded with a nop of that size. The first operand is -/// an immediate denoting the minimum size of the instruction, the -/// second operand is an immediate denoting the opcode of the original -/// instruction. The rest of the operands are the operands of the -/// original instruction. -HANDLE_TARGET_OPCODE(PATCHABLE_OP) - -/// This is a marker instruction which gets translated into a nop sled, useful -/// for inserting instrumentation instructions at runtime. -HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER) - -/// Wraps a return instruction and its operands to enable adding nop sleds -/// either before or after the return. The nop sleds are useful for inserting -/// instrumentation instructions at runtime. -/// The patch here replaces the return instruction. -HANDLE_TARGET_OPCODE(PATCHABLE_RET) - -/// This is a marker instruction which gets translated into a nop sled, useful -/// for inserting instrumentation instructions at runtime. -/// The patch here prepends the return instruction. -/// The same thing as in x86_64 is not possible for ARM because it has multiple -/// return instructions. Furthermore, CPU allows parametrized and even -/// conditional return instructions. In the current ARM implementation we are -/// making use of the fact that currently LLVM doesn't seem to generate -/// conditional return instructions. -/// On ARM, the same instruction can be used for popping multiple registers -/// from the stack and returning (it just pops pc register too), and LLVM -/// generates it sometimes. So we can't insert the sled between this stack -/// adjustment and the return without splitting the original instruction into 2 -/// instructions. So on ARM, rather than jumping into the exit trampoline, we -/// call it, it does the tracing, preserves the stack and returns. -HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT) - -/// Wraps a tail call instruction and its operands to enable adding nop sleds -/// either before or after the tail exit. We use this as a disambiguation from -/// PATCHABLE_RET which specifically only works for return instructions. -HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) - -/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be -/// patched to insert instrumentation instructions. -HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL) - -/// The following generic opcodes are not supposed to appear after ISel. -/// This is something we might want to relax, but for now, this is convenient -/// to produce diagnostics. - -/// Generic ADD instruction. 
This is an integer add. -HANDLE_TARGET_OPCODE(G_ADD) -HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD) - -/// Generic SUB instruction. This is an integer sub. -HANDLE_TARGET_OPCODE(G_SUB) - -// Generic multiply instruction. -HANDLE_TARGET_OPCODE(G_MUL) - -// Generic signed division instruction. -HANDLE_TARGET_OPCODE(G_SDIV) - -// Generic unsigned division instruction. -HANDLE_TARGET_OPCODE(G_UDIV) - -// Generic signed remainder instruction. -HANDLE_TARGET_OPCODE(G_SREM) - -// Generic unsigned remainder instruction. -HANDLE_TARGET_OPCODE(G_UREM) - -/// Generic bitwise and instruction. -HANDLE_TARGET_OPCODE(G_AND) - -/// Generic bitwise or instruction. -HANDLE_TARGET_OPCODE(G_OR) - -/// Generic bitwise exclusive-or instruction. -HANDLE_TARGET_OPCODE(G_XOR) - - -HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF) - -/// Generic PHI instruction with types. -HANDLE_TARGET_OPCODE(G_PHI) - -/// Generic instruction to materialize the address of an alloca or other -/// stack-based object. -HANDLE_TARGET_OPCODE(G_FRAME_INDEX) - -/// Generic reference to global value. -HANDLE_TARGET_OPCODE(G_GLOBAL_VALUE) - -/// Generic instruction to extract blocks of bits from the register given -/// (typically a sub-register COPY after instruction selection). -HANDLE_TARGET_OPCODE(G_EXTRACT) - -HANDLE_TARGET_OPCODE(G_UNMERGE_VALUES) - -/// Generic instruction to insert blocks of bits from the registers given into -/// the source. -HANDLE_TARGET_OPCODE(G_INSERT) - -/// Generic instruction to paste a variable number of components together into a -/// larger register. -HANDLE_TARGET_OPCODE(G_MERGE_VALUES) - -/// Generic pointer to int conversion. -HANDLE_TARGET_OPCODE(G_PTRTOINT) - -/// Generic int to pointer conversion. -HANDLE_TARGET_OPCODE(G_INTTOPTR) - -/// Generic bitcast. The source and destination types must be different, or a -/// COPY is the relevant instruction. -HANDLE_TARGET_OPCODE(G_BITCAST) - -/// Generic load. -HANDLE_TARGET_OPCODE(G_LOAD) - -/// Generic store. -HANDLE_TARGET_OPCODE(G_STORE) - -/// Generic atomic cmpxchg with internal success check. -HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS) - -/// Generic atomic cmpxchg. -HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG) - -/// Generic atomicrmw. -HANDLE_TARGET_OPCODE(G_ATOMICRMW_XCHG) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_ADD) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_SUB) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_AND) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_NAND) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_OR) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_XOR) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) -HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) - -/// Generic conditional branch instruction. -HANDLE_TARGET_OPCODE(G_BRCOND) - -/// Generic indirect branch instruction. -HANDLE_TARGET_OPCODE(G_BRINDIRECT) - -/// Generic intrinsic use (without side effects). -HANDLE_TARGET_OPCODE(G_INTRINSIC) - -/// Generic intrinsic use (with side effects). -HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS) - -/// Generic extension allowing rubbish in high bits. -HANDLE_TARGET_OPCODE(G_ANYEXT) - -/// Generic instruction to discard the high bits of a register. This differs -/// from (G_EXTRACT val, 0) on its action on vectors: G_TRUNC will truncate -/// each element individually, G_EXTRACT will typically discard the high -/// elements of the vector. -HANDLE_TARGET_OPCODE(G_TRUNC) - -/// Generic integer constant. -HANDLE_TARGET_OPCODE(G_CONSTANT) - -/// Generic floating constant. 
-HANDLE_TARGET_OPCODE(G_FCONSTANT) - -/// Generic va_start instruction. Stores to its one pointer operand. -HANDLE_TARGET_OPCODE(G_VASTART) - -/// Generic va_start instruction. Stores to its one pointer operand. -HANDLE_TARGET_OPCODE(G_VAARG) - -// Generic sign extend -HANDLE_TARGET_OPCODE(G_SEXT) - -// Generic zero extend -HANDLE_TARGET_OPCODE(G_ZEXT) - -// Generic left-shift -HANDLE_TARGET_OPCODE(G_SHL) - -// Generic logical right-shift -HANDLE_TARGET_OPCODE(G_LSHR) - -// Generic arithmetic right-shift -HANDLE_TARGET_OPCODE(G_ASHR) - -/// Generic integer-base comparison, also applicable to vectors of integers. -HANDLE_TARGET_OPCODE(G_ICMP) - -/// Generic floating-point comparison, also applicable to vectors. -HANDLE_TARGET_OPCODE(G_FCMP) - -/// Generic select. -HANDLE_TARGET_OPCODE(G_SELECT) - -/// Generic unsigned add instruction, consuming the normal operands plus a carry -/// flag, and similarly producing the result and a carry flag. -HANDLE_TARGET_OPCODE(G_UADDE) - -/// Generic unsigned subtract instruction, consuming the normal operands plus a -/// carry flag, and similarly producing the result and a carry flag. -HANDLE_TARGET_OPCODE(G_USUBE) - -/// Generic signed add instruction, producing the result and a signed overflow -/// flag. -HANDLE_TARGET_OPCODE(G_SADDO) - -/// Generic signed subtract instruction, producing the result and a signed -/// overflow flag. -HANDLE_TARGET_OPCODE(G_SSUBO) - -/// Generic unsigned multiply instruction, producing the result and a signed -/// overflow flag. -HANDLE_TARGET_OPCODE(G_UMULO) - -/// Generic signed multiply instruction, producing the result and a signed -/// overflow flag. -HANDLE_TARGET_OPCODE(G_SMULO) - -// Multiply two numbers at twice the incoming bit width (unsigned) and return -// the high half of the result. -HANDLE_TARGET_OPCODE(G_UMULH) - -// Multiply two numbers at twice the incoming bit width (signed) and return -// the high half of the result. -HANDLE_TARGET_OPCODE(G_SMULH) - -/// Generic FP addition. -HANDLE_TARGET_OPCODE(G_FADD) - -/// Generic FP subtraction. -HANDLE_TARGET_OPCODE(G_FSUB) - -/// Generic FP multiplication. -HANDLE_TARGET_OPCODE(G_FMUL) - -/// Generic FMA multiplication. Behaves like llvm fma intrinsic -HANDLE_TARGET_OPCODE(G_FMA) - -/// Generic FP division. -HANDLE_TARGET_OPCODE(G_FDIV) - -/// Generic FP remainder. -HANDLE_TARGET_OPCODE(G_FREM) - -/// Generic FP exponentiation. -HANDLE_TARGET_OPCODE(G_FPOW) - -/// Generic base-e exponential of a value. -HANDLE_TARGET_OPCODE(G_FEXP) - -/// Generic base-2 exponential of a value. -HANDLE_TARGET_OPCODE(G_FEXP2) - -/// Floating point base-e logarithm of a value. -HANDLE_TARGET_OPCODE(G_FLOG) - -/// Floating point base-2 logarithm of a value. -HANDLE_TARGET_OPCODE(G_FLOG2) - -/// Generic FP negation. -HANDLE_TARGET_OPCODE(G_FNEG) - -/// Generic FP extension. -HANDLE_TARGET_OPCODE(G_FPEXT) - -/// Generic float to signed-int conversion -HANDLE_TARGET_OPCODE(G_FPTRUNC) - -/// Generic float to signed-int conversion -HANDLE_TARGET_OPCODE(G_FPTOSI) - -/// Generic float to unsigned-int conversion -HANDLE_TARGET_OPCODE(G_FPTOUI) - -/// Generic signed-int to float conversion -HANDLE_TARGET_OPCODE(G_SITOFP) - -/// Generic unsigned-int to float conversion -HANDLE_TARGET_OPCODE(G_UITOFP) - -/// Generic pointer offset -HANDLE_TARGET_OPCODE(G_GEP) - -/// Clear the specified number of low bits in a pointer. This rounds the value -/// *down* to the given alignment. -HANDLE_TARGET_OPCODE(G_PTR_MASK) - -/// Generic BRANCH instruction. This is an unconditional branch. 
-HANDLE_TARGET_OPCODE(G_BR) - -/// Generic insertelement. -HANDLE_TARGET_OPCODE(G_INSERT_VECTOR_ELT) - -/// Generic extractelement. -HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) - -/// Generic shufflevector. -HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) - -/// Generic byte swap. -HANDLE_TARGET_OPCODE(G_BSWAP) - -// TODO: Add more generic opcodes as we move along. - -/// Marker for the end of the generic opcode. -/// This is used to check if an opcode is in the range of the -/// generic opcodes. -HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BSWAP) - -/// BUILTIN_OP_END - This must be the last enum value in this list. -/// The target-specific post-isel opcode values start here. -HANDLE_TARGET_OPCODE_MARKER(GENERIC_OP_END, PRE_ISEL_GENERIC_OPCODE_END) diff --git a/include/llvm/CodeGen/TargetOpcodes.h b/include/llvm/CodeGen/TargetOpcodes.h index 3ca31a970944..d0d959c4ae11 100644 --- a/include/llvm/CodeGen/TargetOpcodes.h +++ b/include/llvm/CodeGen/TargetOpcodes.h @@ -22,7 +22,7 @@ namespace TargetOpcode { enum { #define HANDLE_TARGET_OPCODE(OPC) OPC, #define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) IDENT = OPC, -#include "llvm/CodeGen/TargetOpcodes.def" +#include "llvm/Support/TargetOpcodes.def" }; } // end namespace TargetOpcode diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index 1aaa85d77a54..5918c524d11c 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -84,20 +84,6 @@ template <> struct isPodLike<IdentifyingPassPtr> { /// This is an ImmutablePass solely for the purpose of exposing CodeGen options /// to the internals of other CodeGen passes. class TargetPassConfig : public ImmutablePass { -public: - /// Pseudo Pass IDs. These are defined within TargetPassConfig because they - /// are unregistered pass IDs. They are only useful for use with - /// TargetPassConfig APIs to identify multiple occurrences of the same pass. - /// - - /// EarlyTailDuplicate - A clone of the TailDuplicate pass that runs early - /// during codegen, on SSA form. - static char EarlyTailDuplicateID; - - /// PostRAMachineLICM - A clone of the LICM pass that runs during late machine - /// optimization after regalloc. - static char PostRAMachineLICMID; - private: PassManagerBase *PM = nullptr; AnalysisID StartBefore = nullptr; @@ -218,9 +204,6 @@ public: /// Return true if the optimized regalloc pipeline is enabled. bool getOptimizeRegAlloc() const; - /// Return true if shrink wrapping is enabled. - bool getEnableShrinkWrap() const; - /// Return true if the default global register allocator is in use and /// has not been overridden on the command line with '-regalloc=...' bool usingDefaultRegAlloc() const; @@ -229,7 +212,7 @@ public: /// representation to the MI representation. /// Adds IR based lowering and target specific optimization passes and finally /// the core instruction selection passes. - /// \returns true if an error occured, false otherwise. + /// \returns true if an error occurred, false otherwise. bool addISelPasses(); /// Add common target configurable passes that perform LLVM IR to IR @@ -320,14 +303,10 @@ public: /// verification is enabled. void addVerifyPass(const std::string &Banner); - /// Check whether or not GlobalISel should be enabled by default. - /// Fallback/abort behavior is controlled via other methods. - virtual bool isGlobalISelEnabled() const; - /// Check whether or not GlobalISel should abort on error.
- /// When this is disable, GlobalISel will fall back on SDISel instead of + /// When this is disabled, GlobalISel will fall back on SDISel instead of /// erroring out. - virtual bool isGlobalISelAbortEnabled() const; + bool isGlobalISelAbortEnabled() const; /// Check whether or not a diagnostic should be emitted when GlobalISel /// uses the fallback path. In other words, it will emit a diagnostic @@ -416,6 +395,13 @@ protected: /// immediately before machine code is emitted. virtual void addPreEmitPass() { } + /// Targets may add passes immediately before machine code is emitted in this + /// callback. This is called even later than `addPreEmitPass`. + // FIXME: Rename `addPreEmitPass` to something more sensible given its actual + // position and remove the `2` suffix here as this callback is what + // `addPreEmitPass` *should* be but in reality isn't. + virtual void addPreEmitPass2() {} + /// Utilities for targets to add passes to the pass manager. /// diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h index 81907538fb0b..538a5845466c 100644 --- a/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -21,11 +21,11 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include <cassert> @@ -238,12 +238,12 @@ private: protected: TargetRegisterInfo(const TargetRegisterInfoDesc *ID, - regclass_iterator RegClassBegin, - regclass_iterator RegClassEnd, + regclass_iterator RCB, + regclass_iterator RCE, const char *const *SRINames, const LaneBitmask *SRILaneMasks, LaneBitmask CoveringLanes, - const RegClassInfo *const RSI, + const RegClassInfo *const RCIs, unsigned Mode = 0); virtual ~TargetRegisterInfo(); @@ -444,6 +444,13 @@ public: return false; } + /// Returns the original SrcReg unless it is the target of a copy-like + /// operation, in which case we chain backwards through all such operations + /// to the ultimate source register. If a physical register is encountered, + /// we stop the search. + virtual unsigned lookThruCopyLike(unsigned SrcReg, + const MachineRegisterInfo *MRI) const; + /// Return a null-terminated list of all of the callee-saved registers on /// this target. The register should be in the order of desired callee-save /// stack frame offset. The first register is closest to the incoming stack @@ -752,6 +759,9 @@ public: virtual const RegClassWeight &getRegClassWeight( const TargetRegisterClass *RC) const = 0; + /// Returns size in bits of a phys/virtual/generic register. + unsigned getRegSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI) const; + /// Get the weight in units of pressure for this register unit. virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0; @@ -961,7 +971,7 @@ public: //===--------------------------------------------------------------------===// /// Subtarget Hooks - /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true. + /// SrcRC and DstRC will be morphed into NewRC if this returns true. virtual bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, @@ -985,6 +995,12 @@ public: /// of the set as well. 
bool checkAllSuperRegsMarked(const BitVector &RegisterSet, ArrayRef<MCPhysReg> Exceptions = ArrayRef<MCPhysReg>()) const; + + virtual const TargetRegisterClass * + getConstrainedRegClassForOperand(const MachineOperand &MO, + const MachineRegisterInfo &MRI) const { + return nullptr; + } }; //===----------------------------------------------------------------------===// @@ -1151,7 +1167,8 @@ struct VirtReg2IndexFunctor { /// /// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, - unsigned SubRegIdx = 0); + unsigned SubIdx = 0, + const MachineRegisterInfo *MRI = nullptr); /// Create Printable object to print register units on a \ref raw_ostream. /// @@ -1163,11 +1180,11 @@ Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, /// Usage: OS << printRegUnit(Unit, TRI) << '\n'; Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); -/// \brief Create Printable object to print virtual registers and physical +/// Create Printable object to print virtual registers and physical /// registers on a \ref raw_ostream. Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -/// \brief Create Printable object to print register classes or register banks +/// Create Printable object to print register classes or register banks /// on a \ref raw_ostream. Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI); diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 1044f0bd27e6..6173925e23a1 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -19,6 +19,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -45,24 +46,23 @@ class TargetSchedModel { public: TargetSchedModel() : SchedModel(MCSchedModel::GetDefaultSchedModel()) {} - /// \brief Initialize the machine model for instruction scheduling. + /// Initialize the machine model for instruction scheduling. /// /// The machine model API keeps a copy of the top-level MCSchedModel table /// indices and may query TargetSubtargetInfo and TargetInstrInfo to resolve /// dynamic properties. - void init(const MCSchedModel &sm, const TargetSubtargetInfo *sti, - const TargetInstrInfo *tii); + void init(const TargetSubtargetInfo *TSInfo); /// Return the MCSchedClassDesc for this instruction. const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const; - /// \brief TargetSubtargetInfo getter. + /// TargetSubtargetInfo getter. const TargetSubtargetInfo *getSubtargetInfo() const { return STI; } - /// \brief TargetInstrInfo getter. + /// TargetInstrInfo getter. const TargetInstrInfo *getInstrInfo() const { return TII; } - /// \brief Return true if this machine model includes an instruction-level + /// Return true if this machine model includes an instruction-level /// scheduling model. /// /// This is more detailed than the course grain IssueWidth and default @@ -71,7 +71,7 @@ public: const MCSchedModel *getMCSchedModel() const { return &SchedModel; } - /// \brief Return true if this machine model includes cycle-to-cycle itinerary + /// Return true if this machine model includes cycle-to-cycle itinerary /// data. /// /// This models scheduling at each stage in the processor pipeline. 
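The init() signature above shrinks from three arguments to one; a hedged before/after sketch for a caller, where MF is an assumed MachineFunction and ST its subtarget:

TargetSchedModel SchedModel;
// Before this patch the caller spelled out all three pieces of state:
//   SchedModel.init(ST.getSchedModel(), &ST, ST.getInstrInfo());
// After it, the model pulls the MCSchedModel and TargetInstrInfo from the
// subtarget itself:
SchedModel.init(&MF.getSubtarget());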
@@ -83,35 +83,35 @@ public: return nullptr; } - /// \brief Return true if this machine model includes an instruction-level + /// Return true if this machine model includes an instruction-level /// scheduling model or cycle-to-cycle itinerary data. bool hasInstrSchedModelOrItineraries() const { return hasInstrSchedModel() || hasInstrItineraries(); } - /// \brief Identify the processor corresponding to the current subtarget. + /// Identify the processor corresponding to the current subtarget. unsigned getProcessorID() const { return SchedModel.getProcessorID(); } - /// \brief Maximum number of micro-ops that may be scheduled per cycle. + /// Maximum number of micro-ops that may be scheduled per cycle. unsigned getIssueWidth() const { return SchedModel.IssueWidth; } - /// \brief Return true if new group must begin. + /// Return true if new group must begin. bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC = nullptr) const; - /// \brief Return true if current group must end. + /// Return true if current group must end. bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC = nullptr) const; - /// \brief Return the number of issue slots required for this MI. + /// Return the number of issue slots required for this MI. unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC = nullptr) const; - /// \brief Get the number of kinds of resources for this target. + /// Get the number of kinds of resources for this target. unsigned getNumProcResourceKinds() const { return SchedModel.getNumProcResourceKinds(); } - /// \brief Get a processor resource by ID for convenience. + /// Get a processor resource by ID for convenience. const MCProcResourceDesc *getProcResource(unsigned PIdx) const { return SchedModel.getProcResource(PIdx); } @@ -126,7 +126,7 @@ public: using ProcResIter = const MCWriteProcResEntry *; - // \brief Get an iterator into the processor resources consumed by this + // Get an iterator into the processor resources consumed by this // scheduling class. ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const { // The subtarget holds a single resource table for all processors. @@ -136,34 +136,34 @@ public: return STI->getWriteProcResEnd(SC); } - /// \brief Multiply the number of units consumed for a resource by this factor + /// Multiply the number of units consumed for a resource by this factor /// to normalize it relative to other resources. unsigned getResourceFactor(unsigned ResIdx) const { return ResourceFactors[ResIdx]; } - /// \brief Multiply number of micro-ops by this factor to normalize it + /// Multiply number of micro-ops by this factor to normalize it /// relative to other resources. unsigned getMicroOpFactor() const { return MicroOpFactor; } - /// \brief Multiply cycle count by this factor to normalize it relative to + /// Multiply cycle count by this factor to normalize it relative to /// other resources. This is the number of resource units per cycle. unsigned getLatencyFactor() const { return ResourceLCM; } - /// \brief Number of micro-ops that may be buffered for OOO execution. + /// Number of micro-ops that may be buffered for OOO execution. unsigned getMicroOpBufferSize() const { return SchedModel.MicroOpBufferSize; } - /// \brief Number of resource units that may be buffered for OOO execution. + /// Number of resource units that may be buffered for OOO execution. /// \return The buffer size in resource units or -1 for unlimited. 
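A made-up worked example of how the three normalization factors above relate (the real computation happens in TargetSchedModel::init; all numbers here are illustrative): with IssueWidth = 2 micro-ops/cycle, resource A at 2 units/cycle and resource B at 3 units/cycle, the LCM of the unit counts is 6, so:

unsigned ResourceLCM   = 6;               // getLatencyFactor(): units per cycle
unsigned FactorA       = ResourceLCM / 2; // getResourceFactor(A) == 3
unsigned FactorB       = ResourceLCM / 3; // getResourceFactor(B) == 2
unsigned MicroOpFactor = ResourceLCM / 2; // getMicroOpFactor()   == 3
// A full cycle of any resource (or of issue bandwidth) now costs the same
// 6 normalized units, so micro-op and resource pressure become comparable.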
int getResourceBufferSize(unsigned PIdx) const { return SchedModel.getProcResource(PIdx)->BufferSize; } - /// \brief Compute operand latency based on the available machine model. + /// Compute operand latency based on the available machine model. /// /// Compute and return the latency of the given data dependent def and use /// when the operand indices are already known. UseMI may be NULL for an @@ -172,7 +172,7 @@ public: const MachineInstr *UseMI, unsigned UseOperIdx) const; - /// \brief Compute the instruction latency based on the available machine + /// Compute the instruction latency based on the available machine /// model. /// /// Compute and return the expected latency of this instruction independent of @@ -185,18 +185,20 @@ public: /// if converter after moving it to TargetSchedModel). unsigned computeInstrLatency(const MachineInstr *MI, bool UseDefaultDefLatency = true) const; + unsigned computeInstrLatency(const MCInst &Inst) const; unsigned computeInstrLatency(unsigned Opcode) const; - /// \brief Output dependency latency of a pair of defs of the same register. + /// Output dependency latency of a pair of defs of the same register. /// /// This is typically one cycle. - unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, + unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const; - /// \brief Compute the reciprocal throughput of the given instruction. - Optional<double> computeInstrRThroughput(const MachineInstr *MI) const; - Optional<double> computeInstrRThroughput(unsigned Opcode) const; + /// Compute the reciprocal throughput of the given instruction. + double computeReciprocalThroughput(const MachineInstr *MI) const; + double computeReciprocalThroughput(const MCInst &MI) const; + double computeReciprocalThroughput(unsigned Opcode) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h index 576522aef466..227e591f5a7d 100644 --- a/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -144,7 +144,7 @@ public: return 0; } - /// \brief True if the subtarget should run MachineScheduler after aggressive + /// True if the subtarget should run MachineScheduler after aggressive /// coalescing. /// /// This currently replaces the SelectionDAG scheduler with the "source" order @@ -152,14 +152,14 @@ public: /// TargetLowering preference). It does not yet disable the postRA scheduler. virtual bool enableMachineScheduler() const; - /// \brief Support printing of [latency:throughput] comment in output .S file. + /// Support printing of [latency:throughput] comment in output .S file. virtual bool supportPrintSchedInfo() const { return false; } - /// \brief True if the machine scheduler should disable the TLI preference + /// True if the machine scheduler should disable the TLI preference /// for preRA scheduling with the source level scheduler. virtual bool enableMachineSchedDefaultSched() const { return true; } - /// \brief True if the subtarget should enable joining global copies. + /// True if the subtarget should enable joining global copies. /// /// By default this is enabled if the machine scheduler is enabled, but /// can be overridden. @@ -171,10 +171,13 @@ public: /// which is the preferred way to influence this. virtual bool enablePostRAScheduler() const; - /// \brief True if the subtarget should run the atomic expansion pass. 
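Because the Optional<double> of computeInstrRThroughput becomes a plain double in the renamed computeReciprocalThroughput, callers lose the presence check; a sketch assuming a raw_ostream OS, a TargetSchedModel SchedModel, and a MachineInstr *MI in scope:

// Before: if (Optional<double> RT = SchedModel.computeInstrRThroughput(MI))
//           OS << " [RThroughput: " << format("%.2f", *RT) << ']';
// After: a value is always produced, so the check disappears.
double RThroughput = SchedModel.computeReciprocalThroughput(MI);
OS << " [RThroughput: " << format("%.2f", RThroughput) << ']';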
+ /// True if the subtarget should run the atomic expansion pass. virtual bool enableAtomicExpand() const; - /// \brief Override generic scheduling policy within a region. + /// True if the subtarget should run the indirectbr expansion pass. + virtual bool enableIndirectBrExpand() const; + + /// Override generic scheduling policy within a region. /// /// This is a convenient way for targets that don't provide any custom /// scheduling heuristics (no custom MachineSchedStrategy) to make @@ -182,7 +185,7 @@ public: virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const {} - // \brief Perform target specific adjustments to the latency of a schedule + // Perform target specific adjustments to the latency of a schedule // dependency. virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const {} @@ -197,13 +200,13 @@ public: return CriticalPathRCs.clear(); } - // \brief Provide an ordered list of schedule DAG mutations for the post-RA + // Provide an ordered list of schedule DAG mutations for the post-RA // scheduler. virtual void getPostRAMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { } - // \brief Provide an ordered list of schedule DAG mutations for the machine + // Provide an ordered list of schedule DAG mutations for the machine // pipeliner. virtual void getSMSMutations( std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { @@ -215,25 +218,25 @@ public: return CodeGenOpt::Default; } - /// \brief True if the subtarget should run the local reassignment + /// True if the subtarget should run the local reassignment /// heuristic of the register allocator. /// This heuristic may be compile time intensive, \p OptLevel provides /// a finer grain to tune the register allocator. virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const; - /// \brief True if the subtarget should consider the cost of local intervals + /// True if the subtarget should consider the cost of local intervals /// created by a split candidate when choosing the best split candidate. This /// heuristic may be compile time intensive. virtual bool enableAdvancedRASplitCost() const; - /// \brief Enable use of alias analysis during code generation (during MI + /// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). virtual bool useAA() const; - /// \brief Enable the use of the early if conversion pass. + /// Enable the use of the early if conversion pass. virtual bool enableEarlyIfConversion() const { return false; } - /// \brief Return PBQPConstraint(s) for the target. + /// Return PBQPConstraint(s) for the target. /// /// Override to provide custom PBQP constraints. virtual std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const { @@ -246,8 +249,11 @@ public: virtual bool enableSubRegLiveness() const { return false; } /// Returns string representation of scheduler comment - std::string getSchedInfoStr(const MachineInstr &MI) const override; + std::string getSchedInfoStr(const MachineInstr &MI) const; std::string getSchedInfoStr(MCInst const &MCI) const override; + + /// This is called after a .mir file was loaded. 
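A minimal sketch of a hypothetical target opting in to the new indirectbr expansion hook; the constructor plumbing a real subtarget needs is omitted:

class MyTargetSubtarget : public TargetSubtargetInfo {
public:
  // Ask TargetPassConfig to schedule the IndirectBrExpand pass, e.g. so
  // that every indirectbr target becomes enumerable.
  bool enableIndirectBrExpand() const override { return true; }
};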
+  virtual void mirFileLoaded(MachineFunction &MF) const;
 };

 } // end namespace llvm
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 40d501edde10..d2ef4a94f8e2 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_CODEGEN_VALUETYPES_H
 #define LLVM_CODEGEN_VALUETYPES_H

-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
 #include <cstdint>
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 73c7fb4ce4b3..0abb4ece1d14 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
 //
 // Value types - These values correspond to the register types defined in the
-// ValueTypes.h file. If you update anything here, you must update it there as
-// well!
+// MachineValueType.h file. If you update anything here, you must update it
+// there as well!
 //
//===----------------------------------------------------------------------===//

@@ -69,7 +69,7 @@ def v4i32 : ValueType<128, 43>; // 4 x i32 vector value
 def v8i32 : ValueType<256, 44>; // 8 x i32 vector value
 def v16i32 : ValueType<512, 45>; // 16 x i32 vector value
 def v32i32 : ValueType<1024,46>; // 32 x i32 vector value
-def v64i32 : ValueType<2048,47>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,47>; // 64 x i32 vector value

 def v1i64 : ValueType<64 , 48>; // 1 x i64 vector value
 def v2i64 : ValueType<128, 49>; // 2 x i64 vector value
@@ -145,6 +145,7 @@ def x86mmx : ValueType<64 , 109>; // X86 MMX value
 def FlagVT : ValueType<0 , 110>; // Pre-RA sched glue
 def isVoid : ValueType<0 , 111>; // Produces no value
 def untyped: ValueType<8 , 112>; // Produces an untyped value
+def ExceptRef: ValueType<0, 113>; // WebAssembly's except_ref type

 def token : ValueType<0 , 248>; // TokenTy
 def MetadataVT: ValueType<0, 249>; // Metadata
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index 3b06f0393114..6a8e50a7e5f5 100644
--- a/include/llvm/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -90,24 +90,24 @@ class TargetInstrInfo;

     void grow();

-    /// @brief returns true if the specified virtual register is
+    /// returns true if the specified virtual register is
     /// mapped to a physical register
     bool hasPhys(unsigned virtReg) const {
       return getPhys(virtReg) != NO_PHYS_REG;
     }

-    /// @brief returns the physical register mapped to the specified
+    /// returns the physical register mapped to the specified
     /// virtual register
     unsigned getPhys(unsigned virtReg) const {
       assert(TargetRegisterInfo::isVirtualRegister(virtReg));
       return Virt2PhysMap[virtReg];
     }

-    /// @brief creates a mapping for the specified virtual register to
+    /// creates a mapping for the specified virtual register to
     /// the specified physical register
     void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg);

-    /// @brief clears the specified virtual register's, physical
+    /// clears the specified virtual register's physical
     /// register mapping
     void clearVirt(unsigned virtReg) {
       assert(TargetRegisterInfo::isVirtualRegister(virtReg));
@@ -116,26 +116,26 @@ class TargetInstrInfo;
       Virt2PhysMap[virtReg] = NO_PHYS_REG;
     }
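A hedged usage sketch of the virtual-to-physical queries above; VRM, VirtReg, and PhysReg are assumed names in a register-allocator-like caller:

if (!VRM.hasPhys(VirtReg))
  VRM.assignVirt2Phys(VirtReg, PhysReg);  // establish the mapping
unsigned Assigned = VRM.getPhys(VirtReg); // now guaranteed != NO_PHYS_REG
VRM.clearVirt(VirtReg);                   // and tear it down again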
-    /// @brief clears all virtual to physical register mappings
+    /// clears all virtual to physical register mappings
     void clearAllVirt() {
       Virt2PhysMap.clear();
       grow();
     }

-    /// @brief returns true if VirtReg is assigned to its preferred physreg.
+    /// returns true if VirtReg is assigned to its preferred physreg.
     bool hasPreferredPhys(unsigned VirtReg);

-    /// @brief returns true if VirtReg has a known preferred register.
+    /// returns true if VirtReg has a known preferred register.
     /// This returns false if VirtReg has a preference that is a virtual
     /// register that hasn't been assigned yet.
     bool hasKnownPreference(unsigned VirtReg);

-    /// @brief records virtReg is a split live interval from SReg.
+    /// records virtReg is a split live interval from SReg.
     void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
       Virt2SplitMap[virtReg] = SReg;
     }

-    /// @brief returns the live interval virtReg is split from.
+    /// returns the live interval virtReg is split from.
     unsigned getPreSplitReg(unsigned virtReg) const {
       return Virt2SplitMap[virtReg];
     }
@@ -149,7 +149,7 @@ class TargetInstrInfo;
       return Orig ? Orig : VirtReg;
     }

-    /// @brief returns true if the specified virtual register is not
+    /// returns true if the specified virtual register is not
     /// mapped to a stack slot or rematerialized.
     bool isAssignedReg(unsigned virtReg) const {
       if (getStackSlot(virtReg) == NO_STACK_SLOT)
@@ -159,20 +159,20 @@ class TargetInstrInfo;
       return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
     }

-    /// @brief returns the stack slot mapped to the specified virtual
+    /// returns the stack slot mapped to the specified virtual
     /// register
     int getStackSlot(unsigned virtReg) const {
       assert(TargetRegisterInfo::isVirtualRegister(virtReg));
       return Virt2StackSlotMap[virtReg];
     }

-    /// @brief create a mapping for the specifed virtual register to
+    /// create a mapping for the specified virtual register to
     /// the next available stack slot
     int assignVirt2StackSlot(unsigned virtReg);

-    /// @brief create a mapping for the specified virtual register to
+    /// create a mapping for the specified virtual register to
     /// the specified stack slot
-    void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+    void assignVirt2StackSlot(unsigned virtReg, int SS);

     void print(raw_ostream &OS, const Module* M = nullptr) const override;
     void dump() const;
diff --git a/include/llvm/CodeGen/WasmEHFuncInfo.h b/include/llvm/CodeGen/WasmEHFuncInfo.h
new file mode 100644
index 000000000000..3ad6760d8813
--- /dev/null
+++ b/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -0,0 +1,80 @@
+//===--- llvm/CodeGen/WasmEHFuncInfo.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for Wasm exception handling schemes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_WASMEHFUNCINFO_H
+#define LLVM_CODEGEN_WASMEHFUNCINFO_H
+
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/BasicBlock.h"
+
+namespace llvm {
+
+using BBOrMBB = PointerUnion<const BasicBlock *, MachineBasicBlock *>;
+
+struct WasmEHFuncInfo {
+  // When there is an entry <A, B>, if an exception is not caught by A, it
+  // should next unwind to the EH pad B.
+  DenseMap<BBOrMBB, BBOrMBB> EHPadUnwindMap;
+  // For entry <A, B>, A is a BB with an instruction that may throw
+  // (invoke/cleanupret in LLVM IR, call/rethrow in the backend) and B is an EH
+  // pad that A unwinds to.
+  DenseMap<BBOrMBB, BBOrMBB> ThrowUnwindMap;
+
+  // Helper functions
+  const BasicBlock *getEHPadUnwindDest(const BasicBlock *BB) const {
+    return EHPadUnwindMap.lookup(BB).get<const BasicBlock *>();
+  }
+  void setEHPadUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
+    EHPadUnwindMap[BB] = Dest;
+  }
+  const BasicBlock *getThrowUnwindDest(BasicBlock *BB) const {
+    return ThrowUnwindMap.lookup(BB).get<const BasicBlock *>();
+  }
+  void setThrowUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
+    ThrowUnwindMap[BB] = Dest;
+  }
+  bool hasEHPadUnwindDest(const BasicBlock *BB) const {
+    return EHPadUnwindMap.count(BB);
+  }
+  bool hasThrowUnwindDest(const BasicBlock *BB) const {
+    return ThrowUnwindMap.count(BB);
+  }
+
+  MachineBasicBlock *getEHPadUnwindDest(MachineBasicBlock *MBB) const {
+    return EHPadUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
+  }
+  void setEHPadUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
+    EHPadUnwindMap[MBB] = Dest;
+  }
+  MachineBasicBlock *getThrowUnwindDest(MachineBasicBlock *MBB) const {
+    return ThrowUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
+  }
+  void setThrowUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
+    ThrowUnwindMap[MBB] = Dest;
+  }
+  bool hasEHPadUnwindDest(MachineBasicBlock *MBB) const {
+    return EHPadUnwindMap.count(MBB);
+  }
+  bool hasThrowUnwindDest(MachineBasicBlock *MBB) const {
+    return ThrowUnwindMap.count(MBB);
+  }
+};
+
+// Analyze the IR in the given function to build WasmEHFuncInfo.
+void calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo);
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_WASMEHFUNCINFO_H
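Finally, a hedged sketch of driving the IR-side half of WasmEHFuncInfo; F is an assumed Function and EHPad one of its EH-pad basic blocks:

WasmEHFuncInfo EHInfo;
calculateWasmEHInfo(&F, EHInfo); // populate both unwind maps from the IR
if (EHInfo.hasEHPadUnwindDest(EHPad)) {
  // If an exception is not caught by EHPad, it unwinds to Dest next.
  const BasicBlock *Dest = EHInfo.getEHPadUnwindDest(EHPad);
  (void)Dest;
}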