| author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
| commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) | |
| tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /include/llvm/CodeGen | |
| parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) | |
Diffstat (limited to 'include/llvm/CodeGen')
68 files changed, 2906 insertions, 984 deletions
diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h index 734531a65d50..f8f6b5448f3f 100644 --- a/include/llvm/CodeGen/AccelTable.h +++ b/include/llvm/CodeGen/AccelTable.h @@ -101,8 +101,6 @@ /// /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable /// function. -/// -/// TODO: Add DWARF v5 emission code. namespace llvm { diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index d110f8b01cb5..a4580da5aec9 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -111,6 +111,10 @@ public: /// of each call to runOnMachineFunction(). MCSymbol *CurrentFnSym = nullptr; + /// The symbol for the current function descriptor on AIX. This is created + /// at the beginning of each call to SetupMachineFunction(). + MCSymbol *CurrentFnDescSym = nullptr; + /// The symbol used to represent the start of the current function for the /// purpose of calculating its size (e.g. using the .size directive). By /// default, this is equal to CurrentFnSym. @@ -304,7 +308,7 @@ public: /// This should be called when a new MachineFunction is being processed from /// runOnMachineFunction. - void SetupMachineFunction(MachineFunction &MF); + virtual void SetupMachineFunction(MachineFunction &MF); /// This method emits the body and trailer for a function. void EmitFunctionBody(); @@ -342,12 +346,11 @@ public: /// so, emit it and return true, otherwise do nothing and return false. bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - /// Emit an alignment directive to the specified power of two boundary. For - /// example, if you pass in 3 here, you will get an 8 byte alignment. If a + /// Emit an alignment directive to the specified power of two boundary. If a /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - void EmitAlignment(unsigned NumBits, const GlobalObject *GV = nullptr) const; + void EmitAlignment(Align Alignment, const GlobalObject *GV = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. virtual const MCExpr *lowerConstant(const Constant *CV); @@ -400,7 +403,7 @@ public: /// By default, this method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing it /// if appropriate. - virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const; + virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB); /// Targets can override this to emit stuff at the end of a basic block. virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB); @@ -415,6 +418,10 @@ public: virtual void EmitFunctionEntryLabel(); + virtual void EmitFunctionDescriptor() { + llvm_unreachable("Function descriptor is target-specific."); + } + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); /// Targets can override this to change how global constants that are part of @@ -635,6 +642,10 @@ public: /// supported by the target. void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const; + /// Return the alignment for the specified \p GV. + static Align getGVAlignment(const GlobalValue *GV, const DataLayout &DL, + Align InAlign = Align::None()); + private: /// Private state for PrintSpecial() // Assign a unique ID to this machine instruction. 
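The AsmPrinter.h hunks above migrate EmitAlignment from a log2 bit count to the new llvm::Align type and expose getGVAlignment. Below is a minimal sketch of what a caller looks like after this change; the helper name emitAlignedGlobalLabel and the 4-byte floor are invented for illustration, while EmitAlignment, getGVAlignment and Align come from the diff itself.

```cpp
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Hypothetical helper: EmitAlignment now takes a byte alignment (Align)
// instead of a power-of-two exponent, and getGVAlignment returns an Align.
static void emitAlignedGlobalLabel(AsmPrinter &Printer,
                                   const GlobalVariable *GV) {
  const DataLayout &DL = GV->getParent()->getDataLayout();
  // At least 4-byte aligned; an explicit alignment on GV can raise this.
  Align GVAlign = AsmPrinter::getGVAlignment(GV, DL, Align(4));
  // Before this commit the equivalent call passed the exponent, e.g.
  // Printer.EmitAlignment(Log2_32(GVAlign.value()), GV).
  Printer.EmitAlignment(GVAlign, GV);
}
```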
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 70bf670fdf0b..2e57b4c9d332 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -190,6 +190,7 @@ private: protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} + virtual ~BasicTTIImplBase() = default; using TargetTransformInfoImplBase::DL; @@ -215,6 +216,16 @@ public: return -1; } + bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, + Intrinsic::ID IID) const { + return false; + } + + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const { + return false; + } + bool isLegalAddImmediate(int64_t imm) { return getTLI()->isLegalAddImmediate(imm); } @@ -317,7 +328,7 @@ public: unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize) { /// Try to find the estimated number of clusters. Note that the number of - /// clusters identified in this function could be different from the actural + /// clusters identified in this function could be different from the actual /// numbers found in lowering. This function ignore switches that are /// lowered with a mix of jump table / bit test / BTree. This function was /// initially intended to be used when estimating the cost of switch in @@ -371,10 +382,6 @@ public: return N; } - unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); } - - unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); } - bool shouldBuildLookupTables() { const TargetLoweringBase *TLI = getTLI(); return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || @@ -508,13 +515,44 @@ public: return BaseT::getInstructionLatency(I); } + virtual Optional<unsigned> + getCacheSize(TargetTransformInfo::CacheLevel Level) const { + return Optional<unsigned>( + getST()->getCacheSize(static_cast<unsigned>(Level))); + } + + virtual Optional<unsigned> + getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { + Optional<unsigned> TargetResult = + getST()->getCacheAssociativity(static_cast<unsigned>(Level)); + + if (TargetResult) + return TargetResult; + + return BaseT::getCacheAssociativity(Level); + } + + virtual unsigned getCacheLineSize() const { + return getST()->getCacheLineSize(); + } + + virtual unsigned getPrefetchDistance() const { + return getST()->getPrefetchDistance(); + } + + virtual unsigned getMinPrefetchStride() const { + return getST()->getMinPrefetchStride(); + } + + virtual unsigned getMaxPrefetchIterationsAhead() const { + return getST()->getMaxPrefetchIterationsAhead(); + } + /// @} /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } - unsigned getRegisterBitWidth(bool Vector) const { return 32; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract @@ -1111,9 +1149,7 @@ public: OpPropsBW); // For non-rotates (X != Y) we must add shift-by-zero handling costs. 
if (X != Y) { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, nullptr); Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy, @@ -1131,7 +1167,6 @@ public: unsigned getIntrinsicInstrCost( Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { - unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); auto *ConcreteTTI = static_cast<T *>(this); SmallVector<unsigned, 2> ISDs; @@ -1288,9 +1323,7 @@ public: /*IsUnsigned=*/false); case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat @@ -1310,9 +1343,7 @@ public: } case Intrinsic::uadd_sat: case Intrinsic::usub_sat: { - Type *CondTy = Type::getInt1Ty(RetTy->getContext()); - if (RetVF > 1) - CondTy = VectorType::get(CondTy, RetVF); + Type *CondTy = RetTy->getWithNewBitWidth(1); Type *OpTy = StructType::create({RetTy, CondTy}); Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat @@ -1329,9 +1360,7 @@ public: case Intrinsic::smul_fix: case Intrinsic::umul_fix: { unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (RetVF > 1) - ExtTy = VectorType::get(ExtTy, RetVF); + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; @@ -1395,9 +1424,7 @@ public: Type *MulTy = RetTy->getContainedType(0); Type *OverflowTy = RetTy->getContainedType(1); unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize); - if (MulTy->isVectorTy()) - ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() ); + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); unsigned ExtOp = IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index aa339e1cc913..a30ca638ee6d 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Alignment.h" namespace llvm { @@ -43,6 +44,7 @@ public: AExtUpper, // The value is in the upper bits of the location and should be // extended with undefined upper bits when retrieved. BCvt, // The value is bit-converted in the location. + Trunc, // The value is truncated in the location. VExt, // The value is vector-widened in the location. // FIXME: Not implemented yet. Code that uses AExt to mean // vector-widen should be fixed to use VExt instead. @@ -197,7 +199,7 @@ private: LLVMContext &Context; unsigned StackOffset; - unsigned MaxStackArgAlign; + Align MaxStackArgAlign; SmallVector<uint32_t, 16> UsedRegs; SmallVector<CCValAssign, 4> PendingLocs; SmallVector<ISD::ArgFlagsTy, 4> PendingArgFlags; @@ -421,19 +423,19 @@ public: /// AllocateStack - Allocate a chunk of stack space with the specified size /// and alignment. 
- unsigned AllocateStack(unsigned Size, unsigned Align) { - assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2. - StackOffset = alignTo(StackOffset, Align); + unsigned AllocateStack(unsigned Size, unsigned Alignment) { + const Align CheckedAlignment(Alignment); + StackOffset = alignTo(StackOffset, CheckedAlignment); unsigned Result = StackOffset; StackOffset += Size; - MaxStackArgAlign = std::max(Align, MaxStackArgAlign); - ensureMaxAlignment(Align); + MaxStackArgAlign = std::max(CheckedAlignment, MaxStackArgAlign); + ensureMaxAlignment(CheckedAlignment); return Result; } - void ensureMaxAlignment(unsigned Align) { + void ensureMaxAlignment(Align Alignment) { if (!AnalyzingMustTailForwardedRegs) - MF.getFrameInfo().ensureMaxAlignment(Align); + MF.getFrameInfo().ensureMaxAlignment(Alignment.value()); } /// Version of AllocateStack with extra register to be shadowed. diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index cf58ee0cabea..705465b15c4c 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -28,6 +28,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/Support/Automaton.h" #include <cstdint> #include <map> #include <memory> @@ -76,26 +77,26 @@ using DFAStateInput = int64_t; class DFAPacketizer { private: - using UnsignPair = std::pair<unsigned, DFAInput>; - const InstrItineraryData *InstrItins; - int CurrentState = 0; - const DFAStateInput (*DFAStateInputTable)[2]; - const unsigned *DFAStateEntryTable; - - // CachedTable is a map from <FromState, Input> to ToState. - DenseMap<UnsignPair, unsigned> CachedTable; - - // Read the DFA transition table and update CachedTable. - void ReadTable(unsigned state); + Automaton<DFAInput> A; public: - DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], - const unsigned *SET); + DFAPacketizer(const InstrItineraryData *InstrItins, Automaton<uint64_t> a) : + InstrItins(InstrItins), A(std::move(a)) { + // Start off with resource tracking disabled. + A.enableTranscription(false); + } // Reset the current state to make all resources available. void clearResources() { - CurrentState = 0; + A.reset(); + } + + // Set whether this packetizer should track not just whether instructions + // can be packetized, but also which functional units each instruction ends up + // using after packetization. + void setTrackResources(bool Track) { + A.enableTranscription(Track); } // Return the DFAInput for an instruction class. @@ -120,6 +121,15 @@ public: // current state to reflect that change. void reserveResources(MachineInstr &MI); + // Return the resources used by the InstIdx'th instruction added to this + // packet. The resources are returned as a bitvector of functional units. + // + // Note that a bundle may be packed in multiple valid ways. This function + // returns one arbitary valid packing. + // + // Requires setTrackResources(true) to have been called. + unsigned getUsedResources(unsigned InstIdx); + const InstrItineraryData *getInstrItins() const { return InstrItins; } }; @@ -134,7 +144,7 @@ class VLIWPacketizerList { protected: MachineFunction &MF; const TargetInstrInfo *TII; - AliasAnalysis *AA; + AAResults *AA; // The VLIW Scheduler. DefaultVLIWScheduler *VLIWScheduler; @@ -146,9 +156,9 @@ protected: std::map<MachineInstr*, SUnit*> MIToSUnit; public: - // The AliasAnalysis parameter can be nullptr. + // The AAResults parameter can be nullptr. 
VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - AliasAnalysis *AA); + AAResults *AA); virtual ~VLIWPacketizerList(); diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index 684f9e40ca5a..e8e7504a6cda 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -550,6 +550,14 @@ public: return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr); } + void takeNodes(IntrusiveBackList<T> &Other) { + for (auto &N : Other) { + N.Next.setPointerAndInt(&N, true); + push_back(N); + } + Other.Last = nullptr; + } + class const_iterator; class iterator : public iterator_facade_base<iterator, std::forward_iterator_tag, T> { @@ -685,6 +693,10 @@ public: return addValue(Alloc, DIEValue(Attribute, Form, std::forward<T>(Value))); } + /// Take ownership of the nodes in \p Other, and append them to the back of + /// the list. + void takeValues(DIEValueList &Other) { List.takeNodes(Other.List); } + value_range values() { return make_range(value_iterator(List.begin()), value_iterator(List.end())); } diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index f09b59daf4dd..03d681feb7aa 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -93,9 +93,9 @@ public: SmallVector<Value *, 16> OutVals; SmallVector<ISD::ArgFlagsTy, 16> OutFlags; - SmallVector<unsigned, 16> OutRegs; + SmallVector<Register, 16> OutRegs; SmallVector<ISD::InputArg, 4> Ins; - SmallVector<unsigned, 4> InRegs; + SmallVector<Register, 4> InRegs; CallLoweringInfo() : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index fb60191abd3a..f812a2f6c585 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -20,7 +20,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -37,6 +36,7 @@ namespace llvm { class Argument; class BasicBlock; class BranchProbabilityInfo; +class LegacyDivergenceAnalysis; class Function; class Instruction; class MachineFunction; diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index d717121ad78e..4901a3748e4a 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -45,18 +45,62 @@ class CallLowering { public: struct ArgInfo { SmallVector<Register, 4> Regs; + // If the argument had to be split into multiple parts according to the + // target calling convention, then this contains the original vregs + // if the argument was an incoming arg. + SmallVector<Register, 2> OrigRegs; Type *Ty; - ISD::ArgFlagsTy Flags; + SmallVector<ISD::ArgFlagsTy, 4> Flags; bool IsFixed; ArgInfo(ArrayRef<Register> Regs, Type *Ty, - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true) - : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags), - IsFixed(IsFixed) { + ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(), + bool IsFixed = true) + : Regs(Regs.begin(), Regs.end()), Ty(Ty), + Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) { + if (!Regs.empty() && Flags.empty()) + this->Flags.push_back(ISD::ArgFlagsTy()); // FIXME: We should have just one way of saying "no register". 
assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) && "only void types should have no register"); } + + ArgInfo() : Ty(nullptr), IsFixed(false) {} + }; + + struct CallLoweringInfo { + /// Calling convention to be used for the call. + CallingConv::ID CallConv = CallingConv::C; + + /// Destination of the call. It should be either a register, globaladdress, + /// or externalsymbol. + MachineOperand Callee = MachineOperand::CreateImm(0); + + /// Descriptor for the return type of the function. + ArgInfo OrigRet; + + /// List of descriptors of the arguments passed to the function. + SmallVector<ArgInfo, 8> OrigArgs; + + /// Valid if the call has a swifterror inout parameter, and contains the + /// vreg that the swifterror should be copied into after the call. + Register SwiftErrorVReg = 0; + + MDNode *KnownCallees = nullptr; + + /// True if the call must be tail call optimized. + bool IsMustTailCall = false; + + /// True if the call passes all target-independent checks for tail call + /// optimization. + bool IsTailCall = false; + + /// True if the call was lowered as a tail call. This is consumed by the + /// legalizer. This allows the legalizer to lower libcalls as tail calls. + bool LoweredTailCall = false; + + /// True if the call is to a vararg function. + bool IsVarArg = false; }; /// Argument handling is mostly uniform between the four places that @@ -72,9 +116,9 @@ public: virtual ~ValueHandler() = default; - /// Returns true if the handler is dealing with formal arguments, - /// not with return values etc. - virtual bool isArgumentHandler() const { return false; } + /// Returns true if the handler is dealing with incoming arguments, + /// i.e. those that move values from some physical location to vregs. + virtual bool isIncomingArgumentHandler() const = 0; /// Materialize a VReg containing the address of the specified /// stack-based object. This is either based on a FrameIndex or @@ -112,8 +156,8 @@ public: virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const ArgInfo &Info, - CCState &State) { - return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + ISD::ArgFlagsTy Flags, CCState &State) { + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); } MachineIRBuilder &MIRBuilder; @@ -162,12 +206,42 @@ protected: /// \p Callback to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. - bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args, + bool handleAssignments(MachineIRBuilder &MIRBuilder, + SmallVectorImpl<ArgInfo> &Args, ValueHandler &Handler) const; bool handleAssignments(CCState &CCState, SmallVectorImpl<CCValAssign> &ArgLocs, - MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args, + MachineIRBuilder &MIRBuilder, + SmallVectorImpl<ArgInfo> &Args, ValueHandler &Handler) const; + + /// Analyze passed or returned values from a call, supplied in \p ArgInfo, + /// incorporating info about the passed values into \p CCState. + /// + /// Used to check if arguments are suitable for tail call lowering. + bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const; + + /// \returns True if the calling convention for a callee and its caller pass + /// results in the same way. Typically used for tail call eligibility checks. + /// + /// \p Info is the CallLoweringInfo for the call. + /// \p MF is the MachineFunction for the caller. + /// \p InArgs contains the results of the call. 
+ /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for + /// fixed arguments. + /// \p CalleeAssignFnVarArg is similar, but for varargs. + /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for + /// fixed arguments. + /// \p CallerAssignFnVarArg is similar, but for varargs. + bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl<ArgInfo> &InArgs, + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const; + public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} virtual ~CallLowering() = default; @@ -223,37 +297,10 @@ public: /// This hook must be implemented to lower the given call instruction, /// including argument and return value marshalling. /// - /// \p CallConv is the calling convention to be used for the call. - /// - /// \p Callee is the destination of the call. It should be either a register, - /// globaladdress, or externalsymbol. - /// - /// \p OrigRet is a descriptor for the return type of the function. - /// - /// \p OrigArgs is a list of descriptors of the arguments passed to the - /// function. - /// - /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout - /// parameter, and contains the vreg that the swifterror should be copied into - /// after the call. /// /// \return true if the lowering succeeded, false otherwise. - virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef<ArgInfo> OrigArgs, - Register SwiftErrorVReg) const { - if (!supportSwiftError()) { - assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror"); - return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs); - } - return false; - } - - /// This hook behaves as the extended lowerCall function, but for targets that - /// do not support swifterror value promotion. - virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, - const MachineOperand &Callee, const ArgInfo &OrigRet, - ArrayRef<ArgInfo> OrigArgs) const { + virtual bool lowerCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { return false; } diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 0c50c9c5e0cf..4c04dc52547d 100644 --- a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -27,6 +27,8 @@ class MachineIRBuilder; class MachineRegisterInfo; class MachineInstr; class MachineOperand; +class GISelKnownBits; +class MachineDominatorTree; struct PreferredTuple { LLT Ty; // The result type of the extend. @@ -35,12 +37,17 @@ struct PreferredTuple { }; class CombinerHelper { +protected: MachineIRBuilder &Builder; MachineRegisterInfo &MRI; GISelChangeObserver &Observer; + GISelKnownBits *KB; + MachineDominatorTree *MDT; public: - CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B); + CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, + GISelKnownBits *KB = nullptr, + MachineDominatorTree *MDT = nullptr); /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const; @@ -56,18 +63,132 @@ public: bool matchCombineCopy(MachineInstr &MI); void applyCombineCopy(MachineInstr &MI); + /// Returns true if \p DefMI precedes \p UseMI or they are the same + /// instruction. 
Both must be in the same basic block. + bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI); + + /// Returns true if \p DefMI dominates \p UseMI. By definition an + /// instruction dominates itself. + /// + /// If we haven't been provided with a MachineDominatorTree during + /// construction, this function returns a conservative result that tracks just + /// a single basic block. + bool dominates(MachineInstr &DefMI, MachineInstr &UseMI); + /// If \p MI is extend that consumes the result of a load, try to combine it. /// Returns true if MI changed. bool tryCombineExtendingLoads(MachineInstr &MI); bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); - bool matchCombineBr(MachineInstr &MI); - bool tryCombineBr(MachineInstr &MI); + /// Combine \p MI into a pre-indexed or post-indexed load/store operation if + /// legal and the surrounding code makes it useful. + bool tryCombineIndexedLoadStore(MachineInstr &MI); + + bool matchElideBrByInvertingCond(MachineInstr &MI); + void applyElideBrByInvertingCond(MachineInstr &MI); + bool tryElideBrByInvertingCond(MachineInstr &MI); + + /// If \p MI is G_CONCAT_VECTORS, try to combine it. + /// Returns true if MI changed. + /// Right now, we support: + /// - concat_vector(undef, undef) => undef + /// - concat_vector(build_vector(A, B), build_vector(C, D)) => + /// build_vector(A, B, C, D) + /// + /// \pre MI.getOpcode() == G_CONCAT_VECTORS. + bool tryCombineConcatVectors(MachineInstr &MI); + /// Check if the G_CONCAT_VECTORS \p MI is undef or if it + /// can be flattened into a build_vector. + /// In the first case \p IsUndef will be true. + /// In the second case \p Ops will contain the operands needed + /// to produce the flattened build_vector. + /// + /// \pre MI.getOpcode() == G_CONCAT_VECTORS. + bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, + SmallVectorImpl<Register> &Ops); + /// Replace \p MI with a flattened build_vector with \p Ops or an + /// implicit_def if IsUndef is true. + void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef, + const ArrayRef<Register> Ops); + + /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS. + /// Returns true if MI changed. + /// + /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. + bool tryCombineShuffleVector(MachineInstr &MI); + /// Check if the G_SHUFFLE_VECTOR \p MI can be replaced by a + /// concat_vectors. + /// \p Ops will contain the operands needed to produce the flattened + /// concat_vectors. + /// + /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR. + bool matchCombineShuffleVector(MachineInstr &MI, + SmallVectorImpl<Register> &Ops); + /// Replace \p MI with a concat_vectors with \p Ops. + void applyCombineShuffleVector(MachineInstr &MI, + const ArrayRef<Register> Ops); + + /// Optimize memcpy intrinsics et al, e.g. constant len calls. + /// /p MaxLen if non-zero specifies the max length of a mem libcall to inline. + /// + /// For example (pre-indexed): + /// + /// $addr = G_GEP $base, $offset + /// [...] + /// $val = G_LOAD $addr + /// [...] + /// $whatever = COPY $addr + /// + /// --> + /// + /// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre) + /// [...] + /// $whatever = COPY $addr + /// + /// or (post-indexed): + /// + /// G_STORE $val, $base + /// [...] + /// $addr = G_GEP $base, $offset + /// [...] + /// $whatever = COPY $addr + /// + /// --> + /// + /// $addr = G_INDEXED_STORE $val, $base, $offset + /// [...] 
+ /// $whatever = COPY $addr + bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); + +private: + // Memcpy family optimization helpers. + bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, + unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile); + bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src, + unsigned KnownLen, unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile); + bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val, + unsigned KnownLen, unsigned DstAlign, bool IsVolatile); + + /// Given a non-indexed load or store instruction \p MI, find an offset that + /// can be usefully and legally folded into it as a post-indexing operation. + /// + /// \returns true if a candidate is found. + bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, + Register &Offset); + + /// Given a non-indexed load or store instruction \p MI, find an offset that + /// can be usefully and legally folded into it as a pre-indexing operation. + /// + /// \returns true if a candidate is found. + bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, + Register &Offset); }; } // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 3b09a8e2b479..ad645a46bbe6 100644 --- a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -27,9 +27,11 @@ class MachineRegisterInfo; class CombinerInfo { public: CombinerInfo(bool AllowIllegalOps, bool ShouldLegalizeIllegal, - LegalizerInfo *LInfo) + LegalizerInfo *LInfo, bool OptEnabled, bool OptSize, + bool MinSize) : IllegalOpsAllowed(AllowIllegalOps), - LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo) { + LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo), + EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(MinSize) { assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) && "Expecting legalizerInfo when illegalops not allowed"); } @@ -43,6 +45,15 @@ public: bool LegalizeIllegalOps; // TODO: Make use of this. const LegalizerInfo *LInfo; + /// Whether optimizations should be enabled. This is to distinguish between + /// uses of the combiner unconditionally and only when optimizations are + /// specifically enabled/ + bool EnableOpt; + /// Whether we're optimizing for size. + bool EnableOptSize; + /// Whether we're optimizing for minsize (-Oz). + bool EnableMinSize; + /// Attempt to combine instructions using MI as the root. 
/// /// Use Observer to report the creation, modification, and erasure of diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h index e817d9b4550e..df196bfbd437 100644 --- a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h @@ -54,6 +54,17 @@ public: return buildConstant(Dst, MaybeCst->getSExtValue()); break; } + case TargetOpcode::G_SEXT_INREG: { + assert(DstOps.size() == 1 && "Invalid dst ops"); + assert(SrcOps.size() == 2 && "Invalid src ops"); + const DstOp &Dst = DstOps[0]; + const SrcOp &Src0 = SrcOps[0]; + const SrcOp &Src1 = SrcOps[1]; + if (auto MaybeCst = + ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) + return buildConstant(Dst, MaybeCst->getSExtValue()); + break; + } } return MachineIRBuilder::buildInstr(Opc, DstOps, SrcOps); } diff --git a/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h new file mode 100644 index 000000000000..dfe5a7f3177d --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -0,0 +1,111 @@ +//===- llvm/CodeGen/GlobalISel/GISelKnownBits.h ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Provides analysis for querying information about KnownBits during GISel +/// passes. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H +#define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H + +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/KnownBits.h" + +namespace llvm { + +class TargetLowering; +class DataLayout; + +class GISelKnownBits : public GISelChangeObserver { + MachineFunction &MF; + MachineRegisterInfo &MRI; + const TargetLowering &TL; + const DataLayout &DL; + +public: + GISelKnownBits(MachineFunction &MF); + virtual ~GISelKnownBits() = default; + void setMF(MachineFunction &MF); + virtual void computeKnownBitsImpl(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + + // KnownBitsAPI + KnownBits getKnownBits(Register R); + // Calls getKnownBits for first operand def of MI. + KnownBits getKnownBits(MachineInstr &MI); + APInt getKnownZeroes(Register R); + APInt getKnownOnes(Register R); + + /// \return true if 'V & Mask' is known to be zero in DemandedElts. We use + /// this predicate to simplify operations downstream. + /// Mask is known to be zero for bits that V cannot have. + bool maskedValueIsZero(Register Val, const APInt &Mask) { + return Mask.isSubsetOf(getKnownBits(Val).Zero); + } + + /// \return true if the sign bit of Op is known to be zero. We use this + /// predicate to simplify operations downstream. + bool signBitIsZero(Register Op); + + // FIXME: Is this the right place for G_FRAME_INDEX? Should it be in + // TargetLowering? 
+ void computeKnownBitsForFrameIndex(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + static Align inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF); + static void computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment); + + // Try to infer alignment for MI. + static MaybeAlign inferPtrAlignment(const MachineInstr &MI); + + // Observer API. No-op for non-caching implementation. + void erasingInstr(MachineInstr &MI) override{}; + void createdInstr(MachineInstr &MI) override{}; + void changingInstr(MachineInstr &MI) override{}; + void changedInstr(MachineInstr &MI) override{}; + +protected: + unsigned getMaxDepth() const { return 6; } +}; + +/// To use KnownBitsInfo analysis in a pass, +/// KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis>().get(MF); +/// Add to observer if the Info is caching. +/// WrapperObserver.addObserver(Info); + +/// Eventually add other features such as caching/ser/deserializing +/// to MIR etc. Those implementations can derive from GISelKnownBits +/// and override computeKnownBitsImpl. +class GISelKnownBitsAnalysis : public MachineFunctionPass { + std::unique_ptr<GISelKnownBits> Info; + +public: + static char ID; + GISelKnownBitsAnalysis() : MachineFunctionPass(ID) { + initializeGISelKnownBitsAnalysisPass(*PassRegistry::getPassRegistry()); + } + GISelKnownBits &get(MachineFunction &MF) { + if (!Info) + Info = std::make_unique<GISelKnownBits>(MF); + return *Info.get(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + void releaseMemory() override { Info.reset(); } +}; +} // namespace llvm + +#endif // ifdef diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 8654ba83f08d..bdb92aa4689d 100644 --- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -213,8 +213,8 @@ private: bool translateStore(const User &U, MachineIRBuilder &MIRBuilder); /// Translate an LLVM string intrinsic (memcpy, memset, ...). - bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned ID); + bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, + Intrinsic::ID ID); void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder); @@ -243,6 +243,10 @@ private: bool valueIsSplit(const Value &V, SmallVectorImpl<uint64_t> *Offsets = nullptr); + /// Common code for translating normal calls or invokes. + bool translateCallSite(const ImmutableCallSite &CS, + MachineIRBuilder &MIRBuilder); + /// Translate call instruction. /// \pre \p U is a call instruction. bool translateCall(const User &U, MachineIRBuilder &MIRBuilder); @@ -514,6 +518,10 @@ private: // function has the optnone attribute. bool EnableOpts = false; + /// True when the block contains a tail call. This allows the IRTranslator to + /// stop translating such blocks early. + bool HasTailCall = false; + /// Switch analysis and optimization. 
class GISelSwitchLowering : public SwitchCG::SwitchLowering { public: diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index e9b93be76754..fd3dc743000b 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -31,6 +31,7 @@ namespace llvm { class APInt; class APFloat; +class GISelKnownBits; class MachineInstr; class MachineInstrBuilder; class MachineFunction; @@ -148,6 +149,13 @@ enum { /// - AddrSpaceN+1 ... GIM_CheckMemoryAddressSpace, + /// Check the minimum alignment of the memory access for the given machine + /// memory operand. + /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - MinAlign - Minimum acceptable alignment + GIM_CheckMemoryAlignment, + /// Check the size of the memory access for the given machine memory operand /// against the size of an operand. /// - InsnID - Instruction ID @@ -201,11 +209,22 @@ enum { /// - Expected Intrinsic ID GIM_CheckIntrinsicID, + /// Check the operand is a specific predicate + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected predicate + GIM_CheckCmpPredicate, + /// Check the specified operand is an MBB /// - InsnID - Instruction ID /// - OpIdx - Operand index GIM_CheckIsMBB, + /// Check the specified operand is an Imm + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_CheckIsImm, + /// Check if the specified operand is safe to fold into the current /// instruction. /// - InsnID - Instruction ID @@ -365,7 +384,20 @@ public: /// if returns true: /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) - virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0; + virtual bool select(MachineInstr &I) = 0; + + CodeGenCoverage *CoverageInfo = nullptr; + GISelKnownBits *KnownBits = nullptr; + MachineFunction *MF = nullptr; + + /// Setup per-MF selector state. 
+ virtual void setupMF(MachineFunction &mf, + GISelKnownBits &KB, + CodeGenCoverage &covinfo) { + CoverageInfo = &covinfo; + KnownBits = &KB; + MF = &mf; + } protected: using ComplexRendererFns = diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index e8ee4af0cb0b..08f2f54bcf90 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -98,7 +98,7 @@ bool InstructionSelector::executeMatchTable( return false; break; } - if (TRI.isPhysicalRegister(MO.getReg())) { + if (Register::isPhysicalRegister(MO.getReg())) { DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": Is a physical register\n"); if (handleReject() == RejectAndGiveUp) @@ -409,6 +409,30 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckMemoryAlignment: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + unsigned MinAlign = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO + = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckMemoryAlignment" + << "(MIs[" << InsnID << "]->memoperands() + " << MMOIdx + << ")->getAlignment() >= " << MinAlign << ")\n"); + if (MMO->getAlignment() < MinAlign && handleReject() == RejectAndGiveUp) + return false; + + break; + } case GIM_CheckMemorySizeEqualTo: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t MMOIdx = MatchTable[CurrentIdx++]; @@ -638,7 +662,21 @@ bool InstructionSelector::executeMatchTable( return false; break; } - + case GIM_CheckCmpPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckCmpPredicate(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isPredicate() || MO.getPredicate() != Value) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckIsMBB: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -652,7 +690,19 @@ bool InstructionSelector::executeMatchTable( } break; } - + case GIM_CheckIsImm: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "))\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } case GIM_CheckIsSafeToFold: { int64_t InsnID = MatchTable[CurrentIdx++]; DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), @@ -792,11 +842,13 @@ bool InstructionSelector::executeMatchTable( case GIR_AddRegister: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t RegNum = MatchTable[CurrentIdx++]; + uint64_t RegFlags = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted 
to add to undefined instruction"); - OutMIs[InsnID].addReg(RegNum); - DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" - << InsnID << "], " << RegNum << ")\n"); + OutMIs[InsnID].addReg(RegNum, RegFlags); + DEBUG_WITH_TYPE( + TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" + << InsnID << "], " << RegNum << ", " << RegFlags << ")\n"); break; } diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index a22778b8848c..7f960e727846 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -47,8 +47,7 @@ public: bool tryCombineAnyExt(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts) { - if (MI.getOpcode() != TargetOpcode::G_ANYEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); @@ -93,9 +92,7 @@ public: bool tryCombineZExt(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_ZEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_ZEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); @@ -136,32 +133,24 @@ public: bool tryCombineSExt(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_SEXT) - return false; + assert(MI.getOpcode() == TargetOpcode::G_SEXT); Builder.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); - // sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c + // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c) Register TruncSrc; if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLT DstTy = MRI.getType(DstReg); - // Guess on the RHS shift amount type, which should be re-legalized if - // applicable. - if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) || - isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) || - isConstantUnsupported(DstTy)) + if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}})) return false; LLVM_DEBUG(dbgs() << ".. 
Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); - auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt); - auto MIBShl = Builder.buildInstr( - TargetOpcode::G_SHL, {DstTy}, - {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), MIBShAmt}); - Builder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {MIBShl, MIBShAmt}); + uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); + Builder.buildInstr( + TargetOpcode::G_SEXT_INREG, {DstReg}, + {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits}); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -172,9 +161,8 @@ public: bool tryFoldImplicitDef(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts) { unsigned Opcode = MI.getOpcode(); - if (Opcode != TargetOpcode::G_ANYEXT && Opcode != TargetOpcode::G_ZEXT && - Opcode != TargetOpcode::G_SEXT) - return false; + assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || + Opcode == TargetOpcode::G_SEXT); if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), MRI)) { @@ -203,21 +191,38 @@ public: return false; } - static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) { + static unsigned canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp, + LLT OpTy, LLT DestTy) { if (OpTy.isVector() && DestTy.isVector()) - return TargetOpcode::G_CONCAT_VECTORS; + return MergeOp == TargetOpcode::G_CONCAT_VECTORS; + + if (OpTy.isVector() && !DestTy.isVector()) { + if (MergeOp == TargetOpcode::G_BUILD_VECTOR) + return true; - if (OpTy.isVector() && !DestTy.isVector()) - return TargetOpcode::G_BUILD_VECTOR; + if (MergeOp == TargetOpcode::G_CONCAT_VECTORS) { + if (ConvertOp == 0) + return true; - return TargetOpcode::G_MERGE_VALUES; + const unsigned OpEltSize = OpTy.getElementType().getSizeInBits(); + + // Don't handle scalarization with a cast that isn't in the same + // direction as the vector cast. This could be handled, but it would + // require more intermediate unmerges. + if (ConvertOp == TargetOpcode::G_TRUNC) + return DestTy.getSizeInBits() <= OpEltSize; + return DestTy.getSizeInBits() >= OpEltSize; + } + + return false; + } + + return MergeOp == TargetOpcode::G_MERGE_VALUES; } bool tryCombineMerges(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts) { - - if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) - return false; + assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); unsigned NumDefs = MI.getNumOperands() - 1; MachineInstr *SrcDef = @@ -237,16 +242,14 @@ public: MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI); } - // FIXME: Handle scalarizing concat_vectors (scalar result type with vector - // source) - unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy); - if (!MergeI || MergeI->getOpcode() != MergingOpcode) + if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(), + ConvertOp, OpTy, DestTy)) return false; const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; if (NumMergeRegs < NumDefs) { - if (ConvertOp != 0 || NumDefs % NumMergeRegs != 0) + if (NumDefs % NumMergeRegs != 0) return false; Builder.setInstr(MI); @@ -264,7 +267,22 @@ public: ++j, ++DefIdx) DstRegs.push_back(MI.getOperand(DefIdx).getReg()); - Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + if (ConvertOp) { + SmallVector<Register, 2> TmpRegs; + // This is a vector that is being scalarized and casted. Extract to + // the element type, and do the conversion on the scalars. 
+ LLT MergeEltTy + = MRI.getType(MergeI->getOperand(0).getReg()).getElementType(); + for (unsigned j = 0; j < NumMergeRegs; ++j) + TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy)); + + Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg()); + + for (unsigned j = 0; j < NumMergeRegs; ++j) + Builder.buildInstr(ConvertOp, {DstRegs[j]}, {TmpRegs[j]}); + } else { + Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + } } } else if (NumMergeRegs > NumDefs) { diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index a0f21e8b19d7..fbfe71255a38 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -200,6 +200,13 @@ public: LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy); + LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); @@ -219,9 +226,17 @@ public: LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI); + LegalizeResult lowerFMad(MachineInstr &MI); + LegalizeResult lowerUnmergeValues(MachineInstr &MI); + LegalizeResult lowerShuffleVector(MachineInstr &MI); + LegalizeResult lowerDynStackAlloc(MachineInstr &MI); + LegalizeResult lowerExtract(MachineInstr &MI); + LegalizeResult lowerInsert(MachineInstr &MI); + LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI); private: MachineRegisterInfo &MRI; @@ -236,6 +251,11 @@ createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args); +/// Create a libcall to memcpy et al. +LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI, + MachineInstr &MI); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 513c98f2d23f..1cf62d1fde59 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -331,6 +331,8 @@ class LegalizeRuleSet { /// individually handled. 
SmallBitVector TypeIdxsCovered{MCOI::OPERAND_LAST_GENERIC - MCOI::OPERAND_FIRST_GENERIC + 2}; + SmallBitVector ImmIdxsCovered{MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM + 2}; #endif unsigned typeIdx(unsigned TypeIdx) { @@ -342,9 +344,21 @@ class LegalizeRuleSet { #endif return TypeIdx; } - void markAllTypeIdxsAsCovered() { + + unsigned immIdx(unsigned ImmIdx) { + assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM) && + "Imm Index is out of bounds"); +#ifndef NDEBUG + ImmIdxsCovered.set(ImmIdx); +#endif + return ImmIdx; + } + + void markAllIdxsAsCovered() { #ifndef NDEBUG TypeIdxsCovered.set(); + ImmIdxsCovered.set(); #endif } @@ -403,6 +417,15 @@ class LegalizeRuleSet { return actionIf(Action, typePairInSet(typeIdx(0), typeIdx(1), Types), Mutation); } + /// Use the given action when type index 0 is any type in the given list and + /// imm index 0 is anything. Action should not be an action that requires + /// mutation. + LegalizeRuleSet &actionForTypeWithAnyImm(LegalizeAction Action, + std::initializer_list<LLT> Types) { + using namespace LegalityPredicates; + immIdx(0); // Inform verifier imm idx 0 is handled. + return actionIf(Action, typeInSet(typeIdx(0), Types)); + } /// Use the given action when type indexes 0 and 1 are both in the given list. /// That is, the type pair is in the cartesian product of the list. /// Action should not be an action that requires mutation. @@ -454,7 +477,7 @@ public: LegalizeRuleSet &legalIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that the free-form // user-provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Legal, Predicate); } /// The instruction is legal when type index 0 is any type in the given list. @@ -466,6 +489,12 @@ public: LegalizeRuleSet &legalFor(std::initializer_list<std::pair<LLT, LLT>> Types) { return actionFor(LegalizeAction::Legal, Types); } + /// The instruction is legal when type index 0 is any type in the given list + /// and imm index 0 is anything. + LegalizeRuleSet &legalForTypeWithAnyImm(std::initializer_list<LLT> Types) { + markAllIdxsAsCovered(); + return actionForTypeWithAnyImm(LegalizeAction::Legal, Types); + } /// The instruction is legal when type indexes 0 and 1 along with the memory /// size and minimum alignment is any type and size tuple in the given list. LegalizeRuleSet &legalForTypesWithMemDesc( @@ -497,7 +526,7 @@ public: LegalizeRuleSet &alwaysLegal() { using namespace LegalizeMutations; - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Legal, always); } @@ -506,7 +535,7 @@ public: using namespace LegalizeMutations; // We have no choice but conservatively assume that predicate-less lowering // properly handles all type indices by design: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, always); } /// The instruction is lowered if predicate is true. Keep type index 0 as the @@ -515,7 +544,7 @@ public: using namespace LegalizeMutations; // We have no choice but conservatively assume that lowering with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, Predicate); } /// The instruction is lowered if predicate is true. 
@@ -523,7 +552,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that lowering with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Lower, Predicate, Mutation); } /// The instruction is lowered when type index 0 is any type in the given @@ -571,7 +600,7 @@ public: LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a libcall with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Libcall, Predicate); } LegalizeRuleSet &libcallFor(std::initializer_list<LLT> Types) { @@ -597,7 +626,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::WidenScalar, Predicate, Mutation); } /// Narrow the scalar to the one selected by the mutation if the predicate is @@ -606,7 +635,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation); } @@ -616,7 +645,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::MoreElements, Predicate, Mutation); } /// Remove elements to reach the type selected by the mutation if the @@ -625,7 +654,7 @@ public: LegalizeMutation Mutation) { // We have no choice but conservatively assume that an action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::FewerElements, Predicate, Mutation); } @@ -640,11 +669,15 @@ public: return actionIf(LegalizeAction::Unsupported, LegalityPredicates::memSizeInBytesNotPow2(0)); } + LegalizeRuleSet &lowerIfMemSizeNotPow2() { + return actionIf(LegalizeAction::Lower, + LegalityPredicates::memSizeInBytesNotPow2(0)); + } LegalizeRuleSet &customIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a custom action with a // free-form user provided Predicate properly handles all type indices: - markAllTypeIdxsAsCovered(); + markAllIdxsAsCovered(); return actionIf(LegalizeAction::Custom, Predicate); } LegalizeRuleSet &customFor(std::initializer_list<LLT> Types) { @@ -882,6 +915,10 @@ public: /// LegalizeRuleSet in any way at all. /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. bool verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const; + /// Check if there is no imm index which is obviously not handled by the + /// LegalizeRuleSet in any way at all. + /// \pre Type indices of the opcode form a dense [0, \p NumTypeIdxs) set. + bool verifyImmIdxsCoverage(unsigned NumImmIdxs) const; /// Apply the ruleset to the given LegalityQuery. 
LegalizeActionStep apply(const LegalityQuery &Query) const; diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 13eddd9539fa..be12341f5763 100644 --- a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -21,7 +21,7 @@ namespace llvm { namespace MIPatternMatch { template <typename Reg, typename Pattern> -bool mi_match(Reg R, MachineRegisterInfo &MRI, Pattern &&P) { +bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P) { return P.match(MRI, R); } @@ -30,7 +30,7 @@ template <typename SubPatternT> struct OneUse_match { SubPatternT SubPat; OneUse_match(const SubPatternT &SP) : SubPat(SP) {} - bool match(MachineRegisterInfo &MRI, unsigned Reg) { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg); } }; @@ -71,7 +71,7 @@ inline operand_type_match m_Reg() { return operand_type_match(); } /// Matching combinators. template <typename... Preds> struct And { template <typename MatchSrc> - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return true; } }; @@ -83,14 +83,14 @@ struct And<Pred, Preds...> : And<Preds...> { : And<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) { } template <typename MatchSrc> - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return P.match(MRI, src) && And<Preds...>::match(MRI, src); } }; template <typename... Preds> struct Or { template <typename MatchSrc> - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return false; } }; @@ -101,7 +101,7 @@ struct Or<Pred, Preds...> : Or<Preds...> { Or(Pred &&p, Preds &&... 
preds) : Or<Preds...>(std::forward<Preds>(preds)...), P(std::forward<Pred>(p)) {} template <typename MatchSrc> - bool match(MachineRegisterInfo &MRI, MatchSrc &&src) { + bool match(const MachineRegisterInfo &MRI, MatchSrc &&src) { return P.match(MRI, src) || Or<Preds...>::match(MRI, src); } }; @@ -175,7 +175,8 @@ struct BinaryOp_match { RHS_P R; BinaryOp_match(const LHS_P &LHS, const RHS_P &RHS) : L(LHS), R(RHS) {} - template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + template <typename OpTy> + bool match(const MachineRegisterInfo &MRI, OpTy &&Op) { MachineInstr *TmpMI; if (mi_match(Op, MRI, m_MInstr(TmpMI))) { if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 3) { @@ -242,7 +243,8 @@ template <typename SrcTy, unsigned Opcode> struct UnaryOp_match { SrcTy L; UnaryOp_match(const SrcTy &LHS) : L(LHS) {} - template <typename OpTy> bool match(MachineRegisterInfo &MRI, OpTy &&Op) { + template <typename OpTy> + bool match(const MachineRegisterInfo &MRI, OpTy &&Op) { MachineInstr *TmpMI; if (mi_match(Op, MRI, m_MInstr(TmpMI))) { if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 2) { @@ -323,7 +325,7 @@ struct CheckType { LLT Ty; CheckType(const LLT &Ty) : Ty(Ty) {} - bool match(MachineRegisterInfo &MRI, unsigned Reg) { + bool match(const MachineRegisterInfo &MRI, unsigned Reg) { return MRI.getType(Reg) == Ty; } }; diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 10d712176b1b..416f9c19f794 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -122,14 +122,22 @@ class SrcOp { MachineInstrBuilder SrcMIB; Register Reg; CmpInst::Predicate Pred; + int64_t Imm; }; public: - enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate }; + enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate, Ty_Imm }; SrcOp(Register R) : Reg(R), Ty(SrcType::Ty_Reg) {} SrcOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(SrcType::Ty_Reg) {} SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {} SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {} + /// Use of registers held in unsigned integer variables (or more rarely signed + /// integers) is no longer permitted to avoid ambiguity with upcoming support + /// for immediates. + SrcOp(unsigned) = delete; + SrcOp(int) = delete; + SrcOp(uint64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {} + SrcOp(int64_t V) : Imm(V), Ty(SrcType::Ty_Imm) {} void addSrcToMIB(MachineInstrBuilder &MIB) const { switch (Ty) { @@ -142,12 +150,16 @@ public: case SrcType::Ty_MIB: MIB.addUse(SrcMIB->getOperand(0).getReg()); break; + case SrcType::Ty_Imm: + MIB.addImm(Imm); + break; } } LLT getLLTTy(const MachineRegisterInfo &MRI) const { switch (Ty) { case SrcType::Ty_Predicate: + case SrcType::Ty_Imm: llvm_unreachable("Not a register operand"); case SrcType::Ty_Reg: return MRI.getType(Reg); @@ -160,6 +172,7 @@ public: Register getReg() const { switch (Ty) { case SrcType::Ty_Predicate: + case SrcType::Ty_Imm: llvm_unreachable("Not a register operand"); case SrcType::Ty_Reg: return Reg; @@ -178,6 +191,15 @@ public: } } + int64_t getImm() const { + switch (Ty) { + case SrcType::Ty_Imm: + return Imm; + default: + llvm_unreachable("Not an immediate"); + } + } + SrcType getSrcOpKind() const { return Ty; } private: @@ -348,6 +370,17 @@ public: /// given. Convert "llvm.dbg.label Label" to "DBG_LABEL Label". 
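A small, hedged sketch of the SrcOp immediate kind introduced above; the value and the wrapper function are illustrative only. Because the unsigned/int constructors are deleted, a raw virtual-register number no longer converts silently, so callers must spell out either llvm::Register or an explicit 64-bit immediate.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include <cassert>

static void srcOpImmSketch(llvm::MachineInstrBuilder &MIB) {
  llvm::SrcOp Imm{int64_t(42)};   // carried as SrcType::Ty_Imm
  assert(Imm.getSrcOpKind() == llvm::SrcOp::SrcType::Ty_Imm);
  assert(Imm.getImm() == 42);
  Imm.addSrcToMIB(MIB);           // appends the operand via MIB.addImm(42)
  // llvm::SrcOp Bad{42u};        // rejected: SrcOp(unsigned) is deleted
}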
MachineInstrBuilder buildDbgLabel(const MDNode *Label); + /// Build and insert \p Res = G_DYN_STACKALLOC \p Size, \p Align + /// + /// G_DYN_STACKALLOC does a dynamic stack allocation and writes the address of + /// the allocated memory into \p Res. + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with pointer type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildDynStackAlloc(const DstOp &Res, const SrcOp &Size, + unsigned Align); + /// Build and insert \p Res = G_FRAME_INDEX \p Idx /// /// G_FRAME_INDEX materializes the address of an alloca value or other @@ -489,11 +522,21 @@ public: return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src}); } + /// Build and insert a G_INTTOPTR instruction. + MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_INTTOPTR, {Dst}, {Src}); + } + /// Build and insert \p Dst = G_BITCAST \p Src MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src}); } + /// Build and insert \p Dst = G_ADDRSPACE_CAST \p Src + MachineInstrBuilder buildAddrSpaceCast(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_ADDRSPACE_CAST, {Dst}, {Src}); + } + /// \return The opcode of the extension the target wants to use for boolean /// values. unsigned getBoolExtOp(bool IsVec, bool IsFP) const; @@ -867,7 +910,8 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + Optional<unsigned> Flags = None); /// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1 /// @@ -880,7 +924,8 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + Optional<unsigned> Flags = None); /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx @@ -961,8 +1006,8 @@ public: /// same type. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildAtomicRMW(unsigned Opcode, Register OldValRes, - Register Addr, Register Val, + MachineInstrBuilder buildAtomicRMW(unsigned Opcode, const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, MachineMemOperand &MMO); /// Build and insert `OldValRes<def> = G_ATOMICRMW_XCHG Addr, Val, MMO`. @@ -1135,6 +1180,16 @@ public: MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr, Register Val, MachineMemOperand &MMO); + /// Build and insert `OldValRes<def> = G_ATOMICRMW_FADD Addr, Val, MMO`. + MachineInstrBuilder buildAtomicRMWFAdd( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + + /// Build and insert `OldValRes<def> = G_ATOMICRMW_FSUB Addr, Val, MMO`. + MachineInstrBuilder buildAtomicRMWFSub( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO); + /// Build and insert `G_FENCE Ordering, Scope`. 
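For the dynamic stack allocation builder added above, a hedged usage sketch; the register names and the 16-byte alignment are assumptions, not taken from this patch.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

static llvm::MachineInstrBuilder emitDynAlloca(llvm::MachineIRBuilder &B,
                                               llvm::Register PtrReg,
                                               llvm::Register SizeReg) {
  // PtrReg must be a generic virtual register of pointer type; SizeReg holds
  // the byte count. The result points at the freshly allocated stack memory.
  return B.buildDynStackAlloc(PtrReg, SizeReg, /*Align=*/16);
}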
MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope); @@ -1210,6 +1265,12 @@ public: return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags); } + MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FMUL, {Dst}, {Src0, Src1}, Flags); + } + MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional<unsigned> Flags = None) { @@ -1300,8 +1361,9 @@ public: /// Build and insert \p Res = G_FADD \p Op0, \p Op1 MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, - const SrcOp &Src1) { - return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}); + const SrcOp &Src1, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags); } /// Build and insert \p Res = G_FSUB \p Op0, \p Op1 @@ -1316,14 +1378,23 @@ public: return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2}); } + /// Build and insert \p Res = G_FMAD \p Op0, \p Op1, \p Op2 + MachineInstrBuilder buildFMAD(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, const SrcOp &Src2, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FMAD, {Dst}, {Src0, Src1, Src2}, Flags); + } + /// Build and insert \p Res = G_FNEG \p Op0 - MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) { - return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}); + MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags); } /// Build and insert \p Res = G_FABS \p Op0 - MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) { - return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}); + MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0}, Flags); } /// Build and insert \p Dst = G_FCANONICALIZE \p Src0 diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 4cdaa48fb689..8af2853473c2 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -16,6 +16,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MachineValueType.h" namespace llvm { @@ -117,14 +119,16 @@ struct ValueAndVReg { unsigned VReg; }; /// If \p VReg is defined by a statically evaluable chain of -/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true) -/// and that constant fits in int64_t, returns its value as well as -/// the virtual register defined by this G_CONSTANT. -/// When \p LookThroughInstrs == false, this function behaves like +/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true) +/// and that constant fits in int64_t, returns its value as well as the +/// virtual register defined by this G_F/CONSTANT. +/// When \p LookThroughInstrs == false this function behaves like /// getConstantVRegVal. +/// When \p HandleFConstants == false the function bails on G_FCONSTANTs. 
Optional<ValueAndVReg> getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI, - bool LookThroughInstrs = true); + bool LookThroughInstrs = true, + bool HandleFConstants = true); const ConstantFP* getConstantFPVRegVal(unsigned VReg, const MachineRegisterInfo &MRI); @@ -151,6 +155,9 @@ Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1, const unsigned Op2, const MachineRegisterInfo &MRI); +Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, + uint64_t Imm, const MachineRegisterInfo &MRI); + /// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true, /// this returns if \p Val can be assumed to never be a signaling NaN. bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, @@ -161,5 +168,10 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) { return isKnownNeverNaN(Val, MRI, true); } +/// Get a rough equivalent of an MVT for a given LLT. +MVT getMVTForLLT(LLT Ty); +/// Get a rough equivalent of an LLT for a given MVT. +LLT getLLTForMVT(MVT Ty); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index acf27dcc5fab..658ad31fa2a6 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -281,7 +281,7 @@ namespace ISD { /// Same as the corresponding unsaturated fixed point instructions, but the /// result is clamped between the min and max values representable by the /// bits of the first 2 operands. - SMULFIXSAT, + SMULFIXSAT, UMULFIXSAT, /// Simple binary floating point operators. FADD, FSUB, FMUL, FDIV, FREM, @@ -301,6 +301,14 @@ namespace ISD { STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC, + STRICT_LROUND, STRICT_LLROUND, STRICT_LRINT, STRICT_LLRINT, + + /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or + /// unsigned integer. These have the same semantics as fptosi and fptoui + /// in IR. + /// They are used to limit optimizations while the DAG is being optimized. + STRICT_FP_TO_SINT, + STRICT_FP_TO_UINT, /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating /// point type down to the precision of the destination VT. TRUNC is a @@ -398,6 +406,13 @@ namespace ISD { /// than the vector element type, and is implicitly truncated to it. SCALAR_TO_VECTOR, + /// SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL + /// duplicated in all lanes. The type of the operand must match the vector + /// element type, except when they are integer types. In this case the + /// operand is allowed to be wider than the vector element type, and is + /// implicitly truncated to it. + SPLAT_VECTOR, + /// MULHU/MULHS - Multiply high - Multiply two integers of type iN, /// producing an unsigned/signed value of type i[2*N], then return the top /// part. @@ -569,13 +584,6 @@ namespace ISD { /// 3 Round to -inf FLT_ROUNDS_, - /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and - /// rounds it to a floating point value. It then promotes it and returns it - /// in a register of the same size. This operation effectively just - /// discards excess precision. The type to round down to is specified by - /// the VT operand, a VTSDNode. - FP_ROUND_INREG, - /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. 
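Two minimal sketches of the GlobalISel utility additions above; the register, the 32-bit scalar, and the helper names are arbitrary illustrative choices.

#include "llvm/CodeGen/GlobalISel/Utils.h"

static bool isNonFPConstant(llvm::Register VReg,
                            const llvm::MachineRegisterInfo &MRI) {
  // Look through copies and similar instructions to a G_CONSTANT, but bail
  // out on G_FCONSTANT by passing HandleFConstants = false.
  return llvm::getConstantVRegValWithLookThrough(
             VReg, MRI, /*LookThroughInstrs=*/true, /*HandleFConstants=*/false)
      .hasValue();
}

static llvm::MVT roughScalarMVT() {
  // Rough MVT equivalent of a 32-bit scalar LLT; getLLTForMVT goes the
  // other way.
  return llvm::getMVTForLLT(llvm::LLT::scalar(32));
}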
FP_EXTEND, @@ -958,6 +966,23 @@ namespace ISD { static const int LAST_INDEXED_MODE = POST_DEC + 1; //===--------------------------------------------------------------------===// + /// MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's + /// index parameter when calculating addresses. + /// + /// SIGNED_SCALED Addr = Base + ((signed)Index * sizeof(element)) + /// SIGNED_UNSCALED Addr = Base + (signed)Index + /// UNSIGNED_SCALED Addr = Base + ((unsigned)Index * sizeof(element)) + /// UNSIGNED_UNSCALED Addr = Base + (unsigned)Index + enum MemIndexType { + SIGNED_SCALED = 0, + SIGNED_UNSCALED, + UNSIGNED_SCALED, + UNSIGNED_UNSCALED + }; + + static const int LAST_MEM_INDEX_TYPE = UNSIGNED_UNSCALED + 1; + + //===--------------------------------------------------------------------===// /// LoadExtType enum - This enum defines the three variants of LOADEXT /// (load with extension). /// diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 8bb88165d3e1..290a2381d9c9 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -189,6 +189,10 @@ namespace llvm { return start == Other.start && end == Other.end; } + bool operator!=(const Segment &Other) const { + return !(*this == Other); + } + void dump() const; }; @@ -224,7 +228,7 @@ namespace llvm { /// Constructs a new LiveRange object. LiveRange(bool UseSegmentSet = false) - : segmentSet(UseSegmentSet ? llvm::make_unique<SegmentSet>() + : segmentSet(UseSegmentSet ? std::make_unique<SegmentSet>() : nullptr) {} /// Constructs a new LiveRange object by copying segments and valnos from diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h index 588b0f9cf39c..888d72b87bd1 100644 --- a/include/llvm/CodeGen/LiveIntervals.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -111,30 +111,31 @@ class VirtRegMap; const MachineBlockFrequencyInfo *MBFI, const MachineBasicBlock *MBB); - LiveInterval &getInterval(unsigned Reg) { + LiveInterval &getInterval(Register Reg) { if (hasInterval(Reg)) - return *VirtRegIntervals[Reg]; + return *VirtRegIntervals[Reg.id()]; else return createAndComputeVirtRegInterval(Reg); } - const LiveInterval &getInterval(unsigned Reg) const { + const LiveInterval &getInterval(Register Reg) const { return const_cast<LiveIntervals*>(this)->getInterval(Reg); } - bool hasInterval(unsigned Reg) const { - return VirtRegIntervals.inBounds(Reg) && VirtRegIntervals[Reg]; + bool hasInterval(Register Reg) const { + return VirtRegIntervals.inBounds(Reg.id()) && + VirtRegIntervals[Reg.id()]; } /// Interval creation. 
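A minimal sketch of the Register-based LiveIntervals lookup shown above, assuming LIS is the pass's LiveIntervals result and MI defines a virtual register in operand 0.

#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstr.h"

static llvm::LiveInterval *lookupInterval(llvm::LiveIntervals *LIS,
                                          const llvm::MachineInstr &MI) {
  llvm::Register Reg = MI.getOperand(0).getReg();
  if (!Reg.isVirtual())
    return nullptr;
  // getInterval() now takes a Register directly and will compute the interval
  // on demand if it has not been created yet.
  return &LIS->getInterval(Reg);
}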
- LiveInterval &createEmptyInterval(unsigned Reg) { + LiveInterval &createEmptyInterval(Register Reg) { assert(!hasInterval(Reg) && "Interval already exists!"); - VirtRegIntervals.grow(Reg); - VirtRegIntervals[Reg] = createInterval(Reg); - return *VirtRegIntervals[Reg]; + VirtRegIntervals.grow(Reg.id()); + VirtRegIntervals[Reg.id()] = createInterval(Reg); + return *VirtRegIntervals[Reg.id()]; } - LiveInterval &createAndComputeVirtRegInterval(unsigned Reg) { + LiveInterval &createAndComputeVirtRegInterval(Register Reg) { LiveInterval &LI = createEmptyInterval(Reg); computeVirtRegInterval(LI); return LI; diff --git a/include/llvm/CodeGen/LiveRangeCalc.h b/include/llvm/CodeGen/LiveRangeCalc.h new file mode 100644 index 000000000000..08026c05733c --- /dev/null +++ b/include/llvm/CodeGen/LiveRangeCalc.h @@ -0,0 +1,295 @@ +//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeCalc class can be used to compute live ranges from scratch. It +// caches information about values in the CFG to speed up repeated operations +// on the same live range. The cache can be shared by non-overlapping live +// ranges. SplitKit uses that when computing the live range of split products. +// +// A low-level interface is available to clients that know where a variable is +// live, but don't know which value it has as every point. LiveRangeCalc will +// propagate values down the dominator tree, and even insert PHI-defs where +// needed. SplitKit uses this faster interface when possible. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H +#define LLVM_LIB_CODEGEN_LIVERANGECALC_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/LaneBitmask.h" +#include <utility> + +namespace llvm { + +template <class NodeT> class DomTreeNodeBase; +class MachineDominatorTree; +class MachineFunction; +class MachineRegisterInfo; + +using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; + +class LiveRangeCalc { + const MachineFunction *MF = nullptr; + const MachineRegisterInfo *MRI = nullptr; + SlotIndexes *Indexes = nullptr; + MachineDominatorTree *DomTree = nullptr; + VNInfo::Allocator *Alloc = nullptr; + + /// LiveOutPair - A value and the block that defined it. The domtree node is + /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. + using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>; + + /// LiveOutMap - Map basic blocks to the value leaving the block. + using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>; + + /// Bit vector of active entries in LiveOut, also used as a visited set by + /// findReachingDefs. One entry per basic block, indexed by block number. + /// This is kept as a separate bit vector because it can be cleared quickly + /// when switching live ranges. 
+ BitVector Seen; + + /// Map LiveRange to sets of blocks (represented by bit vectors) that + /// in the live range are defined on entry and undefined on entry. + /// A block is defined on entry if there is a path from at least one of + /// the defs in the live range to the entry of the block, and conversely, + /// a block is undefined on entry, if there is no such path (i.e. no + /// definition reaches the entry of the block). A single LiveRangeCalc + /// object is used to track live-out information for multiple registers + /// in live range splitting (which is ok, since the live ranges of these + /// registers do not overlap), but the defined/undefined information must + /// be kept separate for each individual range. + /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }. + using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>; + EntryInfoMap EntryInfos; + + /// Map each basic block where a live range is live out to the live-out value + /// and its defining block. + /// + /// For every basic block, MBB, one of these conditions shall be true: + /// + /// 1. !Seen.count(MBB->getNumber()) + /// Blocks without a Seen bit are ignored. + /// 2. LiveOut[MBB].second.getNode() == MBB + /// The live-out value is defined in MBB. + /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB] + /// The live-out value passses through MBB. All predecessors must carry + /// the same value. + /// + /// The domtree node may be null, it can be computed. + /// + /// The map can be shared by multiple live ranges as long as no two are + /// live-out of the same block. + LiveOutMap Map; + + /// LiveInBlock - Information about a basic block where a live range is known + /// to be live-in, but the value has not yet been determined. + struct LiveInBlock { + // The live range set that is live-in to this block. The algorithms can + // handle multiple non-overlapping live ranges simultaneously. + LiveRange &LR; + + // DomNode - Dominator tree node for the block. + // Cleared when the final value has been determined and LI has been updated. + MachineDomTreeNode *DomNode; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. When the final value has been + // determined, the range from the block start to Kill will be added to LI. + SlotIndex Kill; + + // Live-in value filled in by updateSSA once it is known. + VNInfo *Value = nullptr; + + LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) + : LR(LR), DomNode(node), Kill(kill) {} + }; + + /// LiveIn - Work list of blocks where the live-in value has yet to be + /// determined. This list is typically computed by findReachingDefs() and + /// used as a work list by updateSSA(). The low-level interface may also be + /// used to add entries directly. + SmallVector<LiveInBlock, 16> LiveIn; + + /// Check if the entry to block @p MBB can be reached by any of the defs + /// in @p LR. Return true if none of the defs reach the entry to @p MBB. + bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs, + MachineBasicBlock &MBB, BitVector &DefOnEntry, + BitVector &UndefOnEntry); + + /// Find the set of defs that can reach @p Kill. @p Kill must belong to + /// @p UseMBB. + /// + /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill, + /// all paths from the def to @p UseMBB are added to @p LR, and the function + /// returns true. 
+ /// + /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be + /// live in are added to the LiveIn array, and the function returns false. + /// + /// The array @p Undef provides the locations where the range @p LR becomes + /// undefined by <def,read-undef> operands on other subranges. If @p Undef + /// is non-empty and @p Kill is jointly dominated only by the entries of + /// @p Undef, the function returns false. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, SlotIndex Use, + unsigned PhysReg, ArrayRef<SlotIndex> Undefs); + + /// updateSSA - Compute the values that will be live in to all requested + /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. + /// + /// Every live-in block must be jointly dominated by the added live-out + /// blocks. No values are read from the live ranges. + void updateSSA(); + + /// Transfer information from the LiveIn vector to the live ranges and update + /// the given @p LiveOuts. + void updateFromLiveIns(); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// If @p LR is a main range, or if @p LI is null, then all uses must be + /// jointly dominated by the definitions from @p LR. If @p LR is a subrange + /// of the live interval @p LI, corresponding to lane mask @p LaneMask, + /// all uses must be jointly dominated by the definitions from @p LR + /// together with definitions of other lanes where @p LR becomes undefined + /// (via <def,read-undef> operands). + /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e. + /// LaneBitmask::getAll(). + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask, + LiveInterval *LI = nullptr); + + /// Reset Map and Seen fields. + void resetLiveOutMap(); + +public: + LiveRangeCalc() = default; + + //===--------------------------------------------------------------------===// + // High-level interface. + //===--------------------------------------------------------------------===// + // + // Calculate live ranges from scratch. + // + + /// reset - Prepare caches for a new set of non-overlapping live ranges. The + /// caches must be reset before attempting calculations with a live range + /// that may overlap a previously computed live range, and before the first + /// live range in a function. If live ranges are not known to be + /// non-overlapping, call reset before each. + void reset(const MachineFunction *mf, SlotIndexes *SI, + MachineDominatorTree *MDT, VNInfo::Allocator *VNIA); + + //===--------------------------------------------------------------------===// + // Mid-level interface. + //===--------------------------------------------------------------------===// + // + // Modify existing live ranges. + // + + /// Extend the live range of @p LR to reach @p Use. + /// + /// The existing values in @p LR must be live so they jointly dominate @p Use. + /// If @p Use is not dominated by a single existing value, PHI-defs are + /// inserted as required to preserve SSA form. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, + ArrayRef<SlotIndex> Undefs); + + /// createDeadDefs - Create a dead def in LI for every def operand of Reg. + /// Each instruction defining Reg gets a new VNInfo with a corresponding + /// minimal live range. 
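A hedged sketch of the mid-level interface described above; the analysis pointers are assumed to come from the pass that owns the LiveRangeCalc, and PhysReg is left as 0 because the range here is assumed to belong to a virtual register.

#include "llvm/CodeGen/LiveRangeCalc.h"

static void resetAndExtend(llvm::LiveRangeCalc &LRCalc,
                           const llvm::MachineFunction *MF,
                           llvm::SlotIndexes *Indexes,
                           llvm::MachineDominatorTree *MDT,
                           llvm::VNInfo::Allocator *Alloc,
                           llvm::LiveRange &LR, llvm::SlotIndex UseIdx) {
  // Caches must be reset before touching a live range that may overlap a
  // previously computed one.
  LRCalc.reset(MF, Indexes, MDT, Alloc);
  // Extend LR so its existing values reach UseIdx, inserting PHI-defs where
  // no single value dominates the use.
  LRCalc.extend(LR, UseIdx, /*PhysReg=*/0, /*Undefs=*/{});
}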
+ void createDeadDefs(LiveRange &LR, unsigned Reg); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveRange &LR, unsigned PhysReg) { + extendToUses(LR, PhysReg, LaneBitmask::getAll()); + } + + /// Calculates liveness for the register specified in live interval @p LI. + /// Creates subregister live ranges as needed if subreg liveness tracking is + /// enabled. + void calculate(LiveInterval &LI, bool TrackSubRegs); + + /// For live interval \p LI with correct SubRanges construct matching + /// information for the main live range. Expects the main live range to not + /// have any segments or value numbers. + void constructMainRangeFromSubranges(LiveInterval &LI); + + //===--------------------------------------------------------------------===// + // Low-level interface. + //===--------------------------------------------------------------------===// + // + // These functions can be used to compute live ranges where the live-in and + // live-out blocks are already known, but the SSA value in each block is + // unknown. + // + // After calling reset(), add known live-out values and known live-in blocks. + // Then call calculateValues() to compute the actual value that is + // live-in to each block, and add liveness to the live ranges. + // + + /// setLiveOutValue - Indicate that VNI is live out from MBB. The + /// calculateValues() function will not add liveness for MBB, the caller + /// should take care of that. + /// + /// VNI may be null only if MBB is a live-through block also passed to + /// addLiveInBlock(). + void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { + Seen.set(MBB->getNumber()); + Map[MBB] = LiveOutPair(VNI, nullptr); + } + + /// addLiveInBlock - Add a block with an unknown live-in value. This + /// function can only be called once per basic block. Once the live-in value + /// has been determined, calculateValues() will add liveness to LI. + /// + /// @param LR The live range that is live-in to the block. + /// @param DomNode The domtree node for the block. + /// @param Kill Index in block where LI is killed. If the value is + /// live-through, set Kill = SLotIndex() and also call + /// setLiveOutValue(MBB, 0). + void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode, + SlotIndex Kill = SlotIndex()) { + LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); + } + + /// calculateValues - Calculate the value that will be live-in to each block + /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA + /// form. Add liveness to all live-in blocks up to the Kill point, or the + /// whole block for live-through blocks. + /// + /// Every predecessor of a live-in block must have been given a value with + /// setLiveOutValue, the value may be null for live-trough blocks. + void calculateValues(); + + /// A diagnostic function to check if the end of the block @p MBB is + /// jointly dominated by the blocks corresponding to the slot indices + /// in @p Defs. This function is mainly for use in self-verification + /// checks. 
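And a hedged sketch of the low-level interface, for a caller that already knows where the range is live-out and live-in and only needs the SSA values filled in; the block, value, and kill-index arguments are assumptions about the caller's CFG knowledge.

#include "llvm/CodeGen/LiveRangeCalc.h"

static void fillLiveIns(llvm::LiveRangeCalc &LRCalc, llvm::LiveRange &LR,
                        llvm::MachineBasicBlock *DefMBB,
                        llvm::VNInfo *LiveOutVNI,
                        llvm::MachineDomTreeNode *UseNode,
                        llvm::SlotIndex KillIdx) {
  // DefMBB (assumed to be the predecessor of UseNode's block) carries
  // LiveOutVNI out of the block.
  LRCalc.setLiveOutValue(DefMBB, LiveOutVNI);
  // UseNode's block is live-in up to KillIdx; the value itself is not known
  // yet and will be determined below.
  LRCalc.addLiveInBlock(LR, UseNode, KillIdx);
  // Compute the live-in values and add liveness up to each Kill point.
  LRCalc.calculateValues();
}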
+ LLVM_ATTRIBUTE_UNUSED + static bool isJointlyDominated(const MachineBasicBlock *MBB, + ArrayRef<SlotIndex> Defs, + const SlotIndexes &Indexes); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index 7dbb2feab8bf..314afad92970 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -53,8 +53,8 @@ public: ModifiedRegUnits.addRegsInMask(O->getRegMask()); if (!O->isReg()) continue; - unsigned Reg = O->getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = O->getReg(); + if (!Reg.isPhysical()) continue; if (O->isDef()) { // Some architectures (e.g. AArch64 XZR/WZR) have registers that are diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 94e76a75e8da..069d0aa45095 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -314,6 +314,7 @@ struct ScalarEnumerationTraits<TargetStackID::Value> { static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) { IO.enumCase(ID, "default", TargetStackID::Default); IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill); + IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector); IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 333d0a78618c..ccdde78a0b22 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -103,9 +103,9 @@ private: using LiveInVector = std::vector<RegisterMaskPair>; LiveInVector LiveIns; - /// Alignment of the basic block. Zero if the basic block does not need to be - /// aligned. The alignment is specified as log2(bytes). - unsigned Alignment = 0; + /// Alignment of the basic block. One if the basic block does not need to be + /// aligned. + Align Alignment; /// Indicate that this basic block is entered via an exception handler. bool IsEHPad = false; @@ -312,7 +312,7 @@ public: /// Adds the specified register as a live in. Note that it is an error to add /// the same register to the same set more than once unless the intention is /// to call sortUniqueLiveIns after all registers are added. - void addLiveIn(MCPhysReg PhysReg, + void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask = LaneBitmask::getAll()) { LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask)); } @@ -331,7 +331,7 @@ public: /// Add PhysReg as live in to this block, and ensure that there is a copy of /// PhysReg to a virtual register of class RC. Return the virtual register /// that is a copy of the live in PhysReg. - unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC); + unsigned addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC); /// Remove the specified register from the live in set. void removeLiveIn(MCPhysReg Reg, @@ -372,13 +372,11 @@ public: /// \see getBeginClobberMask() const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const; - /// Return alignment of the basic block. The alignment is specified as - /// log2(bytes). - unsigned getAlignment() const { return Alignment; } + /// Return alignment of the basic block. + Align getAlignment() const { return Alignment; } - /// Set alignment of the basic block. The alignment is specified as - /// log2(bytes). - void setAlignment(unsigned Align) { Alignment = Align; } + /// Set alignment of the basic block. 
+ void setAlignment(Align A) { Alignment = A; } /// Returns true if the block is a landing pad. That is this basic block is /// entered via an exception handler. @@ -636,6 +634,18 @@ public: return Insts.insertAfter(I.getInstrIterator(), MI); } + /// If I is bundled then insert MI into the instruction list after the end of + /// the bundle, otherwise insert MI immediately after I. + instr_iterator insertAfterBundle(instr_iterator I, MachineInstr *MI) { + assert((I == instr_end() || I->getParent() == this) && + "iterator points outside of basic block"); + assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() && + "Cannot insert instruction with bundle flags"); + while (I->isBundledWithSucc()) + ++I; + return Insts.insertAfter(I, MI); + } + /// Remove an instruction from the instruction list and delete it. /// /// If the instruction is part of a bundle, the other instructions in the @@ -723,6 +733,10 @@ public: /// CFG so that it branches to 'New' instead. void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Update all phi nodes in this basic block to refer to basic block \p New + /// instead of basic block \p Old. + void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Various pieces of code can cause excess edges in the CFG to be inserted. /// If we have proven that MBB can only branch to DestA and DestB, remove any /// other MBB successors from the CFG. DestA and DestB can be null. Besides diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 4f4034baf801..503227222207 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -39,6 +39,10 @@ enum class MachineCombinerPattern { MULADDXI_OP1, MULSUBXI_OP1, // Floating Point + FMULADDH_OP1, + FMULADDH_OP2, + FMULSUBH_OP1, + FMULSUBH_OP2, FMULADDS_OP1, FMULADDS_OP2, FMULSUBS_OP1, @@ -47,16 +51,25 @@ enum class MachineCombinerPattern { FMULADDD_OP2, FMULSUBD_OP1, FMULSUBD_OP2, + FNMULSUBH_OP1, FNMULSUBS_OP1, FNMULSUBD_OP1, FMLAv1i32_indexed_OP1, FMLAv1i32_indexed_OP2, FMLAv1i64_indexed_OP1, FMLAv1i64_indexed_OP2, + FMLAv4f16_OP1, + FMLAv4f16_OP2, + FMLAv8f16_OP1, + FMLAv8f16_OP2, FMLAv2f32_OP2, FMLAv2f32_OP1, FMLAv2f64_OP1, FMLAv2f64_OP2, + FMLAv4i16_indexed_OP1, + FMLAv4i16_indexed_OP2, + FMLAv8i16_indexed_OP1, + FMLAv8i16_indexed_OP2, FMLAv2i32_indexed_OP1, FMLAv2i32_indexed_OP2, FMLAv2i64_indexed_OP1, @@ -67,10 +80,18 @@ enum class MachineCombinerPattern { FMLAv4i32_indexed_OP2, FMLSv1i32_indexed_OP2, FMLSv1i64_indexed_OP2, + FMLSv4f16_OP1, + FMLSv4f16_OP2, + FMLSv8f16_OP1, + FMLSv8f16_OP2, FMLSv2f32_OP1, FMLSv2f32_OP2, FMLSv2f64_OP1, FMLSv2f64_OP2, + FMLSv4i16_indexed_OP1, + FMLSv4i16_indexed_OP2, + FMLSv8i16_indexed_OP1, + FMLSv8i16_indexed_OP2, FMLSv2i32_indexed_OP1, FMLSv2i32_indexed_OP2, FMLSv2i64_indexed_OP1, diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index d2200080b897..e4d7a02f8c48 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -44,6 +44,8 @@ using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; /// compute a normal dominator tree. /// class MachineDominatorTree : public MachineFunctionPass { + using DomTreeT = DomTreeBase<MachineBasicBlock>; + /// Helper structure used to hold all the basic blocks /// involved in the split of a critical edge. 
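A hedged sketch of the PHI-rewriting helper and the Align-based block alignment above; the 16-byte alignment is an arbitrary example value and the helper names are not from this patch.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/Alignment.h"

static void retargetPredecessor(llvm::MachineBasicBlock &Dest,
                                llvm::MachineBasicBlock *Old,
                                llvm::MachineBasicBlock *New) {
  // Every PHI in Dest that read a value from Old now reads it from New.
  Dest.replacePhiUsesWith(Old, New);
}

static void alignBlock(llvm::MachineBasicBlock &MBB) {
  // Alignment is expressed as an llvm::Align in bytes, not a log2 shift.
  MBB.setAlignment(llvm::Align(16));
}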
struct CriticalEdge { @@ -65,8 +67,8 @@ class MachineDominatorTree : public MachineFunctionPass { /// such as BB == elt.NewBB. mutable SmallSet<MachineBasicBlock *, 32> NewBBs; - /// The DominatorTreeBase that is used to compute a normal dominator tree - std::unique_ptr<DomTreeBase<MachineBasicBlock>> DT; + /// The DominatorTreeBase that is used to compute a normal dominator tree. + std::unique_ptr<DomTreeT> DT; /// Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses @@ -80,8 +82,8 @@ public: MachineDominatorTree(); - DomTreeBase<MachineBasicBlock> &getBase() { - if (!DT) DT.reset(new DomTreeBase<MachineBasicBlock>()); + DomTreeT &getBase() { + if (!DT) DT.reset(new DomTreeT()); applySplitCriticalEdges(); return *DT; } @@ -92,31 +94,30 @@ public: /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). /// - inline const SmallVectorImpl<MachineBasicBlock*> &getRoots() const { + const SmallVectorImpl<MachineBasicBlock*> &getRoots() const { applySplitCriticalEdges(); return DT->getRoots(); } - inline MachineBasicBlock *getRoot() const { + MachineBasicBlock *getRoot() const { applySplitCriticalEdges(); return DT->getRoot(); } - inline MachineDomTreeNode *getRootNode() const { + MachineDomTreeNode *getRootNode() const { applySplitCriticalEdges(); return DT->getRootNode(); } bool runOnMachineFunction(MachineFunction &F) override; - inline bool dominates(const MachineDomTreeNode* A, - const MachineDomTreeNode* B) const { + bool dominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); } - inline bool dominates(const MachineBasicBlock* A, - const MachineBasicBlock* B) const { + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); } @@ -133,36 +134,30 @@ public: for (; &*I != A && &*I != B; ++I) /*empty*/ ; - //if(!DT.IsPostDominators) { - // A dominates B if it is found first in the basic block. - return &*I == A; - //} else { - // // A post-dominates B if B is found first in the basic block. - // return &*I == B; - //} + return &*I == A; } - inline bool properlyDominates(const MachineDomTreeNode* A, - const MachineDomTreeNode* B) const { + bool properlyDominates(const MachineDomTreeNode *A, + const MachineDomTreeNode *B) const { applySplitCriticalEdges(); return DT->properlyDominates(A, B); } - inline bool properlyDominates(const MachineBasicBlock* A, - const MachineBasicBlock* B) const { + bool properlyDominates(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->properlyDominates(A, B); } /// findNearestCommonDominator - Find nearest common dominator basic block /// for basic block A and B. If there is no such block then return NULL. - inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { + MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, + MachineBasicBlock *B) { applySplitCriticalEdges(); return DT->findNearestCommonDominator(A, B); } - inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { + MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { applySplitCriticalEdges(); return DT->getNode(BB); } @@ -170,7 +165,7 @@ public: /// getNode - return the (Post)DominatorTree node for the specified basic /// block. This is the same as using operator[] on this class. 
/// - inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { + MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { applySplitCriticalEdges(); return DT->getNode(BB); } @@ -178,8 +173,8 @@ public: /// addNewBlock - Add a new node to the dominator tree information. This /// creates a new node as a child of DomBB dominator node,linking it into /// the children list of the immediate dominator. - inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, - MachineBasicBlock *DomBB) { + MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB) { applySplitCriticalEdges(); return DT->addNewBlock(BB, DomBB); } @@ -187,14 +182,14 @@ public: /// changeImmediateDominator - This method is used to update the dominator /// tree information when a node's immediate dominator changes. /// - inline void changeImmediateDominator(MachineBasicBlock *N, - MachineBasicBlock* NewIDom) { + void changeImmediateDominator(MachineBasicBlock *N, + MachineBasicBlock *NewIDom) { applySplitCriticalEdges(); DT->changeImmediateDominator(N, NewIDom); } - inline void changeImmediateDominator(MachineDomTreeNode *N, - MachineDomTreeNode* NewIDom) { + void changeImmediateDominator(MachineDomTreeNode *N, + MachineDomTreeNode *NewIDom) { applySplitCriticalEdges(); DT->changeImmediateDominator(N, NewIDom); } @@ -202,14 +197,14 @@ public: /// eraseNode - Removes a node from the dominator tree. Block must not /// dominate any other blocks. Removes node from its immediate dominator's /// children list. Deletes dominator node associated with basic block BB. - inline void eraseNode(MachineBasicBlock *BB) { + void eraseNode(MachineBasicBlock *BB) { applySplitCriticalEdges(); DT->eraseNode(BB); } /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. - inline void splitBlock(MachineBasicBlock* NewBB) { + void splitBlock(MachineBasicBlock* NewBB) { applySplitCriticalEdges(); DT->splitBlock(NewBB); } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 761735120a64..01fc50d14a7f 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_MACHINEFRAMEINFO_H #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/DataTypes.h" #include <cassert> #include <vector> @@ -129,7 +130,7 @@ private: uint64_t Size; // The required alignment of this stack slot. - unsigned Alignment; + Align Alignment; // If true, the value of the stack object is set before // entering the function and is not modified inside the function. By @@ -180,17 +181,16 @@ private: uint8_t SSPLayout; - StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, + StackObject(uint64_t Size, Align Alignment, int64_t SPOffset, bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, bool IsAliased, uint8_t StackID = 0) - : SPOffset(SPOffset), Size(Size), Alignment(Alignment), - isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), - StackID(StackID), Alloca(Alloca), isAliased(IsAliased), - SSPLayout(SSPLK_None) {} + : SPOffset(SPOffset), Size(Size), Alignment(Alignment), + isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID), + Alloca(Alloca), isAliased(IsAliased), SSPLayout(SSPLK_None) {} }; /// The alignment of the stack. - unsigned StackAlignment; + Align StackAlignment; /// Can the stack be realigned. 
This can be false if the target does not /// support stack realignment, or if the user asks us not to realign the @@ -260,7 +260,7 @@ private: /// native alignment maintained by the compiler, dynamic alignment code will /// be needed. /// - unsigned MaxAlignment = 0; + Align MaxAlignment; /// Set to true if this function adjusts the stack -- e.g., /// when calling another function. This is only valid during and after @@ -304,7 +304,7 @@ private: /// Required alignment of the local object blob, which is the strictest /// alignment of any object in it. - unsigned LocalFrameMaxAlign = 0; + Align LocalFrameMaxAlign; /// Whether the local object blob needs to be allocated together. If not, /// PEI should ignore the isPreAllocated flags on the stack objects and @@ -338,8 +338,8 @@ private: public: explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable, bool ForcedRealign) - : StackAlignment(StackAlignment), StackRealignable(StackRealignable), - ForcedRealign(ForcedRealign) {} + : StackAlignment(assumeAligned(StackAlignment)), + StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {} /// Return true if there are any stack objects in this function. bool hasStackObjects() const { return !Objects.empty(); } @@ -419,10 +419,12 @@ public: /// Required alignment of the local object blob, /// which is the strictest alignment of any object in it. - void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; } + void setLocalFrameMaxAlign(Align Alignment) { + LocalFrameMaxAlign = Alignment; + } /// Return the required alignment of the local object blob. - unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; } + Align getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; } /// Get whether the local allocation blob should be allocated together or /// let PEI allocate the locals in it directly. @@ -462,14 +464,14 @@ public: unsigned getObjectAlignment(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - return Objects[ObjectIdx+NumFixedObjects].Alignment; + return Objects[ObjectIdx + NumFixedObjects].Alignment.value(); } /// setObjectAlignment - Change the alignment of the specified stack object. void setObjectAlignment(int ObjectIdx, unsigned Align) { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - Objects[ObjectIdx+NumFixedObjects].Alignment = Align; + Objects[ObjectIdx + NumFixedObjects].Alignment = assumeAligned(Align); // Only ensure max alignment for the default stack. if (getStackID(ObjectIdx) == 0) @@ -561,10 +563,14 @@ public: /// Return the alignment in bytes that this function must be aligned to, /// which is greater than the default stack alignment provided by the target. - unsigned getMaxAlignment() const { return MaxAlignment; } + unsigned getMaxAlignment() const { return MaxAlignment.value(); } /// Make sure the function is at least Align bytes aligned. - void ensureMaxAlignment(unsigned Align); + void ensureMaxAlignment(Align Alignment); + /// FIXME: Remove this once transition to Align is over. + inline void ensureMaxAlignment(unsigned Align) { + ensureMaxAlignment(assumeAligned(Align)); + } /// Return true if this function adjusts the stack -- e.g., /// when calling another function. This is only valid during and after @@ -728,12 +734,24 @@ public: /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. 
- int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, + int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca = nullptr, uint8_t ID = 0); + /// FIXME: Remove this function when transition to Align is over. + inline int CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSpillSlot, + const AllocaInst *Alloca = nullptr, + uint8_t ID = 0) { + return CreateStackObject(Size, assumeAligned(Alignment), isSpillSlot, + Alloca, ID); + } /// Create a new statically sized stack object that represents a spill slot, /// returning a nonnegative identifier to represent it. - int CreateSpillStackObject(uint64_t Size, unsigned Alignment); + int CreateSpillStackObject(uint64_t Size, Align Alignment); + /// FIXME: Remove this function when transition to Align is over. + inline int CreateSpillStackObject(uint64_t Size, unsigned Alignment) { + return CreateSpillStackObject(Size, assumeAligned(Alignment)); + } /// Remove or mark dead a statically sized stack object. void RemoveStackObject(int ObjectIdx) { @@ -744,7 +762,11 @@ public: /// Notify the MachineFrameInfo object that a variable sized object has been /// created. This must be created whenever a variable sized object is /// created, whether or not the index returned is actually used. - int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca); + int CreateVariableSizedObject(Align Alignment, const AllocaInst *Alloca); + /// FIXME: Remove this function when transition to Align is over. + int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { + return CreateVariableSizedObject(assumeAligned(Alignment), Alloca); + } /// Returns a reference to call saved info vector for the current function. const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const { diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 201c126ee52e..3a3176e51c51 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -36,6 +36,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Recycler.h" +#include "llvm/Target/TargetMachine.h" #include <cassert> #include <cstdint> #include <memory> @@ -277,7 +278,7 @@ class MachineFunction { unsigned FunctionNumber; /// Alignment - The alignment of the function. - unsigned Alignment; + Align Alignment; /// ExposesReturnsTwice - True if the function calls setjmp or related /// functions with attribute "returns twice", but doesn't have @@ -322,7 +323,7 @@ class MachineFunction { std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations; /// CodeView heapallocsites. - std::vector<std::tuple<MCSymbol*, MCSymbol*, DIType*>> + std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>> CodeViewHeapAllocSites; bool CallsEHReturn = false; @@ -400,6 +401,17 @@ private: /// Map a call instruction to call site arguments forwarding info. CallSiteInfoMap CallSitesInfo; + /// A helper function that returns call site info for a give call + /// instruction if debug entry value support is enabled. + CallSiteInfoMap::iterator getCallSiteInfo(const MachineInstr *MI) { + assert(MI->isCall() && + "Call site info refers only to call instructions!"); + + if (!Target.Options.EnableDebugEntryValues) + return CallSitesInfo.end(); + return CallSitesInfo.find(MI); + } + // Callbacks for insertion and removal. 
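A hedged sketch of the Align-based MachineFrameInfo entry points above; the 8-byte size and 16-byte alignment are arbitrary, and the unsigned overloads remain only to ease the transition.

#include "llvm/CodeGen/MachineFrameInfo.h"

static int createAlignedSpill(llvm::MachineFrameInfo &MFI) {
  // Reserve an 8-byte spill slot on a 16-byte boundary.
  int FI = MFI.CreateSpillStackObject(/*Size=*/8, llvm::Align(16));
  // Make sure the whole frame is known to need at least 16-byte alignment.
  MFI.ensureMaxAlignment(llvm::Align(16));
  return FI;
}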
void handleInsertion(MachineInstr &MI); void handleRemoval(MachineInstr &MI); @@ -508,15 +520,16 @@ public: const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; } WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; } - /// getAlignment - Return the alignment (log2, not bytes) of the function. - unsigned getAlignment() const { return Alignment; } + /// getAlignment - Return the alignment of the function. + Align getAlignment() const { return Alignment; } - /// setAlignment - Set the alignment (log2, not bytes) of the function. - void setAlignment(unsigned A) { Alignment = A; } + /// setAlignment - Set the alignment of the function. + void setAlignment(Align A) { Alignment = A; } - /// ensureAlignment - Make sure the function is at least 1 << A bytes aligned. - void ensureAlignment(unsigned A) { - if (Alignment < A) Alignment = A; + /// ensureAlignment - Make sure the function is at least A bytes aligned. + void ensureAlignment(Align A) { + if (Alignment < A) + Alignment = A; } /// exposesReturnsTwice - Returns true if the function calls setjmp or @@ -935,10 +948,10 @@ public: } /// Record heapallocsites - void addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD); + void addCodeViewHeapAllocSite(MachineInstr *I, const MDNode *MD); - ArrayRef<std::tuple<MCSymbol*, MCSymbol*, DIType*>> - getCodeViewHeapAllocSites() const { + ArrayRef<std::tuple<MCSymbol *, MCSymbol *, const DIType *>> + getCodeViewHeapAllocSites() const { return CodeViewHeapAllocSites; } @@ -976,12 +989,24 @@ public: return CallSitesInfo; } - /// Update call sites info by deleting entry for \p Old call instruction. - /// If \p New is present then transfer \p Old call info to it. This function - /// should be called before removing call instruction or before replacing - /// call instruction with new one. - void updateCallSiteInfo(const MachineInstr *Old, - const MachineInstr *New = nullptr); + /// Following functions update call site info. They should be called before + /// removing, replacing or copying call instruction. + + /// Move the call site info from \p Old to \New call site info. This function + /// is used when we are replacing one call instruction with another one to + /// the same callee. + void moveCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New); + + /// Erase the call site info for \p MI. It is used to remove a call + /// instruction from the instruction stream. + void eraseCallSiteInfo(const MachineInstr *MI); + + /// Copy the call site info from \p Old to \ New. Its usage is when we are + /// making a copy of the instruction that will be inserted at different point + /// of the instruction stream. 
+ void copyCallSiteInfo(const MachineInstr *Old, + const MachineInstr *New); }; //===--------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index c82c5b137507..c94ad292ec96 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -20,11 +20,9 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" @@ -38,6 +36,7 @@ namespace llvm { +class AAResults; template <typename T> class ArrayRef; class DIExpression; class DILocalVariable; @@ -427,6 +426,22 @@ public: return getNumExplicitDefs() + MCID->getNumImplicitDefs(); } + /// Returns true if the instruction has implicit definition. + bool hasImplicitDef() const { + for (unsigned I = getNumExplicitOperands(), E = getNumOperands(); + I != E; ++I) { + const MachineOperand &MO = getOperand(I); + if (MO.isDef() && MO.isImplicit()) + return true; + } + return false; + } + + /// Returns the implicit operands number. + unsigned getNumImplicitOperands() const { + return getNumOperands() - getNumExplicitOperands(); + } + /// Return true if operand \p OpIdx is a subregister index. bool isOperandSubregIdx(unsigned OpIdx) const { assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate && @@ -602,6 +617,12 @@ public: return hasPropertyInBundle(1ULL << MCFlag, Type); } + /// Return true if this is an instruction that should go through the usual + /// legalization steps. + bool isPreISelOpcode(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::PreISelOpcode, Type); + } + /// Return true if this instruction can have a variable number of operands. /// In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are @@ -1020,15 +1041,13 @@ public: } /// A DBG_VALUE is an entry value iff its debug expression contains the - /// DW_OP_entry_value DWARF operation. - bool isDebugEntryValue() const { - return isDebugValue() && getDebugExpression()->isEntryValue(); - } + /// DW_OP_LLVM_entry_value operation. + bool isDebugEntryValue() const; /// Return true if the instruction is a debug value which describes a part of /// a variable as unavailable. bool isUndefDebugValue() const { - return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg(); + return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg().isValid(); } bool isPHI() const { @@ -1140,7 +1159,7 @@ public: /// is a read of a super-register. /// This does not count partial redefines of virtual registers as reads: /// %reg1024:6 = OP. - bool readsRegister(unsigned Reg, + bool readsRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterUseOperandIdx(Reg, false, TRI) != -1; } @@ -1148,20 +1167,20 @@ public: /// Return true if the MachineInstr reads the specified virtual register. /// Take into account that a partial define is a /// read-modify-write operation. 
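A hedged sketch of the call-site bookkeeping helpers above (moveCallSiteInfo and friends), for a pass that swaps one call instruction for an equivalent call to the same callee; the helper and its arguments are illustrative.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

static void replaceCall(llvm::MachineFunction &MF, llvm::MachineInstr &OldCall,
                        llvm::MachineInstr &NewCall) {
  // Per the comments above, call-site info must be updated before the old
  // call is removed or replaced, so move it first, then erase.
  MF.moveCallSiteInfo(&OldCall, &NewCall);
  OldCall.eraseFromParent();
}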
- bool readsVirtualRegister(unsigned Reg) const { + bool readsVirtualRegister(Register Reg) const { return readsWritesVirtualRegister(Reg).first; } /// Return a pair of bools (reads, writes) indicating if this instruction /// reads or writes Reg. This also considers partial defines. /// If Ops is not null, all operand indices for Reg are added. - std::pair<bool,bool> readsWritesVirtualRegister(unsigned Reg, + std::pair<bool,bool> readsWritesVirtualRegister(Register Reg, SmallVectorImpl<unsigned> *Ops = nullptr) const; /// Return true if the MachineInstr kills the specified register. /// If TargetRegisterInfo is passed, then it also checks if there is /// a kill of a super-register. - bool killsRegister(unsigned Reg, + bool killsRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterUseOperandIdx(Reg, true, TRI) != -1; } @@ -1170,7 +1189,7 @@ public: /// If TargetRegisterInfo is passed, then it also checks /// if there is a def of a super-register. /// NOTE: It's ignoring subreg indices on virtual registers. - bool definesRegister(unsigned Reg, + bool definesRegister(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1; } @@ -1178,38 +1197,38 @@ public: /// Return true if the MachineInstr modifies (fully define or partially /// define) the specified register. /// NOTE: It's ignoring subreg indices on virtual registers. - bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const { + bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const { return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1; } /// Returns true if the register is dead in this machine instruction. /// If TargetRegisterInfo is passed, then it also checks /// if there is a dead def of a super-register. - bool registerDefIsDead(unsigned Reg, + bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI = nullptr) const { return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1; } /// Returns true if the MachineInstr has an implicit-use operand of exactly /// the given register (not considering sub/super-registers). - bool hasRegisterImplicitUseOperand(unsigned Reg) const; + bool hasRegisterImplicitUseOperand(Register Reg) const; /// Returns the operand index that is a use of the specific register or -1 /// if it is not found. It further tightens the search criteria to a use /// that kills the register if isKill is true. - int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false, + int findRegisterUseOperandIdx(Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) const; /// Wrapper for findRegisterUseOperandIdx, it returns /// a pointer to the MachineOperand rather than an index. - MachineOperand *findRegisterUseOperand(unsigned Reg, bool isKill = false, + MachineOperand *findRegisterUseOperand(Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) { int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI); return (Idx == -1) ? nullptr : &getOperand(Idx); } const MachineOperand *findRegisterUseOperand( - unsigned Reg, bool isKill = false, + Register Reg, bool isKill = false, const TargetRegisterInfo *TRI = nullptr) const { return const_cast<MachineInstr *>(this)-> findRegisterUseOperand(Reg, isKill, TRI); @@ -1221,14 +1240,14 @@ public: /// overlap the specified register. If TargetRegisterInfo is non-null, /// then it also checks if there is a def of a super-register. 
/// This may also return a register mask operand when Overlap is true. - int findRegisterDefOperandIdx(unsigned Reg, + int findRegisterDefOperandIdx(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) const; /// Wrapper for findRegisterDefOperandIdx, it returns /// a pointer to the MachineOperand rather than an index. MachineOperand * - findRegisterDefOperand(unsigned Reg, bool isDead = false, + findRegisterDefOperand(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) { int Idx = findRegisterDefOperandIdx(Reg, isDead, Overlap, TRI); @@ -1236,7 +1255,7 @@ public: } const MachineOperand * - findRegisterDefOperand(unsigned Reg, bool isDead = false, + findRegisterDefOperand(Register Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = nullptr) const { return const_cast<MachineInstr *>(this)->findRegisterDefOperand( @@ -1283,7 +1302,7 @@ public: /// /// \pre CurRC must not be NULL. const TargetRegisterClass *getRegClassConstraintEffectForVReg( - unsigned Reg, const TargetRegisterClass *CurRC, + Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ExploreBundle = false) const; @@ -1346,39 +1365,39 @@ public: /// Replace all occurrences of FromReg with ToReg:SubIdx, /// properly composing subreg indices where necessary. - void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, + void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo); /// We have determined MI kills a register. Look for the /// operand that uses it and mark it as IsKill. If AddIfNotFound is true, /// add a implicit operand if it's not found. Returns true if the operand /// exists / is added. - bool addRegisterKilled(unsigned IncomingReg, + bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); /// Clear all kill flags affecting Reg. If RegInfo is provided, this includes /// all aliasing registers. - void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo); + void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo); /// We have determined MI defined a register without a use. /// Look for the operand that defines it and mark it as IsDead. If /// AddIfNotFound is true, add a implicit operand if it's not found. Returns /// true if the operand exists / is added. - bool addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo, + bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); /// Clear all dead flags on operands defining register @p Reg. - void clearRegisterDeads(unsigned Reg); + void clearRegisterDeads(Register Reg); /// Mark all subregister defs of register @p Reg with the undef flag. /// This function is used when we determined to have a subregister def in an /// otherwise undefined super register. - void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true); + void setRegisterDefReadUndef(Register Reg, bool IsUndef = true); /// We have determined MI defines a register. Make sure there is an operand /// defining Reg. - void addRegisterDefined(unsigned Reg, + void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo = nullptr); /// Mark every physreg used by this instruction as @@ -1386,13 +1405,13 @@ public: /// /// On instructions with register mask operands, also add implicit-def /// operands for all registers in UsedRegs. 
- void setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, + void setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs, const TargetRegisterInfo &TRI); /// Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. - bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const; + bool isSafeToMove(AAResults *AA, bool &SawStore) const; /// Returns true if this instruction's memory access aliases the memory /// access of Other. @@ -1404,7 +1423,7 @@ public: /// @param AA Optional alias analysis, used to compare memory operands. /// @param Other MachineInstr to check aliasing against. /// @param UseTBAA Whether to pass TBAA information to alias analysis. - bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const; + bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const; /// Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory @@ -1419,7 +1438,7 @@ public: /// argument area of a function (if it does not change). If the instruction /// does multiple loads, this returns true only if all of the loads are /// dereferenceable and invariant. - bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const; + bool isDereferenceableInvariantLoad(AAResults *AA) const; /// If the specified instruction is a PHI that always merges together the /// same virtual register, return the register, otherwise return 0. @@ -1603,9 +1622,15 @@ public: /// Scan instructions following MI and collect any matching DBG_VALUEs. void collectDebugValues(SmallVectorImpl<MachineInstr *> &DbgValues); - /// Find all DBG_VALUEs immediately following this instruction that point - /// to a register def in this instruction and point them to \p Reg instead. - void changeDebugValuesDefReg(unsigned Reg); + /// Find all DBG_VALUEs that point to the register def in this instruction + /// and point them to \p Reg instead. + void changeDebugValuesDefReg(Register Reg); + + /// Returns the Intrinsic::ID for this instruction. + /// \pre Must have an intrinsic ID operand. + unsigned getIntrinsicID() const { + return getOperand(getNumExplicitDefs()).getIntrinsicID(); + } private: /// If this instruction is embedded into a MachineFunction, return the @@ -1630,7 +1655,7 @@ private: /// this MI and the given operand index \p OpIdx. /// If the related operand does not constrained Reg, this returns CurRC. const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl( - unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; }; diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 6d7fb72b6bd1..880d4829ac7e 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -85,7 +85,7 @@ public: Register getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); } /// Add a new virtual register operand. - const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, + const MachineInstrBuilder &addReg(Register RegNo, unsigned flags = 0, unsigned SubReg = 0) const { assert((flags & 0x1) == 0 && "Passing in 'true' to addReg is forbidden! Use enums instead."); @@ -104,14 +104,14 @@ public: } /// Add a virtual register definition operand. 
- const MachineInstrBuilder &addDef(unsigned RegNo, unsigned Flags = 0, + const MachineInstrBuilder &addDef(Register RegNo, unsigned Flags = 0, unsigned SubReg = 0) const { return addReg(RegNo, Flags | RegState::Define, SubReg); } /// Add a virtual register use operand. It is an error for Flags to contain /// `RegState::Define` when calling this function. - const MachineInstrBuilder &addUse(unsigned RegNo, unsigned Flags = 0, + const MachineInstrBuilder &addUse(Register RegNo, unsigned Flags = 0, unsigned SubReg = 0) const { assert(!(Flags & RegState::Define) && "Misleading addUse defines register, use addReg instead."); @@ -135,7 +135,7 @@ public: } const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateMBB(MBB, TargetFlags)); return *this; } @@ -145,42 +145,42 @@ public: return *this; } - const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx, - int Offset = 0, - unsigned char TargetFlags = 0) const { + const MachineInstrBuilder & + addConstantPoolIndex(unsigned Idx, int Offset = 0, + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateCPI(Idx, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateTargetIndex(Idx, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addJumpTableIndex(unsigned Idx, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateJTI(Idx, TargetFlags)); return *this; } const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateGA(GV, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addExternalSymbol(const char *FnName, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateES(FnName, TargetFlags)); return *this; } const MachineInstrBuilder &addBlockAddress(const BlockAddress *BA, int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + unsigned TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateBA(BA, Offset, TargetFlags)); return *this; } @@ -250,6 +250,11 @@ public: return *this; } + const MachineInstrBuilder &addShuffleMask(const Constant *Val) const { + MI->addOperand(*MF, MachineOperand::CreateShuffleMask(Val)); + return *this; + } + const MachineInstrBuilder &addSym(MCSymbol *Sym, unsigned char TargetFlags = 0) const { MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym, TargetFlags)); @@ -316,7 +321,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, /// This version of the builder sets up the first operand as a /// destination virtual register. 
inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, - const MCInstrDesc &MCID, unsigned DestReg) { + const MCInstrDesc &MCID, Register DestReg) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)) .addReg(DestReg, RegState::Define); } @@ -327,7 +332,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); @@ -343,7 +348,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { MachineFunction &MF = *BB.getParent(); MachineInstr *MI = MF.CreateMachineInstr(MCID, DL); BB.insert(I, MI); @@ -352,7 +357,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { // Calling the overload for instr_iterator is always correct. However, the // definition is not available in headers, so inline the check. if (I.isInsideBundle()) @@ -362,7 +367,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr &I, inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineInstr *I, const DebugLoc &DL, const MCInstrDesc &MCID, - unsigned DestReg) { + Register DestReg) { return BuildMI(BB, *I, DL, MCID, DestReg); } @@ -416,7 +421,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, /// end of the given MachineBasicBlock, and sets up the first operand as a /// destination virtual register. inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, - const MCInstrDesc &MCID, unsigned DestReg) { + const MCInstrDesc &MCID, Register DestReg) { return BuildMI(*BB, BB->end(), DL, MCID, DestReg); } @@ -426,7 +431,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, /// second operand is an immediate. MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic @@ -442,7 +447,7 @@ MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, const MDNode *Variable, + Register Reg, const MDNode *Variable, const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic @@ -490,16 +495,13 @@ inline unsigned getRenamableRegState(bool B) { /// Get all register state flags from machine operand \p RegOp. 
inline unsigned getRegState(const MachineOperand &RegOp) { assert(RegOp.isReg() && "Not a register operand"); - return getDefRegState(RegOp.isDef()) | - getImplRegState(RegOp.isImplicit()) | - getKillRegState(RegOp.isKill()) | - getDeadRegState(RegOp.isDead()) | - getUndefRegState(RegOp.isUndef()) | - getInternalReadRegState(RegOp.isInternalRead()) | - getDebugRegState(RegOp.isDebug()) | - getRenamableRegState( - TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) && - RegOp.isRenamable()); + return getDefRegState(RegOp.isDef()) | getImplRegState(RegOp.isImplicit()) | + getKillRegState(RegOp.isKill()) | getDeadRegState(RegOp.isDead()) | + getUndefRegState(RegOp.isUndef()) | + getInternalReadRegState(RegOp.isInternalRead()) | + getDebugRegState(RegOp.isDebug()) | + getRenamableRegState(Register::isPhysicalRegister(RegOp.getReg()) && + RegOp.isRenamable()); } /// Helper class for constructing bundles of MachineInstrs. diff --git a/include/llvm/CodeGen/MachineLoopUtils.h b/include/llvm/CodeGen/MachineLoopUtils.h new file mode 100644 index 000000000000..41379b75d00a --- /dev/null +++ b/include/llvm/CodeGen/MachineLoopUtils.h @@ -0,0 +1,41 @@ +//=- MachineLoopUtils.h - Helper functions for manipulating loops -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H +#define LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H + +namespace llvm { +class MachineBasicBlock; +class MachineRegisterInfo; +class TargetInstrInfo; + +enum LoopPeelDirection { + LPD_Front, ///< Peel the first iteration of the loop. + LPD_Back ///< Peel the last iteration of the loop. +}; + +/// Peels a single block loop. Loop must have two successors, one of which +/// must be itself. Similarly it must have two predecessors, one of which must +/// be itself. +/// +/// The loop block is copied and inserted into the CFG such that two copies of +/// the loop follow on from each other. The copy is inserted either before or +/// after the loop based on Direction. +/// +/// Phis are updated and an unconditional branch inserted at the end of the +/// clone so as to execute a single iteration. +/// +/// The trip count of Loop is not updated. +MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction, + MachineBasicBlock *Loop, + MachineRegisterInfo &MRI, + const TargetInstrInfo *TII); + +} // namespace llvm + +#endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index 65f706302bc2..33a48a235e18 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -293,8 +293,6 @@ public: /// Support for operator<<. 
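The PeelSingleBlockLoop helper added in the new MachineLoopUtils.h above is easiest to see in a small driver. A minimal sketch, assuming the caller has already located a candidate single-block loop that meets the preconditions in the header comment (the driver function itself is illustrative, not part of the patch):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include <cassert>
using namespace llvm;

static void peelLastIteration(MachineBasicBlock *LoopBB) {
  MachineFunction &MF = *LoopBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  // Precondition from the header comment: the block branches back to itself.
  assert(LoopBB->isSuccessor(LoopBB) && LoopBB->isPredecessor(LoopBB));

  // Clone one trailing iteration after the loop; the helper updates phis and
  // terminates the clone with an unconditional branch.
  MachineBasicBlock *Peeled = PeelSingleBlockLoop(LPD_Back, LoopBB, MRI, TII);
  (void)Peeled;
}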
/// @{ - void print(raw_ostream &OS) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST) const; void print(raw_ostream &OS, ModuleSlotTracker &MST, SmallVectorImpl<StringRef> &SSNs, const LLVMContext &Context, const MachineFrameInfo *MFI, const TargetInstrInfo *TII) const; @@ -319,11 +317,6 @@ public: } }; -inline raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO) { - MRO.print(OS); - return OS; -} - } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 4ff5c7fd013a..6902dada2423 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -33,6 +33,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/IR/PassManager.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" @@ -74,7 +75,10 @@ protected: /// made by different debugging and exception handling schemes and reformated /// for specific use. /// -class MachineModuleInfo : public ImmutablePass { +class MachineModuleInfo { + friend class MachineModuleInfoWrapperPass; + friend class MachineModuleAnalysis; + const LLVMTargetMachine &TM; /// This is the MCContext used for the entire code generator. @@ -140,15 +144,17 @@ class MachineModuleInfo : public ImmutablePass { const Function *LastRequest = nullptr; ///< Used for shortcut/cache. MachineFunction *LastResult = nullptr; ///< Used for shortcut/cache. -public: - static char ID; // Pass identification, replacement for typeid + MachineModuleInfo &operator=(MachineModuleInfo &&MMII) = delete; +public: explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr); - ~MachineModuleInfo() override; - // Initialization and Finalization - bool doInitialization(Module &) override; - bool doFinalization(Module &) override; + MachineModuleInfo(MachineModuleInfo &&MMII); + + ~MachineModuleInfo(); + + void initialize(); + void finalize(); const LLVMTargetMachine &getTarget() const { return TM; } @@ -254,6 +260,38 @@ public: /// \} }; // End class MachineModuleInfo +class MachineModuleInfoWrapperPass : public ImmutablePass { + MachineModuleInfo MMI; + +public: + static char ID; // Pass identification, replacement for typeid + explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM = nullptr); + + // Initialization and Finalization + bool doInitialization(Module &) override; + bool doFinalization(Module &) override; + + MachineModuleInfo &getMMI() { return MMI; } + const MachineModuleInfo &getMMI() const { return MMI; } +}; + +/// An analysis that produces \c MachineInfo for a module. +class MachineModuleAnalysis : public AnalysisInfoMixin<MachineModuleAnalysis> { + friend AnalysisInfoMixin<MachineModuleAnalysis>; + static AnalysisKey Key; + + const LLVMTargetMachine *TM; + +public: + /// Provide the result type for this analysis pass. + using Result = MachineModuleInfo; + + MachineModuleAnalysis(const LLVMTargetMachine *TM) : TM(TM) {} + + /// Run the analysis pass and produce machine module information. 
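With MachineModuleInfo no longer a pass in its own right, legacy pass-manager clients now reach it through the new wrapper pass shown above. A minimal sketch of the updated access pattern (the example pass is illustrative; only MachineModuleInfoWrapperPass and getMMI() come from the patch):

#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct ExamplePass : public MachineFunctionPass {
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Previously this would have been AU.addRequired<MachineModuleInfo>().
    AU.addRequired<MachineModuleInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    MachineModuleInfo &MMI =
        getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
    (void)MMI;
    return false;
  }
};
} // end anonymous namespace

char ExamplePass::ID = 0;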
+ MachineModuleInfo run(Module &M, ModuleAnalysisManager &); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINEMODULEINFO_H diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 2152c7582e5a..df914dc2d85e 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -23,6 +23,7 @@ namespace llvm { class BlockAddress; +class Constant; class ConstantFP; class ConstantInt; class GlobalValue; @@ -68,7 +69,8 @@ public: MO_CFIIndex, ///< MCCFIInstruction index. MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel - MO_Last = MO_Predicate, + MO_ShuffleMask, ///< Other IR Constant for ISel (shuffle masks) + MO_Last = MO_ShuffleMask }; private: @@ -172,6 +174,7 @@ private: unsigned CFIIndex; // For MO_CFI. Intrinsic::ID IntrinsicID; // For MO_IntrinsicID. unsigned Pred; // For MO_Predicate + const Constant *ShuffleMask; // For MO_ShuffleMask struct { // For MO_Register. // Register number is in SmallContents.RegNo. @@ -341,6 +344,7 @@ public: bool isCFIIndex() const { return OpKind == MO_CFIIndex; } bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; } bool isPredicate() const { return OpKind == MO_Predicate; } + bool isShuffleMask() const { return OpKind == MO_ShuffleMask; } //===--------------------------------------------------------------------===// // Accessors for Register Operands //===--------------------------------------------------------------------===// @@ -455,7 +459,7 @@ public: /// Change the register this operand corresponds to. /// - void setReg(unsigned Reg); + void setReg(Register Reg); void setSubReg(unsigned subReg) { assert(isReg() && "Wrong MachineOperand mutator"); @@ -468,13 +472,13 @@ public: /// using TargetRegisterInfo to compose the subreg indices if necessary. /// Reg must be a virtual register, SubIdx can be 0. /// - void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo&); + void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo&); /// substPhysReg - Substitute the current register with the physical register /// Reg, taking any existing SubReg into account. For instance, /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al. /// - void substPhysReg(unsigned Reg, const TargetRegisterInfo&); + void substPhysReg(MCRegister Reg, const TargetRegisterInfo&); void setIsUse(bool Val = true) { setIsDef(!Val); } @@ -579,6 +583,11 @@ public: return Contents.Pred; } + const Constant *getShuffleMask() const { + assert(isShuffleMask() && "Wrong MachineOperand accessor"); + return Contents.ShuffleMask; + } + /// Return the offset from the symbol in this operand. This always returns 0 /// for ExternalSymbol operands. int64_t getOffset() const { @@ -717,11 +726,11 @@ public: void ChangeToFPImmediate(const ConstantFP *FPImm); /// ChangeToES - Replace this operand with a new external symbol operand. - void ChangeToES(const char *SymName, unsigned char TargetFlags = 0); + void ChangeToES(const char *SymName, unsigned TargetFlags = 0); /// ChangeToGA - Replace this operand with a new global address operand. void ChangeToGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand. void ChangeToMCSymbol(MCSymbol *Sym); @@ -731,12 +740,12 @@ public: /// Replace this operand with a target index. 
void ChangeToTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. - void ChangeToRegister(unsigned Reg, bool isDef, bool isImp = false, + void ChangeToRegister(Register Reg, bool isDef, bool isImp = false, bool isKill = false, bool isDead = false, bool isUndef = false, bool isDebug = false); @@ -762,7 +771,7 @@ public: return Op; } - static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp = false, + static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp = false, bool isKill = false, bool isDead = false, bool isUndef = false, bool isEarlyClobber = false, @@ -788,7 +797,7 @@ public: return Op; } static MachineOperand CreateMBB(MachineBasicBlock *MBB, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_MachineBasicBlock); Op.setMBB(MBB); Op.setTargetFlags(TargetFlags); @@ -800,7 +809,7 @@ public: return Op; } static MachineOperand CreateCPI(unsigned Idx, int Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_ConstantPoolIndex); Op.setIndex(Idx); Op.setOffset(Offset); @@ -808,21 +817,21 @@ public: return Op; } static MachineOperand CreateTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_TargetIndex); Op.setIndex(Idx); Op.setOffset(Offset); Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) { + static MachineOperand CreateJTI(unsigned Idx, unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_JumpTableIndex); Op.setIndex(Idx); Op.setTargetFlags(TargetFlags); return Op; } static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_GlobalAddress); Op.Contents.OffsetedInfo.Val.GV = GV; Op.setOffset(Offset); @@ -830,7 +839,7 @@ public: return Op; } static MachineOperand CreateES(const char *SymName, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_ExternalSymbol); Op.Contents.OffsetedInfo.Val.SymbolName = SymName; Op.setOffset(0); // Offset is always 0. 
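The new MO_ShuffleMask operand kind introduced above travels through the usual builder interface. A minimal sketch of attaching and reading one; the instruction description, registers and mask constant are placeholders supplied by the caller (in practice the mask comes from the shufflevector being selected):

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include <cassert>
using namespace llvm;

static MachineInstr *buildShuffle(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator InsertPt,
                                  const DebugLoc &DL, const MCInstrDesc &Desc,
                                  Register Dst, Register Src1, Register Src2,
                                  const Constant *Mask) {
  MachineInstrBuilder MIB = BuildMI(MBB, InsertPt, DL, Desc, Dst)
                                .addUse(Src1)
                                .addUse(Src2)
                                .addShuffleMask(Mask); // adds MO_ShuffleMask

  // Operand 3 is the mask just attached; round-trip it through the new
  // accessors.
  assert(MIB->getOperand(3).isShuffleMask());
  const Constant *ReadBack = MIB->getOperand(3).getShuffleMask();
  (void)ReadBack;
  return MIB;
}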
@@ -838,7 +847,7 @@ public: return Op; } static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_BlockAddress); Op.Contents.OffsetedInfo.Val.BA = BA; Op.setOffset(Offset); @@ -876,7 +885,7 @@ public: } static MachineOperand CreateMCSymbol(MCSymbol *Sym, - unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_MCSymbol); Op.Contents.Sym = Sym; Op.setOffset(0); @@ -902,6 +911,12 @@ public: return Op; } + static MachineOperand CreateShuffleMask(const Constant *C) { + MachineOperand Op(MachineOperand::MO_ShuffleMask); + Op.Contents.ShuffleMask = C; + return Op; + } + friend class MachineInstr; friend class MachineRegisterInfo; diff --git a/include/llvm/CodeGen/MachinePipeliner.h b/include/llvm/CodeGen/MachinePipeliner.h index 03ca53072685..e9cf7e115bff 100644 --- a/include/llvm/CodeGen/MachinePipeliner.h +++ b/include/llvm/CodeGen/MachinePipeliner.h @@ -40,6 +40,8 @@ #ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H #define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H +#include "llvm/Analysis/AliasAnalysis.h" + #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" @@ -148,7 +150,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { /// We may create a new instruction, so remember it because it /// must be deleted when the pass is finished. - SmallPtrSet<MachineInstr *, 4> NewMIs; + DenseMap<MachineInstr*, MachineInstr *> NewMIs; /// Ordered list of DAG postprocessing steps. std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; @@ -200,7 +202,7 @@ public: RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) { P.MF->getSubtarget().getSMSMutations(Mutations); if (SwpEnableCopyToPhi) - Mutations.push_back(llvm::make_unique<CopyToPhiMutation>()); + Mutations.push_back(std::make_unique<CopyToPhiMutation>()); } void schedule() override; @@ -297,53 +299,8 @@ private: void computeNodeOrder(NodeSetType &NodeSets); void checkValidNodeOrder(const NodeSetType &Circuits) const; bool schedulePipeline(SMSchedule &Schedule); - void generatePipelinedLoop(SMSchedule &Schedule); - void generateProlog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &PrologBBs); - void generateEpilog(SMSchedule &Schedule, unsigned LastStage, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); - void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, - MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap, unsigned LastStageNum, - unsigned CurStageNum, bool IsLast); - void removeDeadInstructions(MachineBasicBlock *KernelBB, - MBBVectorTy &EpilogBBs); - void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, - SMSchedule &Schedule); - void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs, - MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, - SMSchedule &Schedule, ValueMapTy *VRMap); bool computeDelta(MachineInstr &MI, unsigned &Delta); - void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, - unsigned Num); - 
MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, - unsigned InstStageNum); - MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, - unsigned InstStageNum, - SMSchedule &Schedule); - void updateInstruction(MachineInstr *NewMI, bool LastDef, - unsigned CurStageNum, unsigned InstrStageNum, - SMSchedule &Schedule, ValueMapTy *VRMap); MachineInstr *findDefInLoop(unsigned Reg); - unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, - unsigned LoopStage, ValueMapTy *VRMap, - MachineBasicBlock *BB); - void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, - SMSchedule &Schedule, ValueMapTy *VRMap, - InstrMapTy &InstrMap); - void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule, - InstrMapTy &InstrMap, unsigned CurStageNum, - unsigned PhiNum, MachineInstr *Phi, - unsigned OldReg, unsigned NewReg, - unsigned PrevReg = 0); bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, unsigned &OffsetPos, unsigned &NewBase, int64_t &NewOffset); @@ -529,12 +486,6 @@ private: /// Map from instruction to execution cycle. std::map<SUnit *, int> InstrToCycle; - /// Map for each register and the max difference between its uses and def. - /// The first element in the pair is the max difference in stages. The - /// second is true if the register defines a Phi value and loop value is - /// scheduled before the Phi. - std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff; - /// Keep track of the first cycle value in the schedule. It starts /// as zero, but the algorithm allows negative values. int FirstCycle = 0; @@ -560,7 +511,6 @@ public: void reset() { ScheduledInstrs.clear(); InstrToCycle.clear(); - RegToStageDiff.clear(); FirstCycle = 0; LastCycle = 0; InitiationInterval = 0; @@ -620,28 +570,6 @@ public: return (LastCycle - FirstCycle) / InitiationInterval; } - /// Return the max. number of stages/iterations that can occur between a - /// register definition and its uses. - unsigned getStagesForReg(int Reg, unsigned CurStage) { - std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; - if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second) - return 1; - return Stages.first; - } - - /// The number of stages for a Phi is a little different than other - /// instructions. The minimum value computed in RegToStageDiff is 1 - /// because we assume the Phi is needed for at least 1 iteration. - /// This is not the case if the loop value is scheduled prior to the - /// Phi in the same stage. This function returns the number of stages - /// or iterations needed between the Phi definition and any uses. - unsigned getStagesForPhi(int Reg) { - std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; - if (Stages.second) - return Stages.first; - return Stages.first - 1; - } - /// Return the instructions that are scheduled at the specified cycle. std::deque<SUnit *> &getInstructions(int cycle) { return ScheduledInstrs[cycle]; diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h index b67e6b52ac8f..cb258b5e7b21 100644 --- a/include/llvm/CodeGen/MachinePostDominators.h +++ b/include/llvm/CodeGen/MachinePostDominators.h @@ -16,68 +16,76 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include <memory> namespace llvm { /// -/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used -/// to compute the post-dominator tree. 
+/// MachinePostDominatorTree - an analysis pass wrapper for DominatorTree +/// used to compute the post-dominator tree for MachineFunctions. /// -struct MachinePostDominatorTree : public MachineFunctionPass { -private: - PostDomTreeBase<MachineBasicBlock> *DT; +class MachinePostDominatorTree : public MachineFunctionPass { + using PostDomTreeT = PostDomTreeBase<MachineBasicBlock>; + std::unique_ptr<PostDomTreeT> PDT; public: static char ID; MachinePostDominatorTree(); - ~MachinePostDominatorTree() override; - FunctionPass *createMachinePostDominatorTreePass(); const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { - return DT->getRoots(); + return PDT->getRoots(); } - MachineDomTreeNode *getRootNode() const { - return DT->getRootNode(); - } + MachineDomTreeNode *getRootNode() const { return PDT->getRootNode(); } MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { - return DT->getNode(BB); + return PDT->getNode(BB); } MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { - return DT->getNode(BB); + return PDT->getNode(BB); } bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const { - return DT->dominates(A, B); + return PDT->dominates(A, B); } bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { - return DT->dominates(A, B); + return PDT->dominates(A, B); } bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const { - return DT->properlyDominates(A, B); + return PDT->properlyDominates(A, B); } bool properlyDominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { - return DT->properlyDominates(A, B); + return PDT->properlyDominates(A, B); + } + + bool isVirtualRoot(const MachineDomTreeNode *Node) const { + return PDT->isVirtualRoot(Node); } MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A, - MachineBasicBlock *B) { - return DT->findNearestCommonDominator(A, B); + MachineBasicBlock *B) const { + return PDT->findNearestCommonDominator(A, B); } + /// Returns the nearest common dominator of the given blocks. + /// If that tree node is a virtual root, a nullptr will be returned. + MachineBasicBlock * + findNearestCommonDominator(ArrayRef<MachineBasicBlock *> Blocks) const; + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override { PDT.reset(nullptr); } + void verifyAnalysis() const override; void print(llvm::raw_ostream &OS, const Module *M = nullptr) const override; }; } //end of namespace llvm diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h index 6d9fb9b9100a..eeb69fef2c6b 100644 --- a/include/llvm/CodeGen/MachineRegionInfo.h +++ b/include/llvm/CodeGen/MachineRegionInfo.h @@ -22,7 +22,7 @@ namespace llvm { -struct MachinePostDominatorTree; +class MachinePostDominatorTree; class MachineRegion; class MachineRegionNode; class MachineRegionInfo; diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index b5deed1f5010..488a5a55a169 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -107,16 +107,16 @@ private: /// getRegUseDefListHead - Return the head pointer for the register use/def /// list for the specified virtual or physical register. 
- MachineOperand *&getRegUseDefListHead(unsigned RegNo) { - if (TargetRegisterInfo::isVirtualRegister(RegNo)) - return VRegInfo[RegNo].second; - return PhysRegUseDefLists[RegNo]; + MachineOperand *&getRegUseDefListHead(Register RegNo) { + if (RegNo.isVirtual()) + return VRegInfo[RegNo.id()].second; + return PhysRegUseDefLists[RegNo.id()]; } - MachineOperand *getRegUseDefListHead(unsigned RegNo) const { - if (TargetRegisterInfo::isVirtualRegister(RegNo)) - return VRegInfo[RegNo].second; - return PhysRegUseDefLists[RegNo]; + MachineOperand *getRegUseDefListHead(Register RegNo) const { + if (RegNo.isVirtual()) + return VRegInfo[RegNo.id()].second; + return PhysRegUseDefLists[RegNo.id()]; } /// Get the next element in the use-def chain. @@ -214,8 +214,8 @@ public: bool shouldTrackSubRegLiveness(const TargetRegisterClass &RC) const { return subRegLivenessEnabled() && RC.HasDisjunctSubRegs; } - bool shouldTrackSubRegLiveness(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Must pass a VReg"); + bool shouldTrackSubRegLiveness(Register VReg) const { + assert(VReg.isVirtual() && "Must pass a VReg"); return shouldTrackSubRegLiveness(*getRegClass(VReg)); } bool subRegLivenessEnabled() const { @@ -326,7 +326,7 @@ public: /// of the specified register, skipping those marked as Debug. using reg_nodbg_iterator = defusechain_iterator<true, true, true, true, false, false>; - reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const { + reg_nodbg_iterator reg_nodbg_begin(Register RegNo) const { return reg_nodbg_iterator(getRegUseDefListHead(RegNo)); } static reg_nodbg_iterator reg_nodbg_end() { @@ -374,7 +374,7 @@ public: /// reg_nodbg_empty - Return true if the only instructions using or defining /// Reg are Debug instructions. - bool reg_nodbg_empty(unsigned RegNo) const { + bool reg_nodbg_empty(Register RegNo) const { return reg_nodbg_begin(RegNo) == reg_nodbg_end(); } @@ -628,10 +628,10 @@ public: /// Return the register class of the specified virtual register. /// This shouldn't be used directly unless \p Reg has a register class. /// \see getRegClassOrNull when this might happen. - const TargetRegisterClass *getRegClass(unsigned Reg) const { - assert(VRegInfo[Reg].first.is<const TargetRegisterClass *>() && + const TargetRegisterClass *getRegClass(Register Reg) const { + assert(VRegInfo[Reg.id()].first.is<const TargetRegisterClass *>() && "Register class not set, wrong accessor"); - return VRegInfo[Reg].first.get<const TargetRegisterClass *>(); + return VRegInfo[Reg.id()].first.get<const TargetRegisterClass *>(); } /// Return the register class of \p Reg, or null if Reg has not been assigned @@ -727,7 +727,7 @@ public: /// Get the low-level type of \p Reg or LLT{} if Reg is not a generic /// (target independent) virtual register. LLT getType(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) + if (Register::isVirtualRegister(Reg) && VRegToType.inBounds(Reg)) return VRegToType[Reg]; return LLT{}; } @@ -760,7 +760,7 @@ public: /// specified virtual register. This is typically used by target, and in case /// of an earlier hint it will be overwritten. 
void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); RegAllocHints[VReg].first = Type; RegAllocHints[VReg].second.clear(); RegAllocHints[VReg].second.push_back(PrefReg); @@ -769,7 +769,7 @@ public: /// addRegAllocationHint - Add a register allocation hint to the hints /// vector for VReg. void addRegAllocationHint(unsigned VReg, unsigned PrefReg) { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); RegAllocHints[VReg].second.push_back(PrefReg); } @@ -789,17 +789,18 @@ public: /// specified virtual register. If there are many hints, this returns the /// one with the greatest weight. std::pair<unsigned, unsigned> - getRegAllocationHint(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); - unsigned BestHint = (RegAllocHints[VReg].second.size() ? - RegAllocHints[VReg].second[0] : 0); - return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint); + getRegAllocationHint(Register VReg) const { + assert(VReg.isVirtual()); + unsigned BestHint = (RegAllocHints[VReg.id()].second.size() ? + RegAllocHints[VReg.id()].second[0] : 0); + return std::pair<unsigned, unsigned>(RegAllocHints[VReg.id()].first, + BestHint); } /// getSimpleHint - same as getRegAllocationHint except it will only return /// a target independent hint. - unsigned getSimpleHint(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + Register getSimpleHint(Register VReg) const { + assert(VReg.isVirtual()); std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg); return Hint.first ? 0 : Hint.second; } @@ -808,7 +809,7 @@ public: /// register allocation hints for VReg. const std::pair<unsigned, SmallVector<unsigned, 4>> &getRegAllocationHints(unsigned VReg) const { - assert(TargetRegisterInfo::isVirtualRegister(VReg)); + assert(Register::isVirtualRegister(VReg)); return RegAllocHints[VReg]; } @@ -817,6 +818,17 @@ public: /// deleted during LiveDebugVariables analysis. void markUsesInDebugValueAsUndef(unsigned Reg) const; + /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions + /// to refer to the designated register. + void updateDbgUsersToReg(unsigned Reg, + ArrayRef<MachineInstr*> Users) const { + for (MachineInstr *MI : Users) { + assert(MI->isDebugInstr()); + assert(MI->getOperand(0).isReg()); + MI->getOperand(0).setReg(Reg); + } + } + /// Return true if the specified register is modified in this function. /// This checks that no defining machine operands exist for the register or /// any of its aliases. Definitions found on functions marked noreturn are @@ -882,8 +894,8 @@ public: /// /// Reserved registers may belong to an allocatable register class, but the /// target has explicitly requested that they are not used. - bool isReserved(unsigned PhysReg) const { - return getReservedRegs().test(PhysReg); + bool isReserved(Register PhysReg) const { + return getReservedRegs().test(PhysReg.id()); } /// Returns true when the given register unit is considered reserved. 
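The new updateDbgUsersToReg helper is designed to pair with MachineInstr::collectDebugValues from the MachineInstr.h changes above. A minimal sketch, assuming DefMI and NewReg are supplied by the surrounding rewrite:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static void retargetDbgUsers(MachineRegisterInfo &MRI, MachineInstr &DefMI,
                             unsigned NewReg) {
  // Gather the DBG_VALUEs following the def that refer to its result.
  SmallVector<MachineInstr *, 4> DbgUsers;
  DefMI.collectDebugValues(DbgUsers);

  // Point every collected DBG_VALUE at the replacement register.
  MRI.updateDbgUsersToReg(NewReg, DbgUsers);
}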
@@ -1164,7 +1176,7 @@ public: PSetIterator(unsigned RegUnit, const MachineRegisterInfo *MRI) { const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); - if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + if (Register::isVirtualRegister(RegUnit)) { const TargetRegisterClass *RC = MRI->getRegClass(RegUnit); PSet = TRI->getRegClassPressureSets(RC); Weight = TRI->getRegClassWeight(RC).RegWeight; diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 75a334f61ad0..333367943ac0 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -100,6 +100,7 @@ namespace llvm { extern cl::opt<bool> ForceTopDown; extern cl::opt<bool> ForceBottomUp; +extern cl::opt<bool> VerifyScheduling; class LiveIntervals; class MachineDominatorTree; diff --git a/include/llvm/CodeGen/ModuloSchedule.h b/include/llvm/CodeGen/ModuloSchedule.h new file mode 100644 index 000000000000..81a9b63b64ca --- /dev/null +++ b/include/llvm/CodeGen/ModuloSchedule.h @@ -0,0 +1,367 @@ +//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Software pipelining (SWP) is an instruction scheduling technique for loops +// that overlaps loop iterations and exploits ILP via compiler transformations. +// +// There are multiple methods for analyzing a loop and creating a schedule. +// An example algorithm is Swing Modulo Scheduling (implemented by the +// MachinePipeliner). The details of how a schedule is arrived at are irrelevant +// for the task of actually rewriting a loop to adhere to the schedule, which +// is what this file does. +// +// A schedule is, for every instruction in a block, a Cycle and a Stage. Note +// that we only support single-block loops, so "block" and "loop" can be used +// interchangably. +// +// The Cycle of an instruction defines a partial order of the instructions in +// the remapped loop. Instructions within a cycle must not consume the output +// of any instruction in the same cycle. Cycle information is assumed to have +// been calculated such that the processor will execute instructions in +// lock-step (for example in a VLIW ISA). +// +// The Stage of an instruction defines the mapping between logical loop +// iterations and pipelined loop iterations. An example (unrolled) pipeline +// may look something like: +// +// I0[0] Execute instruction I0 of iteration 0 +// I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 1 +// I1[1], I0[2] +// I1[2], I0[3] +// +// In the schedule for this unrolled sequence we would say that I0 was scheduled +// in stage 0 and I1 in stage 1: +// +// loop: +// [stage 0] x = I0 +// [stage 1] I1 x (from stage 0) +// +// And to actually generate valid code we must insert a phi: +// +// loop: +// x' = phi(x) +// x = I0 +// I1 x' +// +// This is a simple example; the rules for how to generate correct code given +// an arbitrary schedule containing loop-carried values are complex. +// +// Note that these examples only mention the steady-state kernel of the +// generated loop; prologs and epilogs must be generated also that prime and +// flush the pipeline. Doing so is nontrivial. 
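A sketch of how a scheduling algorithm might hand its result to the expansion machinery declared below; the instruction order, cycle and stage values are whatever the scheduler computed, and the driver function is illustrative rather than part of the patch:

#include "llvm/CodeGen/ModuloSchedule.h"
using namespace llvm;

static void expandSchedule(MachineFunction &MF, MachineLoop *L,
                           LiveIntervals &LIS,
                           std::vector<MachineInstr *> Order,
                           DenseMap<MachineInstr *, int> Cycle,
                           DenseMap<MachineInstr *, int> Stage) {
  // Package the per-instruction cycle/stage assignment.
  ModuloSchedule MS(MF, L, std::move(Order), std::move(Cycle),
                    std::move(Stage));

  // Rewrite the loop in-place, emitting prolog and epilog blocks as needed.
  ModuloScheduleExpander::InstrChangesTy NoChanges;
  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NoChanges));
  MSE.expand();
  MSE.cleanup();
}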
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H +#define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineLoopUtils.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include <deque> +#include <vector> + +namespace llvm { +class MachineBasicBlock; +class MachineInstr; +class LiveIntervals; + +/// Represents a schedule for a single-block loop. For every instruction we +/// maintain a Cycle and Stage. +class ModuloSchedule { +private: + /// The block containing the loop instructions. + MachineLoop *Loop; + + /// The instructions to be generated, in total order. Cycle provides a partial + /// order; the total order within cycles has been decided by the schedule + /// producer. + std::vector<MachineInstr *> ScheduledInstrs; + + /// The cycle for each instruction. + DenseMap<MachineInstr *, int> Cycle; + + /// The stage for each instruction. + DenseMap<MachineInstr *, int> Stage; + + /// The number of stages in this schedule (Max(Stage) + 1). + int NumStages; + +public: + /// Create a new ModuloSchedule. + /// \arg ScheduledInstrs The new loop instructions, in total resequenced + /// order. + /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does + /// not need to start at zero. ScheduledInstrs must be partially ordered by + /// Cycle. + /// \arg Stage Stage index for all instructions in ScheduleInstrs. + ModuloSchedule(MachineFunction &MF, MachineLoop *Loop, + std::vector<MachineInstr *> ScheduledInstrs, + DenseMap<MachineInstr *, int> Cycle, + DenseMap<MachineInstr *, int> Stage) + : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)), + Stage(std::move(Stage)) { + NumStages = 0; + for (auto &KV : this->Stage) + NumStages = std::max(NumStages, KV.second); + ++NumStages; + } + + /// Return the single-block loop being scheduled. + MachineLoop *getLoop() const { return Loop; } + + /// Return the number of stages contained in this schedule, which is the + /// largest stage index + 1. + int getNumStages() const { return NumStages; } + + /// Return the first cycle in the schedule, which is the cycle index of the + /// first instruction. + int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; } + + /// Return the final cycle in the schedule, which is the cycle index of the + /// last instruction. + int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; } + + /// Return the stage that MI is scheduled in, or -1. + int getStage(MachineInstr *MI) { + auto I = Stage.find(MI); + return I == Stage.end() ? -1 : I->second; + } + + /// Return the cycle that MI is scheduled at, or -1. + int getCycle(MachineInstr *MI) { + auto I = Cycle.find(MI); + return I == Cycle.end() ? -1 : I->second; + } + + /// Return the rescheduled instructions in order. + ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; } + + void dump() { print(dbgs()); } + void print(raw_ostream &OS); +}; + +/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, +/// rewriting the old loop and inserting prologs and epilogs as required. 
+class ModuloScheduleExpander { +public: + using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>; + +private: + using ValueMapTy = DenseMap<unsigned, unsigned>; + using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; + using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; + + ModuloSchedule &Schedule; + MachineFunction &MF; + const TargetSubtargetInfo &ST; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + LiveIntervals &LIS; + + MachineBasicBlock *BB; + MachineBasicBlock *Preheader; + MachineBasicBlock *NewKernel = nullptr; + std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo; + + /// Map for each register and the max difference between its uses and def. + /// The first element in the pair is the max difference in stages. The + /// second is true if the register defines a Phi value and loop value is + /// scheduled before the Phi. + std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff; + + /// Instructions to change when emitting the final schedule. + InstrChangesTy InstrChanges; + + void generatePipelinedLoop(); + void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, MBBVectorTy &PrologBBs); + void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs); + void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, + bool IsLast); + void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, + MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, + ValueMapTy *VRMap, InstrMapTy &InstrMap, + unsigned LastStageNum, unsigned CurStageNum, bool IsLast); + void removeDeadInstructions(MachineBasicBlock *KernelBB, + MBBVectorTy &EpilogBBs); + void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs); + void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs, + MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, + ValueMapTy *VRMap); + bool computeDelta(MachineInstr &MI, unsigned &Delta); + void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, + unsigned Num); + MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, + unsigned InstStageNum); + void updateInstruction(MachineInstr *NewMI, bool LastDef, + unsigned CurStageNum, unsigned InstrStageNum, + ValueMapTy *VRMap); + MachineInstr *findDefInLoop(unsigned Reg); + unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, + unsigned LoopStage, ValueMapTy *VRMap, + MachineBasicBlock *BB); + void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, + ValueMapTy *VRMap, InstrMapTy &InstrMap); + void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap, + unsigned CurStageNum, unsigned PhiNum, + MachineInstr *Phi, unsigned OldReg, + unsigned NewReg, unsigned PrevReg = 0); + bool isLoopCarried(MachineInstr &Phi); + + /// Return the max. number of stages/iterations that can occur between a + /// register definition and its uses. 
+ unsigned getStagesForReg(int Reg, unsigned CurStage) { + std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; + if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 && + Stages.second) + return 1; + return Stages.first; + } + + /// The number of stages for a Phi is a little different than other + /// instructions. The minimum value computed in RegToStageDiff is 1 + /// because we assume the Phi is needed for at least 1 iteration. + /// This is not the case if the loop value is scheduled prior to the + /// Phi in the same stage. This function returns the number of stages + /// or iterations needed between the Phi definition and any uses. + unsigned getStagesForPhi(int Reg) { + std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; + if (Stages.second) + return Stages.first; + return Stages.first - 1; + } + +public: + /// Create a new ModuloScheduleExpander. + /// \arg InstrChanges Modifications to make to instructions with memory + /// operands. + /// FIXME: InstrChanges is opaque and is an implementation detail of an + /// optimization in MachinePipeliner that crosses abstraction boundaries. + ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, + LiveIntervals &LIS, InstrChangesTy InstrChanges) + : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()), + TII(ST.getInstrInfo()), LIS(LIS), + InstrChanges(std::move(InstrChanges)) {} + + /// Performs the actual expansion. + void expand(); + /// Performs final cleanup after expansion. + void cleanup(); + + /// Returns the newly rewritten kernel block, or nullptr if this was + /// optimized away. + MachineBasicBlock *getRewrittenKernel() { return NewKernel; } +}; + +/// A reimplementation of ModuloScheduleExpander. It works by generating a +/// standalone kernel loop and peeling out the prologs and epilogs. +class PeelingModuloScheduleExpander { + ModuloSchedule &Schedule; + MachineFunction &MF; + const TargetSubtargetInfo &ST; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + LiveIntervals *LIS; + + /// The original loop block that gets rewritten in-place. + MachineBasicBlock *BB; + /// The original loop preheader. + MachineBasicBlock *Preheader; + /// All prolog and epilog blocks. + SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs; + /// For every block, the stages that are produced. + DenseMap<MachineBasicBlock *, BitVector> LiveStages; + /// For every block, the stages that are available. A stage can be available + /// but not produced (in the epilog) or produced but not available (in the + /// prolog). + DenseMap<MachineBasicBlock *, BitVector> AvailableStages; + + /// CanonicalMIs and BlockMIs form a bidirectional map between any of the + /// loop kernel clones. + DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs; + DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *> + BlockMIs; + + /// State passed from peelKernel to peelPrologAndEpilogs(). + std::deque<MachineBasicBlock *> PeeledFront, PeeledBack; + +public: + PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, + LiveIntervals *LIS) + : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()), + TII(ST.getInstrInfo()), LIS(LIS) {} + + void expand(); + + /// Runs ModuloScheduleExpander and treats it as a golden input to validate + /// aspects of the code generated by PeelingModuloScheduleExpander. + void validateAgainstModuloScheduleExpander(); + +protected: + /// Converts BB from the original loop body to the rewritten, pipelined + /// steady-state. 
+ void rewriteKernel(); + +private: + /// Peels one iteration of the rewritten kernel (BB) in the specified + /// direction. + MachineBasicBlock *peelKernel(LoopPeelDirection LPD); + /// Peel the kernel forwards and backwards to produce prologs and epilogs, + /// and stitch them together. + void peelPrologAndEpilogs(); + /// All prolog and epilog blocks are clones of the kernel, so any produced + /// register in one block has an corollary in all other blocks. + Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB); + /// Change all users of MI, if MI is predicated out + /// (LiveStages[MI->getParent()] == false). + void rewriteUsesOf(MachineInstr *MI); + /// Insert branches between prologs, kernel and epilogs. + void fixupBranches(); + /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block + /// to a block dominated by all prologs and epilogs. This allows us to treat + /// the loop exiting block as any other kernel clone. + MachineBasicBlock *CreateLCSSAExitingBlock(); + /// Helper to get the stage of an instruction in the schedule. + unsigned getStage(MachineInstr *MI) { + if (CanonicalMIs.count(MI)) + MI = CanonicalMIs[MI]; + return Schedule.getStage(MI); + } +}; + +/// Expander that simply annotates each scheduled instruction with a post-instr +/// symbol that can be consumed by the ModuloScheduleTest pass. +/// +/// The post-instr symbol is a way of annotating an instruction that can be +/// roundtripped in MIR. The syntax is: +/// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5> +class ModuloScheduleTestAnnotater { + MachineFunction &MF; + ModuloSchedule &S; + +public: + ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S) + : MF(MF), S(S) {} + + /// Performs the annotation. + void annotate(); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h index 8b014ccbb07b..099ba788e9a2 100644 --- a/include/llvm/CodeGen/PBQP/Math.h +++ b/include/llvm/CodeGen/PBQP/Math.h @@ -28,17 +28,17 @@ class Vector { public: /// Construct a PBQP vector of the given size. explicit Vector(unsigned Length) - : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {} + : Length(Length), Data(std::make_unique<PBQPNum []>(Length)) {} /// Construct a PBQP vector with initializer. Vector(unsigned Length, PBQPNum InitVal) - : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) { + : Length(Length), Data(std::make_unique<PBQPNum []>(Length)) { std::fill(Data.get(), Data.get() + Length, InitVal); } /// Copy construct a PBQP vector. Vector(const Vector &V) - : Length(V.Length), Data(llvm::make_unique<PBQPNum []>(Length)) { + : Length(V.Length), Data(std::make_unique<PBQPNum []>(Length)) { std::copy(V.Data.get(), V.Data.get() + Length, Data.get()); } @@ -125,21 +125,21 @@ private: public: /// Construct a PBQP Matrix with the given dimensions. Matrix(unsigned Rows, unsigned Cols) : - Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) { + Rows(Rows), Cols(Cols), Data(std::make_unique<PBQPNum []>(Rows * Cols)) { } /// Construct a PBQP Matrix with the given dimensions and initial /// value. Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal) : Rows(Rows), Cols(Cols), - Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) { + Data(std::make_unique<PBQPNum []>(Rows * Cols)) { std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal); } /// Copy construct a PBQP matrix. 
Matrix(const Matrix &M) : Rows(M.Rows), Cols(M.Cols), - Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) { + Data(std::make_unique<PBQPNum []>(Rows * Cols)) { std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get()); } diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index d92ee93268e7..1e765ce51e4a 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -226,6 +226,10 @@ namespace llvm { /// inserting cmov instructions. extern char &EarlyIfConverterID; + /// EarlyIfPredicator - This pass performs if-conversion on SSA form by + /// predicating if/else block and insert select at the join point. + extern char &EarlyIfPredicatorID; + /// This pass performs instruction combining using trace metrics to estimate /// critical-path and resource depth. extern char &MachineCombinerID; diff --git a/include/llvm/CodeGen/Register.h b/include/llvm/CodeGen/Register.h index 907c1a99e56f..aa5173684e24 100644 --- a/include/llvm/CodeGen/Register.h +++ b/include/llvm/CodeGen/Register.h @@ -9,6 +9,7 @@ #ifndef LLVM_CODEGEN_REGISTER_H #define LLVM_CODEGEN_REGISTER_H +#include "llvm/MC/MCRegister.h" #include <cassert> namespace llvm { @@ -20,41 +21,136 @@ class Register { public: Register(unsigned Val = 0): Reg(Val) {} + Register(MCRegister Val): Reg(Val) {} + + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. The unsigned values are divided into these ranges: + // + // 0 Not a register, can be used as a sentinel. + // [1;2^30) Physical registers assigned by TableGen. + // [2^30;2^31) Stack slots. (Rarely used.) + // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo. + // + // Further sentinels can be allocated from the small negative integers. + // DenseMapInfo<unsigned> uses -1u and -2u. + + /// isStackSlot - Sometimes it is useful the be able to store a non-negative + /// frame index in a variable that normally holds a register. isStackSlot() + /// returns true if Reg is in the range used for stack slots. + /// + /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack + /// slots, so if a variable may contains a stack slot, always check + /// isStackSlot() first. + /// + static bool isStackSlot(unsigned Reg) { + return MCRegister::isStackSlot(Reg); + } + + /// Compute the frame index from a register value representing a stack slot. + static int stackSlot2Index(unsigned Reg) { + assert(isStackSlot(Reg) && "Not a stack slot"); + return int(Reg - (1u << 30)); + } + + /// Convert a non-negative frame index to a stack slot register value. + static unsigned index2StackSlot(int FI) { + assert(FI >= 0 && "Cannot hold a negative frame index."); + return FI + (1u << 30); + } + + /// Return true if the specified register number is in + /// the physical register namespace. + static bool isPhysicalRegister(unsigned Reg) { + return MCRegister::isPhysicalRegister(Reg); + } + + /// Return true if the specified register number is in + /// the virtual register namespace. + static bool isVirtualRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) < 0; + } + + /// Convert a virtual register number to a 0-based index. + /// The first virtual register in a function will get the index 0. + static unsigned virtReg2Index(unsigned Reg) { + assert(isVirtualRegister(Reg) && "Not a virtual register"); + return Reg & ~(1u << 31); + } + + /// Convert a 0-based index to a virtual register number. 
+ /// This is the inverse operation of VirtReg2IndexFunctor below. + static unsigned index2VirtReg(unsigned Index) { + return Index | (1u << 31); + } /// Return true if the specified register number is in the virtual register /// namespace. bool isVirtual() const { - return int(Reg) < 0; + return isVirtualRegister(Reg); } /// Return true if the specified register number is in the physical register /// namespace. bool isPhysical() const { - return int(Reg) > 0; + return isPhysicalRegister(Reg); } /// Convert a virtual register number to a 0-based index. The first virtual /// register in a function will get the index 0. unsigned virtRegIndex() const { - assert(isVirtual() && "Not a virtual register"); - return Reg & ~(1u << 31); - } - - /// Convert a 0-based index to a virtual register number. - /// This is the inverse operation of VirtReg2IndexFunctor below. - static Register index2VirtReg(unsigned Index) { - return Register(Index | (1u << 31)); + return virtReg2Index(Reg); } operator unsigned() const { return Reg; } + unsigned id() const { return Reg; } + + operator MCRegister() const { + return MCRegister(Reg); + } + bool isValid() const { return Reg != 0; } + + /// Comparisons between register objects + bool operator==(const Register &Other) const { return Reg == Other.Reg; } + bool operator!=(const Register &Other) const { return Reg != Other.Reg; } + bool operator==(const MCRegister &Other) const { return Reg == Other.id(); } + bool operator!=(const MCRegister &Other) const { return Reg != Other.id(); } + + /// Comparisons against register constants. E.g. + /// * R == AArch64::WZR + /// * R == 0 + /// * R == VirtRegMap::NO_PHYS_REG + bool operator==(unsigned Other) const { return Reg == Other; } + bool operator!=(unsigned Other) const { return Reg != Other; } + bool operator==(int Other) const { return Reg == unsigned(Other); } + bool operator!=(int Other) const { return Reg != unsigned(Other); } + // MSVC requires that we explicitly declare these two as well. + bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); } + bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); } +}; + +// Provide DenseMapInfo for Register +template<> struct DenseMapInfo<Register> { + static inline unsigned getEmptyKey() { + return DenseMapInfo<unsigned>::getEmptyKey(); + } + static inline unsigned getTombstoneKey() { + return DenseMapInfo<unsigned>::getTombstoneKey(); + } + static unsigned getHashValue(const Register &Val) { + return DenseMapInfo<unsigned>::getHashValue(Val.id()); + } + static bool isEqual(const Register &LHS, const Register &RHS) { + return DenseMapInfo<unsigned>::isEqual(LHS.id(), RHS.id()); + } }; } -#endif +#endif // ifndef LLVM_CODEGEN_REGISTER_H diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h index 14af5c4d090d..25b310c47621 100644 --- a/include/llvm/CodeGen/RegisterClassInfo.h +++ b/include/llvm/CodeGen/RegisterClassInfo.h @@ -110,7 +110,7 @@ public: /// getLastCalleeSavedAlias - Returns the last callee saved register that /// overlaps PhysReg, or 0 if Reg doesn't overlap a CalleeSavedAliases. 
unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + assert(Register::isPhysicalRegister(PhysReg)); if (PhysReg < CalleeSavedAliases.size()) return CalleeSavedAliases[PhysReg]; return 0; diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 5bbaa03fd751..92333b859f1b 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -129,6 +129,8 @@ public: bool operator==(const PressureChange &RHS) const { return PSetID == RHS.PSetID && UnitInc == RHS.UnitInc; } + + void dump() const; }; /// List of PressureChanges in order of increasing, unique PSetID. @@ -248,6 +250,7 @@ struct RegPressureDelta { bool operator!=(const RegPressureDelta &RHS) const { return !operator==(RHS); } + void dump() const; }; /// A set of live virtual registers and physical register units. @@ -273,15 +276,15 @@ private: unsigned NumRegUnits; unsigned getSparseIndexFromReg(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits; + if (Register::isVirtualRegister(Reg)) + return Register::virtReg2Index(Reg) + NumRegUnits; assert(Reg < NumRegUnits); return Reg; } unsigned getRegFromSparseIndex(unsigned SparseIndex) const { if (SparseIndex >= NumRegUnits) - return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits); + return Register::index2VirtReg(SparseIndex-NumRegUnits); return SparseIndex; } diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 9c48df82f07d..5b5a80a67e7f 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -51,7 +51,7 @@ class RegScavenger { /// If non-zero, the specific register is currently being /// scavenged. That is, it is spilled to this scavenging stack slot. - unsigned Reg = 0; + Register Reg; /// The instruction that restores the scavenged register from stack. const MachineInstr *Restore = nullptr; @@ -119,14 +119,14 @@ public: MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; } /// Return if a specific register is currently used. - bool isRegUsed(unsigned Reg, bool includeReserved = true) const; + bool isRegUsed(Register Reg, bool includeReserved = true) const; /// Return all available registers in the register class in Mask. BitVector getRegsAvailable(const TargetRegisterClass *RC); /// Find an unused register of the specified register class. /// Return 0 if none is found. - unsigned FindUnusedReg(const TargetRegisterClass *RC) const; + Register FindUnusedReg(const TargetRegisterClass *RC) const; /// Add a scavenging frame index. void addScavengingFrameIndex(int FI) { @@ -160,10 +160,10 @@ public: /// /// If \p AllowSpill is false, fail if a spill is required to make the /// register available, and return NoRegister. - unsigned scavengeRegister(const TargetRegisterClass *RC, + Register scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill = true); - unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj, + Register scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj, bool AllowSpill = true) { return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill); } @@ -177,17 +177,17 @@ public: /// /// If \p AllowSpill is false, fail if a spill is required to make the /// register available, and return NoRegister. 
- unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC, + Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill = true); /// Tell the scavenger a register is used. - void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); + void setRegUsed(Register Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); private: /// Returns true if a register is reserved. It is never "unused". - bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); } + bool isReserved(Register Reg) const { return MRI->isReserved(Reg); } /// setUsed / setUnused - Mark the state of one or a number of register units. /// @@ -203,16 +203,16 @@ private: void determineKillsAndDefs(); /// Add all Reg Units that Reg contains to BV. - void addRegUnits(BitVector &BV, unsigned Reg); + void addRegUnits(BitVector &BV, Register Reg); /// Remove all Reg Units that \p Reg contains from \p BV. - void removeRegUnits(BitVector &BV, unsigned Reg); + void removeRegUnits(BitVector &BV, Register Reg); /// Return the candidate register that is unused for the longest after /// StartMI. UseMI is set to the instruction where the search stopped. /// /// No more than InstrLimit instructions are inspected. - unsigned findSurvivorReg(MachineBasicBlock::iterator StartMI, + Register findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI); @@ -225,7 +225,7 @@ private: /// Spill a register after position \p After and reload it before position /// \p UseMI. - ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator Before, MachineBasicBlock::iterator &UseMI); }; diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 3e3b604acbac..1eb9b9f322ba 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -34,6 +34,7 @@ namespace llvm { + class AAResults; class LiveIntervals; class MachineFrameInfo; class MachineFunction; @@ -57,7 +58,7 @@ namespace llvm { : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); + return Register::virtReg2Index(VirtReg); } }; @@ -173,7 +174,7 @@ namespace llvm { /// Tracks the last instructions in this region using each virtual register. VReg2SUnitOperIdxMultiMap CurrentVRegUses; - AliasAnalysis *AAForDep = nullptr; + AAResults *AAForDep = nullptr; /// Remember a generic side-effecting instruction as we proceed. /// No other SU ever gets scheduled around it (except in the special @@ -201,7 +202,7 @@ namespace llvm { Value2SUsMap &loads, unsigned N); /// Adds a chain edge between SUa and SUb, but only if both - /// AliasAnalysis and Target fail to deny the dependency. + /// AAResults and Target fail to deny the dependency. void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency = 0); @@ -306,7 +307,7 @@ namespace llvm { /// If \p RPTracker is non-null, compute register pressure as a side effect. /// The DAG builder is an efficient place to do it because it already visits /// operands. 
- void buildSchedGraph(AliasAnalysis *AA, + void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker = nullptr, PressureDiffs *PDiffs = nullptr, LiveIntervals *LIS = nullptr, @@ -374,6 +375,9 @@ namespace llvm { /// Returns a mask for which lanes get read/written by the given (register) /// machine operand. LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; + + /// Returns true if the def register in \p MO has no uses. + bool deadDefHasNoUse(const MachineOperand &MO); }; /// Creates a new SUnit and return a ptr to it. diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 12a970847021..6b8e2dd803ba 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -26,8 +26,6 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -58,6 +56,7 @@ namespace llvm { +class AAResults; class BlockAddress; class Constant; class ConstantFP; @@ -66,6 +65,7 @@ class DataLayout; struct fltSemantics; class GlobalValue; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineConstantPoolValue; @@ -269,7 +269,13 @@ class SelectionDAG { using CallSiteInfo = MachineFunction::CallSiteInfo; using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl; - DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo; + + struct CallSiteDbgInfo { + CallSiteInfo CSInfo; + MDNode *HeapAllocSite = nullptr; + }; + + DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo; uint16_t NextPersistentId = 0; @@ -382,7 +388,11 @@ private: Node->OperandList = nullptr; } void CreateTopologicalOrder(std::vector<SDNode*>& Order); + public: + // Maximum depth for recursive analysis such as computeKnownBits, etc. + static constexpr unsigned MaxRecursionDepth = 6; + explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); SelectionDAG(const SelectionDAG &) = delete; SelectionDAG &operator=(const SelectionDAG &) = delete; @@ -489,7 +499,7 @@ public: /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and /// types that are illegal on the target. 
- void Combine(CombineLevel Level, AliasAnalysis *AA, + void Combine(CombineLevel Level, AAResults *AA, CodeGenOpt::Level OptLevel); /// This transforms the SelectionDAG into a SelectionDAG that @@ -628,10 +638,9 @@ public: SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset = 0, bool isTargetGA = false, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, - int64_t offset = 0, - unsigned char TargetFlags = 0) { + int64_t offset = 0, unsigned TargetFlags = 0) { return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags); } SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false); @@ -639,28 +648,27 @@ public: return getFrameIndex(FI, VT, true); } SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false, - unsigned char TargetFlags = 0); - SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) { + unsigned TargetFlags = 0); + SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) { return getJumpTable(JTI, VT, true, TargetFlags); } - SDValue getConstantPool(const Constant *C, EVT VT, - unsigned Align = 0, int Offs = 0, bool isT=false, - unsigned char TargetFlags = 0); - SDValue getTargetConstantPool(const Constant *C, EVT VT, - unsigned Align = 0, int Offset = 0, - unsigned char TargetFlags = 0) { + SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align = 0, + int Offs = 0, bool isT = false, + unsigned TargetFlags = 0); + SDValue getTargetConstantPool(const Constant *C, EVT VT, unsigned Align = 0, + int Offset = 0, unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Align = 0, int Offs = 0, bool isT=false, - unsigned char TargetFlags = 0); - SDValue getTargetConstantPool(MachineConstantPoolValue *C, - EVT VT, unsigned Align = 0, - int Offset = 0, unsigned char TargetFlags=0) { + unsigned TargetFlags = 0); + SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Align = 0, int Offset = 0, + unsigned TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } SDValue getTargetIndex(int Index, EVT VT, int64_t Offset = 0, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); // When generating a branch to a BB, we don't in general know enough // to provide debug info for the BB at that time, so keep this one around. 
SDValue getBasicBlock(MachineBasicBlock *MBB); @@ -668,7 +676,7 @@ public: SDValue getExternalSymbol(const char *Sym, EVT VT); SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT); SDValue getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags = 0); + unsigned TargetFlags = 0); SDValue getMCSymbol(MCSymbol *Sym, EVT VT); SDValue getValueType(EVT); @@ -677,12 +685,10 @@ public: SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label); SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root, MCSymbol *Label); - SDValue getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset = 0, bool isTarget = false, - unsigned char TargetFlags = 0); + SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0, + bool isTarget = false, unsigned TargetFlags = 0); SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset = 0, - unsigned char TargetFlags = 0) { + int64_t Offset = 0, unsigned TargetFlags = 0) { return getBlockAddress(BA, VT, Offset, true, TargetFlags); } @@ -1035,7 +1041,7 @@ public: unsigned Align = 0, MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, - unsigned Size = 0, + uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()); SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, @@ -1117,9 +1123,11 @@ public: MachineMemOperand *MMO, bool IsTruncating = false, bool IsCompressing = false); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, - ArrayRef<SDValue> Ops, MachineMemOperand *MMO); + ArrayRef<SDValue> Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, - ArrayRef<SDValue> Ops, MachineMemOperand *MMO); + ArrayRef<SDValue> Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); /// Return (create a new or find existing) a target-specific node. /// TargetMemSDNode should be derived class from MemSDNode. @@ -1588,9 +1596,12 @@ public: /// Extract. The reduction must use one of the opcodes listed in /p /// CandidateBinOps and on success /p BinOp will contain the matching opcode. /// Returns the vector that is being reduced on, or SDValue() if a reduction - /// was not matched. + /// was not matched. If \p AllowPartials is set then in the case of a + /// reduction pattern that only matches the first few stages, the extracted + /// subvector of the start of the reduction is returned. SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef<ISD::NodeType> CandidateBinOps); + ArrayRef<ISD::NodeType> CandidateBinOps, + bool AllowPartials = false); /// Utility function used by legalize and lowering to /// "unroll" a vector operation by splitting out the scalars and operating @@ -1664,16 +1675,28 @@ public: } void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) { - SDCallSiteInfo[CallNode] = std::move(CallInfo); + SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo); } CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) { - auto I = SDCallSiteInfo.find(CallNode); - if (I != SDCallSiteInfo.end()) - return std::move(I->second); + auto I = SDCallSiteDbgInfo.find(CallNode); + if (I != SDCallSiteDbgInfo.end()) + return std::move(I->second).CSInfo; return CallSiteInfo(); } + void addHeapAllocSite(const SDNode *Node, MDNode *MD) { + SDCallSiteDbgInfo[Node].HeapAllocSite = MD; + } + + /// Return the HeapAllocSite type associated with the SDNode, if it exists. 
+ MDNode *getHeapAllocSite(const SDNode *Node) { + auto It = SDCallSiteDbgInfo.find(Node); + if (It == SDCallSiteDbgInfo.end()) + return nullptr; + return It->second.HeapAllocSite; + } + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); @@ -1712,7 +1735,7 @@ private: std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes; StringMap<SDNode*> ExternalSymbols; - std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols; + std::map<std::pair<std::string, unsigned>, SDNode *> TargetExternalSymbols; DenseMap<MCSymbol *, SDNode *> MCSymbols; }; diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 147c325342fc..de71a21d4671 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -22,22 +22,23 @@ #include <memory> namespace llvm { - class FastISel; - class SelectionDAGBuilder; - class SDValue; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class OptimizationRemarkEmitter; - class TargetLowering; - class TargetLibraryInfo; - class FunctionLoweringInfo; - class ScheduleHazardRecognizer; - class SwiftErrorValueTracking; - class GCFunctionInfo; - class ScheduleDAGSDNodes; - class LoadInst; +class AAResults; +class FastISel; +class SelectionDAGBuilder; +class SDValue; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class OptimizationRemarkEmitter; +class TargetLowering; +class TargetLibraryInfo; +class FunctionLoweringInfo; +class ScheduleHazardRecognizer; +class SwiftErrorValueTracking; +class GCFunctionInfo; +class ScheduleDAGSDNodes; +class LoadInst; /// SelectionDAGISel - This is the common base class used for SelectionDAG-based /// pattern-matching instruction selectors. @@ -51,7 +52,7 @@ public: MachineRegisterInfo *RegInfo; SelectionDAG *CurDAG; SelectionDAGBuilder *SDB; - AliasAnalysis *AA; + AAResults *AA; GCFunctionInfo *GFI; CodeGenOpt::Level OptLevel; const TargetInstrInfo *TII; @@ -162,6 +163,7 @@ public: OPC_EmitMergeInputChains1_1, OPC_EmitMergeInputChains1_2, OPC_EmitCopyToReg, + OPC_EmitCopyToReg2, OPC_EmitNodeXForm, OPC_EmitNode, // Space-optimized forms that implicitly encode number of result VTs. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 5aab9643e09d..ceb8b72635a2 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -548,10 +548,15 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class MaskedGatherScatterSDNode; uint16_t : NumMemSDNodeBits; - uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode + // This storage is shared between disparate class hierarchies to hold an + // enumeration specific to the class hierarchy in use. 
+ // LSBaseSDNode => enum ISD::MemIndexedMode + // MaskedGatherScatterSDNode => enum ISD::MemIndexType + uint16_t AddressingMode : 3; }; enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 }; @@ -696,14 +701,20 @@ public: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FMAXNUM: case ISD::STRICT_FMINNUM: case ISD::STRICT_FCEIL: case ISD::STRICT_FFLOOR: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: return true; @@ -1346,6 +1357,17 @@ public: /// store occurs. AtomicOrdering getOrdering() const { return MMO->getOrdering(); } + /// Return true if the memory operation ordering is Unordered or higher. + bool isAtomic() const { return MMO->isAtomic(); } + + /// Returns true if the memory operation doesn't imply any ordering + /// constraints on surrounding memory operations beyond the normal memory + /// aliasing rules. + bool isUnordered() const { return MMO->isUnordered(); } + + /// Returns true if the memory operation is neither atomic or volatile. + bool isSimple() const { return !isAtomic() && !isVolatile(); } + /// Return the type of the in-memory value. EVT getMemoryVT() const { return MemoryVT; } @@ -1702,16 +1724,16 @@ class GlobalAddressSDNode : public SDNode { const GlobalValue *TheGlobal; int64_t Offset; - unsigned char TargetFlags; + unsigned TargetFlags; GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, int64_t o, - unsigned char TF); + unsigned TF); public: const GlobalValue *getGlobal() const { return TheGlobal; } int64_t getOffset() const { return Offset; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } // Return the address space this GlobalAddress belongs to. unsigned getAddressSpace() const; @@ -1778,16 +1800,16 @@ class JumpTableSDNode : public SDNode { friend class SelectionDAG; int JTI; - unsigned char TargetFlags; + unsigned TargetFlags; - JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF) + JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF) : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { } public: int getIndex() const { return JTI; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::JumpTable || @@ -1804,10 +1826,10 @@ class ConstantPoolSDNode : public SDNode { } Val; int Offset; // It's a MachineConstantPoolValue if top bit is set. unsigned Alignment; // Minimum alignment requirement of CP (not log2 value). - unsigned char TargetFlags; + unsigned TargetFlags; ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, - unsigned Align, unsigned char TF) + unsigned Align, unsigned TF) : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { @@ -1816,7 +1838,7 @@ class ConstantPoolSDNode : public SDNode { } ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, - EVT VT, int o, unsigned Align, unsigned char TF) + EVT VT, int o, unsigned Align, unsigned TF) : SDNode(isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool, 0, DebugLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { @@ -1847,7 +1869,7 @@ public: // Return the alignment of this constant pool object, which is either 0 (for // default alignment) or the desired value. unsigned getAlignment() const { return Alignment; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } Type *getType() const; @@ -1861,16 +1883,16 @@ public: class TargetIndexSDNode : public SDNode { friend class SelectionDAG; - unsigned char TargetFlags; + unsigned TargetFlags; int Index; int64_t Offset; public: - TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF) - : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), - TargetFlags(TF), Index(Idx), Offset(Ofs) {} + TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF) + : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)), + TargetFlags(TF), Index(Idx), Offset(Ofs) {} - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } int getIndex() const { return Index; } int64_t getOffset() const { return Offset; } @@ -2063,17 +2085,17 @@ class BlockAddressSDNode : public SDNode { const BlockAddress *BA; int64_t Offset; - unsigned char TargetFlags; + unsigned TargetFlags; BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, - int64_t o, unsigned char Flags) + int64_t o, unsigned Flags) : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), BA(ba), Offset(o), TargetFlags(Flags) {} public: const BlockAddress *getBlockAddress() const { return BA; } int64_t getOffset() const { return Offset; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::BlockAddress || @@ -2104,15 +2126,16 @@ class ExternalSymbolSDNode : public SDNode { friend class SelectionDAG; const char *Symbol; - unsigned char TargetFlags; + unsigned TargetFlags; - ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT) - : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, - 0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {} + ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT) + : SDNode(isTarget ? 
ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0, + DebugLoc(), getSDVTList(VT)), + Symbol(Sym), TargetFlags(TF) {} public: const char *getSymbol() const { return Symbol; } - unsigned char getTargetFlags() const { return TargetFlags; } + unsigned getTargetFlags() const { return TargetFlags; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::ExternalSymbol || @@ -2181,8 +2204,6 @@ public: : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { LSBaseSDNodeBits.AddressingMode = AM; assert(getAddressingMode() == AM && "Value truncated"); - assert((!MMO->isAtomic() || MMO->isVolatile()) && - "use an AtomicSDNode instead for non-volatile atomics"); } const SDValue &getOffset() const { @@ -2362,8 +2383,24 @@ public: MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, - MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = IndexType; + assert(getIndexType() == IndexType && "Value truncated"); + } + + /// How is Index applied to BasePtr when computing addresses. + ISD::MemIndexType getIndexType() const { + return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); + } + bool isIndexScaled() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::UNSIGNED_SCALED); + } + bool isIndexSigned() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::SIGNED_UNSCALED); + } // In the both nodes address is Op1, mask is Op2: // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) @@ -2387,8 +2424,10 @@ public: friend class SelectionDAG; MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {} + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) + : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, + IndexType) {} const SDValue &getPassThru() const { return getOperand(1); } @@ -2404,8 +2443,10 @@ public: friend class SelectionDAG; MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - EVT MemVT, MachineMemOperand *MMO) - : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {} + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) + : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO, + IndexType) {} const SDValue &getValue() const { return getOperand(1); } diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index 2bdf4425e24a..ed52db3e6269 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -61,6 +61,12 @@ private: /// protection when -fstack-protection is used. unsigned SSPBufferSize = 0; + /// VisitedPHIs - The set of PHI nodes visited when determining + /// if a variable's reference has been taken. This set + /// is maintained to ensure we don't visit the same PHI node multiple + /// times. + SmallPtrSet<const PHINode *, 16> VisitedPHIs; + // A prologue is generated. 
bool HasPrologue = false; diff --git a/include/llvm/CodeGen/SwitchLoweringUtils.h b/include/llvm/CodeGen/SwitchLoweringUtils.h index 62134dc792f7..b8adcf759b19 100644 --- a/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -212,16 +212,17 @@ struct BitTestBlock { BitTestInfo Cases; BranchProbability Prob; BranchProbability DefaultProb; + bool OmitRangeCheck; BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr) {} + Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {} }; -/// Return the range of value within a range. +/// Return the range of values within a range. uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, unsigned Last); diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h index aebeeecbe506..db3d1175afee 100644 --- a/include/llvm/CodeGen/TargetCallingConv.h +++ b/include/llvm/CodeGen/TargetCallingConv.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_TARGETCALLINGCONV_H #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -120,16 +121,22 @@ namespace ISD { bool isPointer() const { return IsPointer; } void setPointer() { IsPointer = 1; } - unsigned getByValAlign() const { return (1U << ByValAlign) / 2; } - void setByValAlign(unsigned A) { - ByValAlign = Log2_32(A) + 1; - assert(getByValAlign() == A && "bitfield overflow"); + unsigned getByValAlign() const { + MaybeAlign A = decodeMaybeAlign(ByValAlign); + return A ? A->value() : 0; + } + void setByValAlign(Align A) { + ByValAlign = encode(A); + assert(getByValAlign() == A.value() && "bitfield overflow"); } - unsigned getOrigAlign() const { return (1U << OrigAlign) / 2; } - void setOrigAlign(unsigned A) { - OrigAlign = Log2_32(A) + 1; - assert(getOrigAlign() == A && "bitfield overflow"); + unsigned getOrigAlign() const { + MaybeAlign A = decodeMaybeAlign(OrigAlign); + return A ? 
A->value() : 0; + } + void setOrigAlign(Align A) { + OrigAlign = encode(A); + assert(getOrigAlign() == A.value() && "bitfield overflow"); } unsigned getByValSize() const { return ByValSize; } diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h index 878c9ffd2b51..72edb27964c4 100644 --- a/include/llvm/CodeGen/TargetFrameLowering.h +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -28,6 +28,7 @@ namespace TargetStackID { enum Value { Default = 0, SGPRSpill = 1, + SVEVector = 2, NoAlloc = 255 }; } @@ -53,15 +54,15 @@ public: }; private: StackDirection StackDir; - unsigned StackAlignment; - unsigned TransientStackAlignment; + Align StackAlignment; + Align TransientStackAlignment; int LocalAreaOffset; bool StackRealignable; public: - TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1, bool StackReal = true) - : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), - LocalAreaOffset(LAO), StackRealignable(StackReal) {} + TargetFrameLowering(StackDirection D, Align StackAl, int LAO, + Align TransAl = Align::None(), bool StackReal = true) + : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), + LocalAreaOffset(LAO), StackRealignable(StackReal) {} virtual ~TargetFrameLowering(); @@ -76,7 +77,7 @@ public: /// stack pointer must be aligned on entry to a function. Typically, this /// is the largest alignment for any data object in the target. /// - unsigned getStackAlignment() const { return StackAlignment; } + unsigned getStackAlignment() const { return StackAlignment.value(); } /// alignSPAdjust - This method aligns the stack adjustment to the correct /// alignment. @@ -95,7 +96,7 @@ public: /// calls. /// unsigned getTransientStackAlignment() const { - return TransientStackAlignment; + return TransientStackAlignment.value(); } /// isStackRealignable - This method returns whether the stack can be @@ -366,15 +367,10 @@ public: /// Check if given function is safe for not having callee saved registers. /// This is used when interprocedural register allocation is enabled. - static bool isSafeForNoCSROpt(const Function &F) { - if (!F.hasLocalLinkage() || F.hasAddressTaken() || - !F.hasFnAttribute(Attribute::NoRecurse)) - return false; - // Function should not be optimized as tail call. - for (const User *U : F.users()) - if (auto CS = ImmutableCallSite(U)) - if (CS.isTailCall()) - return false; + static bool isSafeForNoCSROpt(const Function &F); + + /// Check if the no-CSR optimisation is profitable for the given function. 
+ virtual bool isProfitableForNoCSROpt(const Function &F) const { return true; } diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 25b04f8c019a..5011cf34c0ee 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -38,10 +38,12 @@ namespace llvm { +class AAResults; class DFAPacketizer; class InstrItineraryData; class LiveIntervals; class LiveVariables; +class MachineLoop; class MachineMemOperand; class MachineRegisterInfo; class MCAsmInfo; @@ -60,6 +62,8 @@ class TargetSubtargetInfo; template <class T> class SmallVectorImpl; +using ParamLoadedValue = std::pair<MachineOperand, DIExpression*>; + //--------------------------------------------------------------------------- /// /// TargetInstrInfo - Interface to description of machine instruction set @@ -92,7 +96,7 @@ public: /// registers so that the instructions result is independent of the place /// in the function. bool isTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA = nullptr) const { + AAResults *AA = nullptr) const { return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI.getDesc().isRematerializable() && (isReallyTriviallyReMaterializable(MI, AA) || @@ -108,7 +112,7 @@ protected: /// not always available. /// Requirements must be check as stated in isTriviallyReMaterializable() . virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { + AAResults *AA) const { return false; } @@ -151,7 +155,7 @@ private: /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AliasAnalysis *AA) const; + AAResults *AA) const; public: /// These methods return the opcode of the frame setup/destroy instructions @@ -419,7 +423,8 @@ public: /// findCommutedOpIndices(MI, Op1, Op2); /// can be interpreted as a query asking to find an operand that would be /// commutable with the operand#1. - virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + virtual bool findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const; /// A pair composed of a register and a sub-register index. @@ -659,6 +664,50 @@ public: BytesAdded); } + /// Object returned by analyzeLoopForPipelining. Allows software pipelining + /// implementations to query attributes of the loop being pipelined and to + /// apply target-specific updates to the loop once pipelining is complete. + class PipelinerLoopInfo { + public: + virtual ~PipelinerLoopInfo(); + /// Return true if the given instruction should not be pipelined and should + /// be ignored. An example could be a loop comparison, or induction variable + /// update with no users being pipelined. + virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0; + + /// Create a condition to determine if the trip count of the loop is greater + /// than TC. + /// + /// If the trip count is statically known to be greater than TC, return + /// true. 
If the trip count is statically known to be not greater than TC, + /// return false. Otherwise return nullopt and fill out Cond with the test + /// condition. + virtual Optional<bool> + createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB, + SmallVectorImpl<MachineOperand> &Cond) = 0; + + /// Modify the loop such that the trip count is + /// OriginalTC + TripCountAdjust. + virtual void adjustTripCount(int TripCountAdjust) = 0; + + /// Called when the loop's preheader has been modified to NewPreheader. + virtual void setPreheader(MachineBasicBlock *NewPreheader) = 0; + + /// Called when the loop is being removed. Any instructions in the preheader + /// should be removed. + /// + /// Once this function is called, no other functions on this object are + /// valid; the loop has been removed. + virtual void disposed() = 0; + }; + + /// Analyze loop L, which must be a single-basic-block loop, and if the + /// conditions can be understood enough produce a PipelinerLoopInfo object. + virtual std::unique_ptr<PipelinerLoopInfo> + analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { + return nullptr; + } + /// Analyze the loop code, return true if it cannot be understoo. Upon /// success, this function returns false and returns information about the /// induction variable and compare instruction used at the end. @@ -730,6 +779,19 @@ public: return false; } + /// Return the increase in code size needed to predicate a contiguous run of + /// NumInsts instructions. + virtual unsigned extraSizeToPredicateInstructions(const MachineFunction &MF, + unsigned NumInsts) const { + return 0; + } + + /// Return an estimate for the code size reduction (in bytes) which will be + /// caused by removing the given branch instruction during if-conversion. + virtual unsigned predictBranchSizeForIfCvt(MachineInstr &MI) const { + return getInstSizeInBytes(MI); + } + /// Return true if it's profitable to unpredicate /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually /// exclusive predicates. @@ -1558,8 +1620,7 @@ public: /// function. virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, - const MachineInstr &MIb, - AliasAnalysis *AA = nullptr) const { + const MachineInstr &MIb) const { assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); assert((MIb.mayLoad() || MIb.mayStore()) && @@ -1636,6 +1697,28 @@ public: return false; } + /// During PHI eleimination lets target to make necessary checks and + /// insert the copy to the PHI destination register in a target specific + /// manner. + virtual MachineInstr *createPHIDestinationCopy( + MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, Register Dst) const { + return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) + .addReg(Src); + } + + /// During PHI eleimination lets target to make necessary checks and + /// insert the copy to the PHI destination register in a target specific + /// manner. + virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsPt, + const DebugLoc &DL, Register Src, + Register SrcSubReg, + Register Dst) const { + return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) + .addReg(Src, 0, SrcSubReg); + } + /// Returns a \p outliner::OutlinedFunction struct containing target-specific /// information for a set of outlining candidates. 
virtual outliner::OutlinedFunction getOutliningCandidateInfo( @@ -1691,6 +1774,11 @@ public: return false; } + /// Produce the expression describing the \p MI loading a value into + /// the parameter's forwarding register. + virtual Optional<ParamLoadedValue> + describeLoadedValue(const MachineInstr &MI) const; + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CatchRetOpcode; diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index d5cca60bb1b2..a58fca7e73f5 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -28,7 +28,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -48,6 +47,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -72,8 +72,10 @@ class Constant; class FastISel; class FunctionLoweringInfo; class GlobalValue; +class GISelKnownBits; class IntrinsicInst; struct KnownBits; +class LegacyDivergenceAnalysis; class LLVMContext; class MachineBasicBlock; class MachineFunction; @@ -122,8 +124,7 @@ public: TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type, - // if an operation is not supported in target HW. + TypeSoftenFloat, // Convert this float to a same size integer type. TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -284,7 +285,7 @@ public: /// a constant pool load whose address depends on the select condition. The /// parameter may be used to differentiate a select with FP compare from /// integer compare. - virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { + virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { return true; } @@ -539,6 +540,12 @@ public: return hasAndNotCompare(X); } + /// Return true if the target has a bit-test instruction: + /// (X & (1 << Y)) ==/!= 0 + /// This knowledge can be used to prevent breaking the pattern, + /// or creating it if it could be recognized. + virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } + /// There are two ways to clear extreme bits (either low or high): /// Mask: x & (-1 << y) (the instcombine canonical form) /// Shifts: x >> y << y @@ -571,6 +578,38 @@ public: return false; } + /// Given the pattern + /// (X & (C l>>/<< Y)) ==/!= 0 + /// return true if it should be transformed into: + /// ((X <</l>> Y) & C) ==/!= 0 + /// WARNING: if 'X' is a constant, the fold may deadlock! + /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() + /// here because it can end up being not linked in. 
+ virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + if (hasBitTest(X, Y)) { + // One interesting pattern that we'd want to form is 'bit test': + // ((1 << Y) & C) ==/!= 0 + // But we also need to be careful not to try to reverse that fold. + + // Is this '1 << Y' ? + if (OldShiftOpcode == ISD::SHL && CC->isOne()) + return false; // Keep the 'bit test' pattern. + + // Will it be '1 << Y' after the transform ? + if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) + return true; // Do form the 'bit test' pattern. + } + + // If 'X' is a constant, and we transform, then we will immediately + // try to undo the fold, thus causing endless combine loop. + // So by default, let's assume everyone prefers the fold + // iff 'X' is not a constant. + return !XC; + } + /// These two forms are equivalent: /// sub %y, (xor %x, -1) /// add (add %x, 1), %y @@ -798,9 +837,9 @@ public: PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; int offset = 0; // offset off of ptrVal - unsigned size = 0; // the size of the memory location + uint64_t size = 0; // the size of the memory location // (taken from memVT if zero) - unsigned align = 1; // alignment + MaybeAlign align = Align::None(); // alignment MachineMemOperand::Flags flags = MachineMemOperand::MONone; IntrinsicInfo() = default; @@ -884,6 +923,7 @@ public: case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Supported = isSupportedFixedPointOperation(Op, VT, Scale); break; } @@ -891,6 +931,8 @@ public: return Supported ? Action : Expand; } + // If Op is a strict floating-point operation, return the result + // of getOperationAction for the equivalent non-strict operation. LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { unsigned EqOpc; switch (Op) { @@ -911,26 +953,25 @@ public: case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; + case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } - auto Action = getOperationAction(EqOpc, VT); - - // We don't currently handle Custom or Promote for strict FP pseudo-ops. - // For now, we just expand for those cases. 
- if (Action != Legal) - Action = Expand; - - return Action; + return getOperationAction(EqOpc, VT); } /// Return true if the specified operation is legal on this target or can be @@ -1206,7 +1247,7 @@ public: EltTy = PointerTy.getTypeForEVT(Ty->getContext()); } return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), - VTy->getNumElements()); + VTy->getElementCount()); } return EVT::getEVT(Ty, AllowUnknown); @@ -1316,9 +1357,9 @@ public: /// Certain targets have context senstive alignment requirements, where one /// type has the alignment requirement of another type. - virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, - DataLayout DL) const { - return DL.getABITypeAlignment(ArgTy); + virtual Align getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return Align(DL.getABITypeAlignment(ArgTy)); } /// If true, then instruction selection should seek to shrink the FP constant @@ -1426,11 +1467,38 @@ public: return false; } + /// LLT handling variant. + virtual bool allowsMisalignedMemoryAccesses( + LLT, unsigned AddrSpace = 0, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool * /*Fast*/ = nullptr) const { + return false; + } + + /// This function returns true if the memory access is aligned or if the + /// target allows this specific unaligned memory access. If the access is + /// allowed, the optional final parameter returns if the access is also fast + /// (as defined by the target). + bool allowsMemoryAccessForAlignment( + LLVMContext &Context, const DataLayout &DL, EVT VT, + unsigned AddrSpace = 0, unsigned Alignment = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const; + + /// Return true if the memory access of this type is aligned or if the target + /// allows this specific unaligned access for the given MachineMemOperand. + /// If the access is allowed, the optional final parameter returns if the + /// access is also fast (as defined by the target). + bool allowsMemoryAccessForAlignment(LLVMContext &Context, + const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, + bool *Fast = nullptr) const; + /// Return true if the target supports a memory access of this type for the /// given address space and alignment. If the access is allowed, the optional /// final parameter returns if the access is also fast (as defined by the /// target). - bool + virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace = 0, unsigned Alignment = 1, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, @@ -1463,6 +1531,16 @@ public: return MVT::Other; } + + /// LLT returning variant. + virtual LLT + getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/, + unsigned /*SrcAlign*/, bool /*IsMemset*/, + bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, + const AttributeList & /*FuncAttributes*/) const { + return LLT(); + } + /// Returns true if it's safe to use load / store of the specified type to /// expand memcpy / memset inline. /// @@ -1522,35 +1600,19 @@ public: report_fatal_error("Funclet EH is not implemented for this target"); } - /// Returns the target's jmp_buf size in bytes (if never set, the default is - /// 200) - unsigned getJumpBufSize() const { - return JumpBufSize; - } - - /// Returns the target's jmp_buf alignment in bytes (if never set, the default - /// is 0) - unsigned getJumpBufAlignment() const { - return JumpBufAlignment; - } - /// Return the minimum stack alignment of an argument. 
- unsigned getMinStackArgumentAlignment() const { + Align getMinStackArgumentAlignment() const { return MinStackArgumentAlignment; } /// Return the minimum function alignment. - unsigned getMinFunctionAlignment() const { - return MinFunctionAlignment; - } + Align getMinFunctionAlignment() const { return MinFunctionAlignment; } /// Return the preferred function alignment. - unsigned getPrefFunctionAlignment() const { - return PrefFunctionAlignment; - } + Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } /// Return the preferred loop alignment. - virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { + virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const { return PrefLoopAlignment; } @@ -1772,6 +1834,11 @@ public: return IsSigned; } + /// Returns true if arguments should be extended in lib calls. + virtual bool shouldExtendTypeInLibCall(EVT Type) const { + return true; + } + /// Returns how the given (atomic) load should be expanded by the /// IR-level AtomicExpand pass. virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { @@ -1848,7 +1915,8 @@ public: /// This may be true if the target does not directly support the /// multiplication operation for the specified type or the sequence of simpler /// ops is faster than the multiply. - virtual bool decomposeMulByConstant(EVT VT, SDValue C) const { + virtual bool decomposeMulByConstant(LLVMContext &Context, + EVT VT, SDValue C) const { return false; } @@ -2056,40 +2124,25 @@ protected: TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); } - /// Set the target's required jmp_buf buffer size (in bytes); default is 200 - void setJumpBufSize(unsigned Size) { - JumpBufSize = Size; - } - - /// Set the target's required jmp_buf buffer alignment (in bytes); default is - /// 0 - void setJumpBufAlignment(unsigned Align) { - JumpBufAlignment = Align; - } - - /// Set the target's minimum function alignment (in log2(bytes)) - void setMinFunctionAlignment(unsigned Align) { - MinFunctionAlignment = Align; + /// Set the target's minimum function alignment. + void setMinFunctionAlignment(Align Alignment) { + MinFunctionAlignment = Alignment; } /// Set the target's preferred function alignment. This should be set if - /// there is a performance benefit to higher-than-minimum alignment (in - /// log2(bytes)) - void setPrefFunctionAlignment(unsigned Align) { - PrefFunctionAlignment = Align; + /// there is a performance benefit to higher-than-minimum alignment + void setPrefFunctionAlignment(Align Alignment) { + PrefFunctionAlignment = Alignment; } - /// Set the target's preferred loop alignment. Default alignment is zero, it - /// means the target does not care about loop alignment. The alignment is - /// specified in log2(bytes). The target may also override - /// getPrefLoopAlignment to provide per-loop values. - void setPrefLoopAlignment(unsigned Align) { - PrefLoopAlignment = Align; - } + /// Set the target's preferred loop alignment. Default alignment is one, it + /// means the target does not care about loop alignment. The target may also + /// override getPrefLoopAlignment to provide per-loop values. + void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } - /// Set the minimum stack alignment of an argument (in log2(bytes)). - void setMinStackArgumentAlignment(unsigned Align) { - MinStackArgumentAlignment = Align; + /// Set the minimum stack alignment of an argument. 
+ void setMinStackArgumentAlignment(Align Alignment) { + MinStackArgumentAlignment = Alignment; } /// Set the maximum atomic operation size supported by the @@ -2555,6 +2608,12 @@ public: // same blocks of its users. virtual bool shouldConsiderGEPOffsetSplit() const { return false; } + // Return the shift amount threshold for profitable transforms into shifts. + // Transforms creating shifts above the returned value will be avoided. + virtual unsigned getShiftAmountThreshold(EVT VT) const { + return VT.getScalarSizeInBits(); + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -2650,25 +2709,19 @@ private: /// register usage. Sched::Preference SchedPreferenceInfo; - /// The size, in bytes, of the target's jmp_buf buffers - unsigned JumpBufSize; - - /// The alignment, in bytes, of the target's jmp_buf buffers - unsigned JumpBufAlignment; - /// The minimum alignment that any argument on the stack needs to have. - unsigned MinStackArgumentAlignment; + Align MinStackArgumentAlignment; /// The minimum function alignment (used when optimizing for size, and to /// prevent explicitly provided alignment from leading to incorrect code). - unsigned MinFunctionAlignment; + Align MinFunctionAlignment; /// The preferred function alignment (used when alignment unspecified and /// optimizing for speed). - unsigned PrefFunctionAlignment; + Align PrefFunctionAlignment; - /// The preferred loop alignment. - unsigned PrefLoopAlignment; + /// The preferred loop alignment (in log2 bot in bytes). + Align PrefLoopAlignment; /// Size in bits of the maximum atomics size the backend supports. /// Accesses larger than this will be expanded by AtomicExpandPass. @@ -2744,7 +2797,6 @@ private: /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; -protected: ValueTypeActionImpl ValueTypeActions; private: @@ -2790,7 +2842,7 @@ protected: /// expected to be merged. unsigned GatherAllAliasesMaxDepth; - /// Specify maximum number of store instructions per memset call. + /// \brief Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of /// store operations that may be substituted for the call to memset. Targets @@ -2801,12 +2853,10 @@ protected: /// with 16-bit alignment would result in four 2-byte stores and one 1-byte /// store. This only applies to setting a constant array of a constant size. unsigned MaxStoresPerMemset; - - /// Maximum number of stores operations that may be substituted for the call - /// to memset, used for functions with OptSize attribute. + /// Likewise for functions with the OptSize attribute. unsigned MaxStoresPerMemsetOptSize; - /// Specify maximum bytes of store instructions per memcpy call. + /// \brief Specify maximum number of store instructions per memcpy call. /// /// When lowering \@llvm.memcpy this field specifies the maximum number of /// store operations that may be substituted for a call to memcpy. Targets @@ -2818,8 +2868,8 @@ protected: /// and one 1-byte store. This only applies to copying a constant array of /// constant size. unsigned MaxStoresPerMemcpy; - - + /// Likewise for functions with the OptSize attribute. + unsigned MaxStoresPerMemcpyOptSize; /// \brief Specify max number of store instructions to glue in inlined memcpy. 
/// /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number @@ -2827,13 +2877,22 @@ protected: // vectorization later on. unsigned MaxGluedStoresPerMemcpy = 0; - /// Maximum number of store operations that may be substituted for a call to - /// memcpy, used for functions with OptSize attribute. - unsigned MaxStoresPerMemcpyOptSize; + /// \brief Specify maximum number of load instructions per memcmp call. + /// + /// When lowering \@llvm.memcmp this field specifies the maximum number of + /// pairs of load operations that may be substituted for a call to memcmp. + /// Targets must set this value based on the cost threshold for that target. + /// Targets should assume that the memcmp will be done using as many of the + /// largest load operations first, followed by smaller ones, if necessary, per + /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine + /// with 32-bit alignment would result in one 4-byte load, a one 2-byte load + /// and one 1-byte load. This only applies to copying a constant array of + /// constant size. unsigned MaxLoadsPerMemcmp; + /// Likewise for functions with the OptSize attribute. unsigned MaxLoadsPerMemcmpOptSize; - /// Specify maximum bytes of store instructions per memmove call. + /// \brief Specify maximum number of store instructions per memmove call. /// /// When lowering \@llvm.memmove this field specifies the maximum number of /// store instructions that may be substituted for a call to memmove. Targets @@ -2844,9 +2903,7 @@ protected: /// with 8-bit alignment would result in nine 1-byte stores. This only /// applies to copying a constant array of constant size. unsigned MaxStoresPerMemmove; - - /// Maximum number of store instructions that may be substituted for a call to - /// memmove, used for functions with OptSize attribute. + /// Likewise for functions with the OptSize attribute. unsigned MaxStoresPerMemmoveOptSize; /// Tells the code generator that select is more expensive than a branch if @@ -2885,6 +2942,7 @@ protected: class TargetLowering : public TargetLoweringBase { public: struct DAGCombinerInfo; + struct MakeLibCallOptions; TargetLowering(const TargetLowering &) = delete; TargetLowering &operator=(const TargetLowering &) = delete; @@ -2925,6 +2983,14 @@ public: return false; } + /// Returns true if the specified base+offset is a legal indexed addressing + /// mode for this target. \p MI is the load or store instruction that is being + /// considered for transformation. + virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, + bool IsPre, MachineRegisterInfo &MRI) const { + return false; + } + /// Return the entry encoding for a jump table in the current function. The /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. virtual unsigned getJumpTableEncoding() const; @@ -2955,14 +3021,15 @@ public: void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &DL) const; + const SDLoc &DL, const SDValue OldLHS, + const SDValue OldRHS) const; /// Returns a pair of (return value, chain). /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 
- std::pair<SDValue, SDValue> makeLibCall( - SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, - bool isSigned, const SDLoc &dl, bool doesNotReturn = false, - bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const; + std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, + EVT RetVT, ArrayRef<SDValue> Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const; /// Check whether parameters to a call that are passed in callee saved /// registers are the same as from the calling function. This needs to be @@ -3065,6 +3132,14 @@ public: bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const; + /// More limited version of SimplifyDemandedBits that can be used to "look + /// through" ops that don't contribute to the DemandedBits/DemandedElts - + /// bitwise ops etc. + SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + SelectionDAG &DAG, + unsigned Depth) const; + /// Look at Vector Op. At this point, we know that only the DemandedElts /// elements of the result of Op are ever used downstream. If we can use /// this information to simplify Op, create a new simplified DAG node and @@ -3099,6 +3174,15 @@ public: const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const; + /// Determine which of the bits specified in Mask are known to be either zero + /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts + /// argument allows us to only collect the known bits that are shared by the + /// requested vector elements. This is for GISel. + virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, + Register R, KnownBits &Known, + const APInt &DemandedElts, + const MachineRegisterInfo &MRI, + unsigned Depth = 0) const; /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. /// Default implementation computes low bits based on alignment @@ -3139,6 +3223,21 @@ public: TargetLoweringOpt &TLO, unsigned Depth = 0) const; + /// More limited version of SimplifyDemandedBits that can be used to "look + /// through" ops that don't contribute to the DemandedBits/DemandedElts - + /// bitwise ops etc. + virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const; + + /// Tries to build a legal vector shuffle using the provided parameters + /// or equivalent variations. The Mask argument maybe be modified as the + /// function tries different variations. + /// Returns an empty SDValue if the operation fails. + SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef<int> Mask, + SelectionDAG &DAG) const; + /// This method returns the constant pool value that will be loaded by LD. /// NOTE: You must check for implicit extensions of the constant by LD. 
virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; @@ -3174,6 +3273,8 @@ public: SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); + bool recursivelyDeleteUnusedNodes(SDNode *N); + void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); }; @@ -3297,6 +3398,18 @@ public: llvm_unreachable("Not Implemented"); } + /// Return 1 if we can compute the negated form of the specified expression + /// for the same cost as the expression itself, or 2 if we can compute the + /// negated form more cheaply than the expression itself. Else return 0. + virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth = 0) const; + + /// If isNegatibleForFree returns true, return the newly negated expression. + virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth = 0) const; + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. @@ -3468,6 +3581,51 @@ public: } }; + /// This structure is used to pass arguments to makeLibCall function. + struct MakeLibCallOptions { + // By passing type list before soften to makeLibCall, the target hook + // shouldExtendTypeInLibCall can get the original type before soften. + ArrayRef<EVT> OpsVTBeforeSoften; + EVT RetVTBeforeSoften; + bool IsSExt : 1; + bool DoesNotReturn : 1; + bool IsReturnValueUsed : 1; + bool IsPostTypeLegalization : 1; + bool IsSoften : 1; + + MakeLibCallOptions() + : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true), + IsPostTypeLegalization(false), IsSoften(false) {} + + MakeLibCallOptions &setSExt(bool Value = true) { + IsSExt = Value; + return *this; + } + + MakeLibCallOptions &setNoReturn(bool Value = true) { + DoesNotReturn = Value; + return *this; + } + + MakeLibCallOptions &setDiscardResult(bool Value = true) { + IsReturnValueUsed = !Value; + return *this; + } + + MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { + IsPostTypeLegalization = Value; + return *this; + } + + MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT, + bool Value = true) { + OpsVTBeforeSoften = OpsVT; + RetVTBeforeSoften = RetVT; + IsSoften = Value; + return *this; + } + }; + /// This function lowers an abstract call to a function into an actual call. /// This returns a pair of operands. The first element is the return value /// for the function (if RetTy is not VoidTy). The second element is the @@ -3537,8 +3695,8 @@ public: /// Return the register ID of the name passed in. Used by named register /// global variables extension. There is no target-independent behaviour /// so the default action is to bail. - virtual unsigned getRegisterByName(const char* RegName, EVT VT, - SelectionDAG &DAG) const { + virtual Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const { report_fatal_error("Named registers not implemented for this target"); } @@ -3597,6 +3755,25 @@ public: return MachineMemOperand::MONone; } + /// Should SelectionDAG lower an atomic store of the given kind as a normal + /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to + /// eventually migrate all targets to the using StoreSDNodes, but porting is + /// being done target at a time. 
+ virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { + assert(SI.isAtomic() && "violated precondition"); + return false; + } + + /// Should SelectionDAG lower an atomic load of the given kind as a normal + /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to + /// eventually migrate all targets to the using LoadSDNodes, but porting is + /// being done target at a time. + virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { + assert(LI.isAtomic() && "violated precondition"); + return false; + } + + /// This callback is invoked by the type legalizer to legalize nodes with an /// illegal operand type but legal result types. It replaces the /// LowerOperation callback in the type Legalizer. The reason we can not do @@ -3665,6 +3842,7 @@ public: C_Register, // Constraint represents specific register(s). C_RegisterClass, // Constraint represents any of register(s) in class. C_Memory, // Memory constraint. + C_Immediate, // Requires an immediate. C_Other, // Something else. C_Unknown // Unsupported constraint. }; @@ -3905,7 +4083,7 @@ public: /// \param N Node to expand /// \param Result output after conversion /// \returns True, if the expansion was successful, false otherwise - bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const; /// Expand UINT(i64) to double(f64) conversion /// \param N Node to expand @@ -3986,8 +4164,8 @@ public: /// method accepts integers as its arguments. SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; - /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts - /// integers as its arguments. + /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This + /// method accepts integers as its arguments. SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. 
Expansion @@ -4070,6 +4248,11 @@ private: DAGCombinerInfo &DCI, const SDLoc &DL) const; + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 + SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const; + SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL, @@ -4077,6 +4260,14 @@ private: SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const; + + SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const; + SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, + ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const; }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index a1fb81cb009d..59f5ddbd9dac 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H +#include "llvm/BinaryFormat/XCOFF.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -35,7 +36,7 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { protected: MCSymbolRefExpr::VariantKind PLTRelativeVariantKind = MCSymbolRefExpr::VK_None; - const TargetMachine *TM; + const TargetMachine *TM = nullptr; public: TargetLoweringObjectFileELF() = default; @@ -126,7 +127,8 @@ public: MachineModuleInfo *MMI) const override; /// Get MachO PC relative GOT entry relocation - const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; @@ -206,6 +208,34 @@ public: const TargetMachine &TM) const override; }; +class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile { +public: + TargetLoweringObjectFileXCOFF() = default; + ~TargetLoweringObjectFileXCOFF() override = default; + + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + + bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, + const Function &F) const override; + + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + + const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, + const TargetMachine &TM) const override; + + MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO); +}; + } // end namespace llvm #endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index 
0bd82aafac37..d48fc664c1c3 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -280,7 +280,7 @@ public: /// /// This can also be used to plug a new MachineSchedStrategy into an instance /// of the standard ScheduleDAGMI: - /// return new ScheduleDAGMI(C, make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false) + /// return new ScheduleDAGMI(C, std::make_unique<MyStrategy>(C), /*RemoveKillFlags=*/false) /// /// Return NULL to select the default (generic) machine scheduler. virtual ScheduleDAGInstrs * diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h index ddbd677b3eaa..c42ca3ad6eb9 100644 --- a/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -87,11 +87,20 @@ public: /// Return true if the specified register is included in this register class. /// This does not include virtual registers. bool contains(unsigned Reg) const { + /// FIXME: Historically this function has returned false when given vregs + /// but it should probably only receive physical registers + if (!Register::isPhysicalRegister(Reg)) + return false; return MC->contains(Reg); } /// Return true if both registers are in this class. bool contains(unsigned Reg1, unsigned Reg2) const { + /// FIXME: Historically this function has returned false when given a vregs + /// but it should probably only receive physical registers + if (!Register::isPhysicalRegister(Reg1) || + !Register::isPhysicalRegister(Reg2)) + return false; return MC->contains(Reg1, Reg2); } @@ -258,57 +267,6 @@ public: // Further sentinels can be allocated from the small negative integers. // DenseMapInfo<unsigned> uses -1u and -2u. - /// isStackSlot - Sometimes it is useful the be able to store a non-negative - /// frame index in a variable that normally holds a register. isStackSlot() - /// returns true if Reg is in the range used for stack slots. - /// - /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack - /// slots, so if a variable may contains a stack slot, always check - /// isStackSlot() first. - /// - static bool isStackSlot(unsigned Reg) { - return int(Reg) >= (1 << 30); - } - - /// Compute the frame index from a register value representing a stack slot. - static int stackSlot2Index(unsigned Reg) { - assert(isStackSlot(Reg) && "Not a stack slot"); - return int(Reg - (1u << 30)); - } - - /// Convert a non-negative frame index to a stack slot register value. - static unsigned index2StackSlot(int FI) { - assert(FI >= 0 && "Cannot hold a negative frame index."); - return FI + (1u << 30); - } - - /// Return true if the specified register number is in - /// the physical register namespace. - static bool isPhysicalRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return int(Reg) > 0; - } - - /// Return true if the specified register number is in - /// the virtual register namespace. - static bool isVirtualRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return int(Reg) < 0; - } - - /// Convert a virtual register number to a 0-based index. - /// The first virtual register in a function will get the index 0. - static unsigned virtReg2Index(unsigned Reg) { - assert(isVirtualRegister(Reg) && "Not a virtual register"); - return Reg & ~(1u << 31); - } - - /// Convert a 0-based index to a virtual register number. - /// This is the inverse operation of VirtReg2IndexFunctor below. 
- static unsigned index2VirtReg(unsigned Index) { - return Index | (1u << 31); - } - /// Return the size in bits of a register from class RC. unsigned getRegSizeInBits(const TargetRegisterClass &RC) const { return getRegClassInfo(RC).RegSize; @@ -419,9 +377,9 @@ public: /// Returns true if the two registers are equal or alias each other. /// The registers may be virtual registers. - bool regsOverlap(unsigned regA, unsigned regB) const { + bool regsOverlap(Register regA, Register regB) const { if (regA == regB) return true; - if (isVirtualRegister(regA) || isVirtualRegister(regB)) + if (regA.isVirtual() || regB.isVirtual()) return false; // Regunits are numerically ordered. Find a common unit. @@ -489,6 +447,14 @@ public: llvm_unreachable("target does not provide no preserved mask"); } + /// Return a list of all of the registers which are clobbered "inside" a call + /// to the given function. For example, these might be needed for PLT + /// sequences of long-branch veneers. + virtual ArrayRef<MCPhysReg> + getIntraCallClobberedRegs(const MachineFunction *MF) const { + return {}; + } + /// Return true if all bits that are set in mask \p mask0 are also set in /// \p mask1. bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; @@ -535,6 +501,11 @@ public: return false; } + /// This is a wrapper around getCallPreservedMask(). + /// Return true if the register is preserved after the call. + virtual bool isCalleeSavedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const; + /// Prior to adding the live-out mask to a stackmap or patchpoint /// instruction, provide the target the opportunity to adjust it (mainly to /// remove pseudo-registers that should be ignored). @@ -709,13 +680,9 @@ public: /// Find the largest common subclass of A and B. /// Return NULL if there is no common subclass. - /// The common subclass should contain - /// simple value type SVT if it is not the Any type. const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - const MVT::SimpleValueType SVT = - MVT::SimpleValueType::Any) const; + const TargetRegisterClass *B) const; /// Returns a TargetRegisterClass used for pointer values. /// If a target supports multiple different pointer register classes, @@ -1005,6 +972,13 @@ public: const MachineRegisterInfo &MRI) const { return nullptr; } + + /// Returns the physical register number of sub-register "Index" + /// for physical register RegNo. Return zero if the sub-register does not + /// exist. + inline Register getSubReg(MCRegister Reg, unsigned Idx) const { + return static_cast<const MCRegisterInfo *>(this)->getSubReg(Reg, Idx); + } }; //===----------------------------------------------------------------------===// @@ -1156,7 +1130,7 @@ public: struct VirtReg2IndexFunctor { using argument_type = unsigned; unsigned operator()(unsigned Reg) const { - return TargetRegisterInfo::virtReg2Index(Reg); + return Register::virtReg2Index(Reg); } }; @@ -1170,7 +1144,7 @@ struct VirtReg2IndexFunctor { /// %physreg17 - a physical register when no TRI instance given. 
/// /// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; -Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, +Printable printReg(Register Reg, const TargetRegisterInfo *TRI = nullptr, unsigned SubIdx = 0, const MachineRegisterInfo *MRI = nullptr); diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h index 037fc3ed3243..56018eca8c27 100644 --- a/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -106,12 +106,10 @@ public: // us do things like a dedicated avx512 selector). However, we might want // to also specialize selectors by MachineFunction, which would let us be // aware of optsize/optnone and such. - virtual const InstructionSelector *getInstructionSelector() const { + virtual InstructionSelector *getInstructionSelector() const { return nullptr; } - virtual unsigned getHwMode() const { return 0; } - /// Target can subclass this hook to select a different DAG scheduler. virtual RegisterScheduler::FunctionPassCtor getDAGScheduler(CodeGenOpt::Level) const { @@ -274,6 +272,12 @@ public: /// scheduling, DAGCombine, etc.). virtual bool useAA() const; + /// \brief Sink addresses into blocks using GEP instructions rather than + /// pointer casts and arithmetic. + virtual bool addrSinkUsingGEPs() const { + return useAA(); + } + /// Enable the use of the early if conversion pass. virtual bool enableEarlyIfConversion() const { return false; } diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index c540c94f79d9..cd4c4ca64081 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -81,7 +81,7 @@ namespace llvm { /// Returns the EVT that represents a vector EC.Min elements in length, /// where each element is of type VT. 
- static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) { + static EVT getVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) { MVT M = MVT::getVectorVT(VT.V, EC); if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; @@ -277,7 +277,7 @@ namespace llvm { } // Given a (possibly scalable) vector type, return the ElementCount - MVT::ElementCount getVectorElementCount() const { + ElementCount getVectorElementCount() const { assert((isVector()) && "Invalid vector type!"); if (isSimple()) return V.getVectorElementCount(); diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index 5818ac183fcc..16df565bc8b8 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -40,127 +40,132 @@ def v16i1 : ValueType<16, 18>; // 16 x i1 vector value def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value def v128i1 : ValueType<128, 21>; // 128 x i1 vector value -def v512i1 : ValueType<512, 22>; // 512 x i1 vector value -def v1024i1: ValueType<1024,23>; //1024 x i1 vector value - -def v1i8 : ValueType<8, 24>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value -def v16i8 : ValueType<128, 28>; // 16 x i8 vector value -def v32i8 : ValueType<256, 29>; // 32 x i8 vector value -def v64i8 : ValueType<512, 30>; // 64 x i8 vector value -def v128i8 : ValueType<1024,31>; //128 x i8 vector value -def v256i8 : ValueType<2048,32>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value -def v8i16 : ValueType<128, 36>; // 8 x i16 vector value -def v16i16 : ValueType<256, 37>; // 16 x i16 vector value -def v32i16 : ValueType<512, 38>; // 32 x i16 vector value -def v64i16 : ValueType<1024,39>; // 64 x i16 vector value -def v128i16: ValueType<2048,40>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value -def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value -def v4i32 : ValueType<128, 44>; // 4 x i32 vector value -def v5i32 : ValueType<160, 45>; // 5 x i32 vector value -def v8i32 : ValueType<256, 46>; // 8 x i32 vector value -def v16i32 : ValueType<512, 47>; // 16 x i32 vector value -def v32i32 : ValueType<1024,48>; // 32 x i32 vector value -def v64i32 : ValueType<2048,49>; // 64 x i32 vector value -def v128i32 : ValueType<4096,50>; // 128 x i32 vector value -def v256i32 : ValueType<8182,51>; // 256 x i32 vector value -def v512i32 : ValueType<16384,52>; // 512 x i32 vector value -def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value - -def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value -def v2i64 : ValueType<128, 56>; // 2 x i64 vector value -def v4i64 : ValueType<256, 57>; // 4 x i64 vector value -def v8i64 : ValueType<512, 58>; // 8 x i64 vector value -def v16i64 : ValueType<1024,59>; // 16 x i64 vector value -def v32i64 : ValueType<2048,60>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 61>; // 1 x i128 vector value - -def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 63>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 65>; // 
n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value - -def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value - -def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value -def v8f16 : ValueType<128, 94>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 95>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value -def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value -def v4f32 : ValueType<128, 98>; // 4 x f32 vector value -def v5f32 : ValueType<160, 99>; // 5 x f32 vector value -def v8f32 : ValueType<256, 100>; // 8 x f32 vector value -def v16f32 : ValueType<512, 101>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value -def v256f32 : ValueType<8182, 105>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value -def v1f64 : ValueType<64, 109>; // 1 x f64 vector value -def v2f64 : ValueType<128, 110>; // 2 x f64 vector value -def v4f64 : ValueType<256, 111>; // 4 x f64 vector value -def v8f64 : ValueType<512, 112>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value -def 
nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 125>; // X86 MMX value -def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue -def isVoid : ValueType<0 , 127>; // Produces no value -def untyped: ValueType<8 , 128>; // Produces an untyped value -def exnref: ValueType<0, 129>; // WebAssembly's exnref type +def v256i1 : ValueType<256, 22>; // 256 x i1 vector value +def v512i1 : ValueType<512, 23>; // 512 x i1 vector value +def v1024i1: ValueType<1024,24>; //1024 x i1 vector value + +def v1i8 : ValueType<8, 25>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 26>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 27>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 28>; // 8 x i8 vector value +def v16i8 : ValueType<128, 29>; // 16 x i8 vector value +def v32i8 : ValueType<256, 30>; // 32 x i8 vector value +def v64i8 : ValueType<512, 31>; // 64 x i8 vector value +def v128i8 : ValueType<1024,32>; //128 x i8 vector value +def v256i8 : ValueType<2048,33>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 34>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 35>; // 2 x i16 vector value +def v3i16 : ValueType<48 , 36>; // 3 x i16 vector value +def v4i16 : ValueType<64 , 37>; // 4 x i16 vector value +def v8i16 : ValueType<128, 38>; // 8 x i16 vector value +def v16i16 : ValueType<256, 39>; // 16 x i16 vector value +def v32i16 : ValueType<512, 40>; // 32 x i16 vector value +def v64i16 : ValueType<1024,41>; // 64 x i16 vector value +def v128i16: ValueType<2048,42>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 43>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 44>; // 2 x i32 vector value +def v3i32 : ValueType<96 , 45>; // 3 x i32 vector value +def v4i32 : ValueType<128, 46>; // 4 x i32 vector value +def v5i32 : ValueType<160, 47>; // 5 x i32 vector value +def v8i32 : ValueType<256, 48>; // 8 x i32 vector value +def v16i32 : ValueType<512, 49>; // 16 x i32 vector value +def v32i32 : ValueType<1024,50>; // 32 x i32 vector value +def v64i32 : ValueType<2048,51>; // 64 x i32 vector value +def v128i32 : ValueType<4096,52>; // 128 x i32 vector value +def v256i32 : ValueType<8182,53>; // 256 x i32 vector value +def v512i32 : ValueType<16384,54>; // 512 x i32 vector value +def v1024i32 : ValueType<32768,55>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536,56>; // 2048 x i32 vector value + +def v1i64 : ValueType<64 , 57>; // 1 x i64 vector value +def v2i64 : ValueType<128, 58>; // 2 x i64 vector value +def v4i64 : ValueType<256, 59>; // 4 x i64 vector value +def v8i64 : ValueType<512, 60>; // 8 x i64 vector value +def v16i64 : ValueType<1024,61>; // 16 x i64 vector value +def v32i64 : ValueType<2048,62>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 63>; // 1 x i128 vector value + +def v2f16 : ValueType<32 , 64>; // 2 x f16 vector value +def v3f16 : ValueType<48 , 65>; // 3 x f16 vector value +def v4f16 : ValueType<64 , 66>; // 4 x f16 vector value +def v8f16 : ValueType<128, 67>; // 8 x f16 vector value +def v16f16 : ValueType<256, 68>; // 8 x f16 vector value +def v32f16 : ValueType<512, 69>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 70>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 71>; // 2 x f32 vector value +def v3f32 : ValueType<96 , 72>; // 3 x f32 vector value +def v4f32 : ValueType<128, 73>; // 4 x f32 vector value +def v5f32 : ValueType<160, 74>; // 5 x f32 vector value +def v8f32 : ValueType<256, 75>; // 8 x f32 vector value +def 
v16f32 : ValueType<512, 76>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 77>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 78>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 79>; // 128 x f32 vector value +def v256f32 : ValueType<8182, 80>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 81>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 82>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 83>; // 2048 x f32 vector value +def v1f64 : ValueType<64, 84>; // 1 x f64 vector value +def v2f64 : ValueType<128, 85>; // 2 x f64 vector value +def v4f64 : ValueType<256, 86>; // 4 x f64 vector value +def v8f64 : ValueType<512, 87>; // 8 x f64 vector value + +def nxv1i1 : ValueType<1, 88>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 89>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 90>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 91>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 92>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 93>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 94>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 95>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 96>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 97>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 98>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 99>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 100>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 101>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 102>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 103>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 104>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 105>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 106>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 107>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 108>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 109>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 110>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,111>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 112>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 113>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 114>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 115>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,116>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,117>; // n x 32 x i64 vector value + +def nxv2f16 : ValueType<32 , 118>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 119>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 120>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 121>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 122>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 123>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 124>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 125>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 126>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 127>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 128>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 129>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 130>; // X86 MMX value +def FlagVT : ValueType<0 , 131>; // Pre-RA sched glue +def isVoid : ValueType<0 , 132>; // Produces no value +def untyped: 
ValueType<8 , 133>; // Produces an untyped value +def exnref: ValueType<0, 134>; // WebAssembly's exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h index 70eb048f05eb..db25ed5c5116 100644 --- a/include/llvm/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -49,7 +49,7 @@ class TargetInstrInfo; /// it; even spilled virtual registers (the register mapped to a /// spilled register is the temporary used to load it from the /// stack). - IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap; + IndexedMap<Register, VirtReg2IndexFunctor> Virt2PhysMap; /// Virt2StackSlotMap - This is virtual register to stack slot /// mapping. Each spilled virtual register has an entry in it @@ -93,7 +93,7 @@ class TargetInstrInfo; /// returns true if the specified virtual register is /// mapped to a physical register - bool hasPhys(unsigned virtReg) const { + bool hasPhys(Register virtReg) const { return getPhys(virtReg) != NO_PHYS_REG; } @@ -101,20 +101,20 @@ class TargetInstrInfo; /// virtual register Register getPhys(Register virtReg) const { assert(virtReg.isVirtual()); - return Virt2PhysMap[virtReg]; + return Virt2PhysMap[virtReg.id()]; } /// creates a mapping for the specified virtual register to /// the specified physical register - void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg); + void assignVirt2Phys(Register virtReg, MCPhysReg physReg); /// clears the specified virtual register's, physical /// register mapping - void clearVirt(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2PhysMap[virtReg] != NO_PHYS_REG && + void clearVirt(Register virtReg) { + assert(virtReg.isVirtual()); + assert(Virt2PhysMap[virtReg.id()] != NO_PHYS_REG && "attempt to clear a not assigned virtual register"); - Virt2PhysMap[virtReg] = NO_PHYS_REG; + Virt2PhysMap[virtReg.id()] = NO_PHYS_REG; } /// clears all virtual to physical register mappings @@ -124,21 +124,21 @@ class TargetInstrInfo; } /// returns true if VirtReg is assigned to its preferred physreg. - bool hasPreferredPhys(unsigned VirtReg); + bool hasPreferredPhys(Register VirtReg); /// returns true if VirtReg has a known preferred register. /// This returns false if VirtReg has a preference that is a virtual /// register that hasn't been assigned yet. - bool hasKnownPreference(unsigned VirtReg); + bool hasKnownPreference(Register VirtReg); /// records virtReg is a split live interval from SReg. - void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { - Virt2SplitMap[virtReg] = SReg; + void setIsSplitFromReg(Register virtReg, unsigned SReg) { + Virt2SplitMap[virtReg.id()] = SReg; } /// returns the live interval virtReg is split from. - unsigned getPreSplitReg(unsigned virtReg) const { - return Virt2SplitMap[virtReg]; + unsigned getPreSplitReg(Register virtReg) const { + return Virt2SplitMap[virtReg.id()]; } /// getOriginal - Return the original virtual register that VirtReg descends @@ -152,28 +152,29 @@ class TargetInstrInfo; /// returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. - bool isAssignedReg(unsigned virtReg) const { + bool isAssignedReg(Register virtReg) const { if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. 
- return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG); + return (Virt2SplitMap[virtReg.id()] && + Virt2PhysMap[virtReg.id()] != NO_PHYS_REG); } /// returns the stack slot mapped to the specified virtual /// register - int getStackSlot(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2StackSlotMap[virtReg]; + int getStackSlot(Register virtReg) const { + assert(virtReg.isVirtual()); + return Virt2StackSlotMap[virtReg.id()]; } /// create a mapping for the specifed virtual register to /// the next available stack slot - int assignVirt2StackSlot(unsigned virtReg); + int assignVirt2StackSlot(Register virtReg); /// create a mapping for the specified virtual register to /// the specified stack slot - void assignVirt2StackSlot(unsigned virtReg, int SS); + void assignVirt2StackSlot(Register virtReg, int SS); void print(raw_ostream &OS, const Module* M = nullptr) const override; void dump() const; |
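The makeLibCall change above folds the former boolean parameters (isSigned, doesNotReturn, isReturnValueUsed, isPostTypeLegalization) into a single MakeLibCallOptions value. A minimal usage sketch of the new interface follows; the wrapper function lowerFAddViaLibcall and the choice of RTLIB::ADD_F64 are illustrative assumptions, not code taken from this commit.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

// Old form: makeLibCall(DAG, LC, RetVT, Ops, /*isSigned=*/false, dl,
//                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/true);
// New form: pack the flags into a MakeLibCallOptions builder and pass it.
// lowerFAddViaLibcall and the use of RTLIB::ADD_F64 are hypothetical.
static SDValue lowerFAddViaLibcall(const TargetLowering &TLI, SelectionDAG &DAG,
                                   SDValue LHS, SDValue RHS, const SDLoc &dl) {
  SDValue Ops[] = {LHS, RHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setSExt(false)             // arguments are not sign-extended
      .setDiscardResult(false)           // the return value is used
      .setIsPostTypeLegalization(false); // called before type legalization
  // makeLibCall returns {return value, chain}.
  std::pair<SDValue, SDValue> CallResult =
      TLI.makeLibCall(DAG, RTLIB::ADD_F64, MVT::f64, Ops, CallOptions, dl);
  return CallResult.first;
}

The alignment-related setters changed in the same spirit: they now take an Align in bytes rather than a log2 value, so a target that previously called, for example, setPrefFunctionAlignment(4) (log2, i.e. 16 bytes) would now call setPrefFunctionAlignment(Align(16)).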