| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| --- | --- | --- |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000 |
| commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) | |
| tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /include/llvm/CodeGen | |
| parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) | |
Diffstat (limited to 'include/llvm/CodeGen')
89 files changed, 11674 insertions, 1799 deletions
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 60bbc9aaa5bd4..b8944a668000d 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -43,11 +43,11 @@ class DIE; class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; +class GCStrategy; class GlobalIndirectSymbol; class GlobalObject; class GlobalValue; class GlobalVariable; -class GCStrategy; class MachineBasicBlock; class MachineConstantPoolValue; class MachineFunction; @@ -58,6 +58,7 @@ class MachineModuleInfo; class MachineOptimizationRemarkEmitter; class MCAsmInfo; class MCCFIInstruction; +struct MCCodePaddingContext; class MCContext; class MCExpr; class MCInst; @@ -76,11 +77,9 @@ class TargetMachine; class AsmPrinter : public MachineFunctionPass { public: /// Target machine description. - /// TargetMachine &TM; /// Target Asm Printer information. - /// const MCAsmInfo *MAI; /// This is the context for the output file that we are streaming. This owns @@ -103,7 +102,6 @@ public: /// The symbol for the current function. This is recalculated at the beginning /// of each call to runOnMachineFunction(). - /// MCSymbol *CurrentFnSym = nullptr; /// The symbol used to represent the start of the current function for the @@ -116,7 +114,7 @@ public: using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>; MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs; - /// Enable print [latency:throughput] in output + /// Enable print [latency:throughput] in output. bool EnablePrintSchedInfo = false; private: @@ -128,8 +126,8 @@ private: void *GCMetadataPrinters = nullptr; // Really a DenseMap. /// Emit comments in assembly output if this is true. - /// bool VerboseAsm; + static char ID; /// If VerboseAsm is set, a pointer to the loop info for this function. @@ -149,6 +147,7 @@ private: TimerDescription(TimerDescription), TimerGroupName(TimerGroupName), TimerGroupDescription(TimerGroupDescription) {} }; + /// A vector of all debug/EH info emitters we should use. This vector /// maintains ownership of the emitters. SmallVector<HandlerInfo, 1> Handlers; @@ -187,11 +186,9 @@ public: bool isPositionIndependent() const; /// Return true if assembly output should contain comments. - /// bool isVerbose() const { return VerboseAsm; } /// Return a unique ID for the current function. - /// unsigned getFunctionNumber() const; MCSymbol *getFunctionBegin() const { return CurrentFnBegin; } @@ -235,13 +232,15 @@ public: // The table will contain these structs that point to the sled, the function // containing the sled, and what kind of sled (and whether they should always - // be instrumented). + // be instrumented). We also use a version identifier that the runtime can use + // to decide what to do with the sled, depending on the version of the sled. struct XRayFunctionEntry { const MCSymbol *Sled; const MCSymbol *Function; SledKind Kind; bool AlwaysInstrument; const class Function *Fn; + uint8_t Version; void emit(int, MCStreamer *, const MCSymbol *) const; }; @@ -249,8 +248,12 @@ public: // All the sleds to be emitted. SmallVector<XRayFunctionEntry, 4> Sleds; + // A unique ID used for ELF sections associated with a particular function. + unsigned XRayFnUniqueID = 0; + // Helper function to record a given XRay sled. - void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); + void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind, + uint8_t Version = 0); /// Emit a table with all XRay instrumentation points. 
void emitXRayTable(); @@ -260,7 +263,6 @@ public: //===------------------------------------------------------------------===// /// Record analysis usage. - /// void getAnalysisUsage(AnalysisUsage &AU) const override; /// Set up the AsmPrinter when we are working on a new module. If your pass @@ -293,8 +295,10 @@ public: void emitFrameAlloc(const MachineInstr &MI); + void emitStackSizeSection(const MachineFunction &MF); + enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug }; - CFIMoveType needsCFIMoves(); + CFIMoveType needsCFIMoves() const; /// Returns false if needsCFIMoves() == CFI_M_EH for any function /// in the module. @@ -305,12 +309,10 @@ public: /// Print to the current output stream assembly representations of the /// constants in the constant pool MCP. This is used to print out constants /// which have been "spilled to memory" by the code generator. - /// virtual void EmitConstantPool(); /// Print assembly representations of the jump tables used by the current /// function to the current output stream. - /// virtual void EmitJumpTableInfo(); /// Emit the specified global variable to the .s file. @@ -325,7 +327,6 @@ public: /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - /// void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. @@ -379,7 +380,7 @@ public: virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const; /// Targets can override this to emit stuff at the end of a basic block. - virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} + virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB); /// Targets should implement this to emit instructions. virtual void EmitInstruction(const MachineInstr *) { @@ -443,15 +444,12 @@ public: void printOffset(int64_t Offset, raw_ostream &OS) const; /// Emit a byte directive and value. - /// void EmitInt8(int Value) const; /// Emit a short directive and value. - /// void EmitInt16(int Value) const; /// Emit a long directive and value. - /// void EmitInt32(int Value) const; /// Emit something like ".long Hi-Lo" where the size in bytes of the directive @@ -481,8 +479,12 @@ public: void EmitSLEB128(int64_t Value, const char *Desc = nullptr) const; /// Emit the specified unsigned leb128 value. - void EmitULEB128(uint64_t Value, const char *Desc = nullptr, - unsigned PadTo = 0) const; + void EmitULEB128(uint64_t Value, const char *Desc = nullptr) const; + + /// Emit the specified unsigned leb128 value padded to a specific number + /// bytes + void EmitPaddedULEB128(uint64_t Value, unsigned PadTo, + const char *Desc = nullptr) const; /// Emit a .byte 42 directive that corresponds to an encoding. If verbose /// assembly output is enabled, we output comments describing the encoding. @@ -622,10 +624,13 @@ private: void EmitModuleIdents(Module &M); void EmitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor); + GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); /// Emit GlobalAlias or GlobalIFunc. 
void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol& GIS); + void setupCodePaddingContext(const MachineBasicBlock &MBB, + MCCodePaddingContext &Context) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 6331070247928..bb5e7f9e8e30f 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -6,25 +6,63 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file provides a helper that implements much of the TTI interface in /// terms of the target-independent code generator and TargetLowering /// interfaces. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_BASICTTIIMPL_H #define LLVM_CODEGEN_BASICTTIIMPL_H +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> +#include <utility> namespace llvm { +class Function; +class GlobalValue; +class LLVMContext; +class ScalarEvolution; +class SCEV; +class TargetMachine; + extern cl::opt<unsigned> PartialUnrollingThreshold; /// \brief Base class which can be used to help build a TTI implementation. @@ -39,8 +77,8 @@ extern cl::opt<unsigned> PartialUnrollingThreshold; template <typename T> class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { private: - typedef TargetTransformInfoImplCRTPBase<T> BaseT; - typedef TargetTransformInfo TTI; + using BaseT = TargetTransformInfoImplCRTPBase<T>; + using TTI = TargetTransformInfo; /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. 
@@ -110,13 +148,13 @@ public: bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) { + unsigned AddrSpace, Instruction *I = nullptr) { TargetLoweringBase::AddrMode AM; AM.BaseGV = BaseGV; AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace); + return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); } bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { @@ -133,10 +171,6 @@ public: return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); } - bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { - return getTLI()->isFoldableMemAccessOffset(I, Offset); - } - bool isTruncateFree(Type *Ty1, Type *Ty2) { return getTLI()->isTruncateFree(Ty1, Ty2); } @@ -235,7 +269,8 @@ public: if (N < 2 || N < TLI->getMinimumJumpTableEntries()) return N; uint64_t Range = - (MaxCaseVal - MinCaseVal).getLimitedValue(UINT64_MAX - 1) + 1; + (MaxCaseVal - MinCaseVal) + .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; // Check whether a range of clusters is dense enough for a jump table if (TLI->isSuitableForJumpTable(&SI, N, Range)) { JumpTableSize = Range; @@ -262,6 +297,10 @@ public: TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { + return true; + } + unsigned getFPOpCost(Type *Ty) { // By default, FP instructions are no more expensive since they are // implemented in HW. Target specific TTI can override this. @@ -272,17 +311,15 @@ public: const TargetLoweringBase *TLI = getTLI(); switch (Opcode) { default: break; - case Instruction::Trunc: { + case Instruction::Trunc: if (TLI->isTruncateFree(OpTy, Ty)) return TargetTransformInfo::TCC_Free; return TargetTransformInfo::TCC_Basic; - } - case Instruction::ZExt: { + case Instruction::ZExt: if (TLI->isZExtFree(OpTy, Ty)) return TargetTransformInfo::TCC_Free; return TargetTransformInfo::TCC_Basic; } - } return BaseT::getOperationCost(Opcode, Ty, OpTy); } @@ -354,6 +391,13 @@ public: UP.BEInsns = 2; } + int getInstructionLatency(const Instruction *I) { + if (isa<LoadInst>(I)) + return getST()->getSchedModel().DefaultLoadLatency; + + return BaseT::getInstructionLatency(I); + } + /// @} /// \name Vector TTI Implementations @@ -394,8 +438,8 @@ public: if (A->getType()->isVectorTy()) { VecTy = A->getType(); // If A is a vector operand, VF should be 1 or correspond to A. - assert ((VF == 1 || VF == VecTy->getVectorNumElements()) && - "Vector argument does not match VF"); + assert((VF == 1 || VF == VecTy->getVectorNumElements()) && + "Vector argument does not match VF"); } else VecTy = VectorType::get(A->getType(), VF); @@ -408,8 +452,8 @@ public: } unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) { - assert (VecTy->isVectorTy()); - + assert(VecTy->isVectorTy()); + unsigned Cost = 0; Cost += getScalarizationOverhead(VecTy, true, false); @@ -531,7 +575,6 @@ public: // Handle scalar conversions. if (!Src->isVectorTy() && !Dst->isVectorTy()) { - // Scalar bitcasts are usually free. if (Opcode == Instruction::BitCast) return 0; @@ -547,7 +590,6 @@ public: // Check vector-to-vector casts. if (Dst->isVectorTy() && Src->isVectorTy()) { - // If the cast is between same-sized registers, then the check is simple. 
if (SrcLT.first == DstLT.first && SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { @@ -743,7 +785,6 @@ public: // We only scale the cost of loads since interleaved store groups aren't // allowed to have gaps. if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { - // The number of loads of a legal type it will take to represent a load // of the unlegalized vector type. unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); @@ -821,7 +862,7 @@ public: ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF = 1) { unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); - assert ((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); + assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); switch (IID) { default: { @@ -829,7 +870,7 @@ public: SmallVector<Type *, 4> Types; for (Value *Op : Args) { Type *OpTy = Op->getType(); - assert (VF == 1 || !OpTy->isVectorTy()); + assert(VF == 1 || !OpTy->isVectorTy()); Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); } @@ -839,7 +880,7 @@ public: // Compute the scalarization overhead based on Args for a vector // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while // CostModel will pass a vector RetTy and VF is 1. - unsigned ScalarizationCost = UINT_MAX; + unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); if (RetVF > 1 || VF > 1) { ScalarizationCost = 0; if (!RetTy->isVoidTy()) @@ -851,7 +892,7 @@ public: getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost); } case Intrinsic::masked_scatter: { - assert (VF == 1 && "Can't vectorize types here."); + assert(VF == 1 && "Can't vectorize types here."); Value *Mask = Args[3]; bool VarMask = !isa<Constant>(Mask); unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue(); @@ -862,7 +903,7 @@ public: Alignment); } case Intrinsic::masked_gather: { - assert (VF == 1 && "Can't vectorize types here."); + assert(VF == 1 && "Can't vectorize types here."); Value *Mask = Args[2]; bool VarMask = !isa<Constant>(Mask); unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue(); @@ -873,13 +914,14 @@ public: } } } - + /// Get intrinsic cost based on argument types. - /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the - /// arguments and the return value will be computed based on types. - unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX) { + /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the + /// cost of scalarizing the arguments and the return value will be computed + /// based on types. + unsigned getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) { SmallVector<unsigned, 2> ISDs; unsigned SingleCallCost = 10; // Library call cost. Make it expensive. 
switch (IID) { @@ -889,7 +931,7 @@ public: unsigned ScalarCalls = 1; Type *ScalarRetTy = RetTy; if (RetTy->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) ScalarizationCost = getScalarizationOverhead(RetTy, true, false); ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); ScalarRetTy = RetTy->getScalarType(); @@ -898,7 +940,7 @@ public: for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { Type *Ty = Tys[i]; if (Ty->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) ScalarizationCost += getScalarizationOverhead(Ty, false, true); ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); Ty = Ty->getScalarType(); @@ -985,6 +1027,7 @@ public: // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::sideeffect: return 0; case Intrinsic::masked_store: return static_cast<T *>(this) @@ -1047,8 +1090,10 @@ public: // this will emit a costly libcall, adding call overhead and spills. Make it // very expensive. if (RetTy->isVectorTy()) { - unsigned ScalarizationCost = ((ScalarizationCostPassed != UINT_MAX) ? - ScalarizationCostPassed : getScalarizationOverhead(RetTy, true, false)); + unsigned ScalarizationCost = + ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max()) + ? ScalarizationCostPassed + : getScalarizationOverhead(RetTy, true, false)); unsigned ScalarCalls = RetTy->getVectorNumElements(); SmallVector<Type *, 4> ScalarTys; for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { @@ -1061,7 +1106,7 @@ public: IID, RetTy->getScalarType(), ScalarTys, FMF); for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { if (Tys[i]->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max()) ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); } @@ -1096,7 +1141,7 @@ public: unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *) { - return 0; + return 0; } /// Try to calculate arithmetic and shuffle op costs for reduction operations. @@ -1134,7 +1179,8 @@ public: /// /// The cost model should take into account that the actual length of the /// vector is reduced on each iteration. - unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) { + unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) { assert(Ty->isVectorTy() && "Expect a vector type"); Type *ScalarTy = Ty->getVectorElementType(); unsigned NumVecElts = Ty->getVectorNumElements(); @@ -1159,7 +1205,7 @@ public: } // The minimal length of the vector is limited by the real length of vector // operations performed on the current platform. That's why several final - // reduction opertions are perfomed on the vectors with the same + // reduction operations are performed on the vectors with the same // architecture-dependent length. ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, @@ -1169,6 +1215,66 @@ public: return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); } + /// Try to calculate op costs for min/max reduction operations. + /// \param CondTy Conditional type for the Select instruction. 
+ unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, + bool) { + assert(Ty->isVectorTy() && "Expect a vector type"); + Type *ScalarTy = Ty->getVectorElementType(); + Type *ScalarCondTy = CondTy->getVectorElementType(); + unsigned NumVecElts = Ty->getVectorNumElements(); + unsigned NumReduxLevels = Log2_32(NumVecElts); + unsigned CmpOpcode; + if (Ty->isFPOrFPVectorTy()) { + CmpOpcode = Instruction::FCmp; + } else { + assert(Ty->isIntOrIntVectorTy() && + "expecting floating point or integer type for min/max reduction"); + CmpOpcode = Instruction::ICmp; + } + unsigned MinMaxCost = 0; + unsigned ShuffleCost = 0; + auto *ConcreteTTI = static_cast<T *>(this); + std::pair<unsigned, MVT> LT = + ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty); + unsigned LongVectorCount = 0; + unsigned MVTLen = + LT.second.isVector() ? LT.second.getVectorNumElements() : 1; + while (NumVecElts > MVTLen) { + NumVecElts /= 2; + // Assume the pairwise shuffles add a cost. + ShuffleCost += (IsPairwise + 1) * + ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, + NumVecElts, Ty); + MinMaxCost += + ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, + nullptr); + Ty = VectorType::get(ScalarTy, NumVecElts); + CondTy = VectorType::get(ScalarCondTy, NumVecElts); + ++LongVectorCount; + } + // The minimal length of the vector is limited by the real length of vector + // operations performed on the current platform. That's why several final + // reduction opertions are perfomed on the vectors with the same + // architecture-dependent length. + ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * + ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty, + NumVecElts, Ty); + MinMaxCost += + (NumReduxLevels - LongVectorCount) * + (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, + nullptr)); + // Need 3 extractelement instructions for scalarization + an additional + // scalar select instruction. + return ShuffleCost + MinMaxCost + + 3 * getScalarizationOverhead(Ty, /*Insert=*/false, + /*Extract=*/true) + + ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, + ScalarCondTy, nullptr); + } + unsigned getVectorSplitCost() { return 1; } /// @} @@ -1177,7 +1283,8 @@ public: /// \brief Concrete BasicTTIImpl that can be used if no further customization /// is needed. 
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { - typedef BasicTTIImplBase<BasicTTIImpl> BaseT; + using BaseT = BasicTTIImplBase<BasicTTIImpl>; + friend class BasicTTIImplBase<BasicTTIImpl>; const TargetSubtargetInfo *ST; @@ -1190,6 +1297,6 @@ public: explicit BasicTTIImpl(const TargetMachine *ST, const Function &F); }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_BASICTTIIMPL_H diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index 17c9415a81cbd..d9e8206408a78 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -1,4 +1,4 @@ -//===---------------- lib/CodeGen/CalcSpillWeights.h ------------*- C++ -*-===// +//===- lib/CodeGen/CalcSpillWeights.h ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H @@ -16,11 +15,12 @@ namespace llvm { - class LiveInterval; - class LiveIntervals; - class MachineBlockFrequencyInfo; - class MachineLoopInfo; - class VirtRegMap; +class LiveInterval; +class LiveIntervals; +class MachineBlockFrequencyInfo; +class MachineFunction; +class MachineLoopInfo; +class VirtRegMap; /// \brief Normalize the spill weight of a live interval /// @@ -32,7 +32,6 @@ namespace llvm { /// per function call. Derived from block frequencies. /// @param Size Size of live interval as returnexd by getSize() /// @param NumInstr Number of instructions using this live interval - /// static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr) { // The constant 25 instructions is added to avoid depending too much on @@ -47,7 +46,7 @@ namespace llvm { /// spill weight and allocation hint. class VirtRegAuxInfo { public: - typedef float (*NormalizingFn)(float, unsigned, unsigned); + using NormalizingFn = float (*)(float, unsigned, unsigned); private: MachineFunction &MF; @@ -67,6 +66,32 @@ namespace llvm { /// \brief (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &li); + + /// \brief Compute future expected spill weight of a split artifact of li + /// that will span between start and end slot indexes. + /// \param li The live interval to be split. + /// \param start The expected begining of the split artifact. Instructions + /// before start will not affect the weight. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. + /// \return The expected spill weight of the split artifact. Returns + /// negative weight for unspillable li. + float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); + + /// \brief Helper function for weight calculations. + /// (Re)compute li's spill weight and allocation hint, or, for non null + /// start and end - compute future expected spill weight of a split + /// artifact of li that will span between start and end slot indexes. + /// \param li The live interval for which to compute the weight. + /// \param start The expected begining of the split artifact. Instructions + /// before start will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \return The spill weight. 
Returns negative weight for unspillable li. + float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr, + SlotIndex *end = nullptr); }; /// \brief Compute spill weights and allocation hints for all virtual register @@ -77,6 +102,7 @@ namespace llvm { const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm = normalizeSpillWeight); -} + +} // end namespace llvm #endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 50e464ebb9b80..d30a27328c012 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -1,4 +1,4 @@ -//===-- llvm/CallingConvLower.h - Calling Conventions -----------*- C++ -*-===// +//===- llvm/CallingConvLower.h - Calling Conventions ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,11 +18,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetCallingConv.h" namespace llvm { + class CCState; class MVT; class TargetMachine; @@ -200,6 +201,7 @@ private: unsigned MaxStackArgAlign; SmallVector<uint32_t, 16> UsedRegs; SmallVector<CCValAssign, 4> PendingLocs; + SmallVector<ISD::ArgFlagsTy, 4> PendingArgFlags; // ByValInfo and SmallVector<ByValInfo, 4> ByValRegs: // @@ -503,10 +505,15 @@ public: } // Get list of pending assignments - SmallVectorImpl<llvm::CCValAssign> &getPendingLocs() { + SmallVectorImpl<CCValAssign> &getPendingLocs() { return PendingLocs; } + // Get a list of argflags for pending assignments. + SmallVectorImpl<ISD::ArgFlagsTy> &getPendingArgFlags() { + return PendingArgFlags; + } + /// Compute the remaining unused register parameters that would be used for /// the given value type. This is useful when varargs are passed in the /// registers that normal prototyped parameters would be passed in, or for @@ -564,8 +571,6 @@ private: void MarkAllocated(unsigned Reg); }; - - } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_CALLINGCONVLOWER_H diff --git a/include/llvm/CodeGen/CommandFlags.def b/include/llvm/CodeGen/CommandFlags.def new file mode 100644 index 0000000000000..fe96033a9c617 --- /dev/null +++ b/include/llvm/CodeGen/CommandFlags.def @@ -0,0 +1,366 @@ +//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains codegen-specific flags that are shared between different +// command line tools. The tools "llc" and "opt" both use this file to prevent +// flag duplication. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.def" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Host.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <string> +using namespace llvm; + +static cl::opt<std::string> + MArch("march", + cl::desc("Architecture to generate code for (see --version)")); + +static cl::opt<std::string> + MCPU("mcpu", + cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::value_desc("cpu-name"), cl::init("")); + +static cl::list<std::string> + MAttrs("mattr", cl::CommaSeparated, + cl::desc("Target specific attributes (-mattr=help for details)"), + cl::value_desc("a1,+a2,-a3,...")); + +static cl::opt<Reloc::Model> RelocModel( + "relocation-model", cl::desc("Choose relocation model"), + cl::values( + clEnumValN(Reloc::Static, "static", "Non-relocatable code"), + clEnumValN(Reloc::PIC_, "pic", + "Fully relocatable, position independent code"), + clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", + "Relocatable external references, non-relocatable code"), + clEnumValN(Reloc::ROPI, "ropi", + "Code and read-only data relocatable, accessed PC-relative"), + clEnumValN( + Reloc::RWPI, "rwpi", + "Read-write data relocatable, accessed relative to static base"), + clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi", + "Combination of ropi and rwpi"))); + +LLVM_ATTRIBUTE_UNUSED static Optional<Reloc::Model> getRelocModel() { + if (RelocModel.getNumOccurrences()) { + Reloc::Model R = RelocModel; + return R; + } + return None; +} + +static cl::opt<ThreadModel::Model> TMModel( + "thread-model", cl::desc("Choose threading model"), + cl::init(ThreadModel::POSIX), + cl::values(clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"), + clEnumValN(ThreadModel::Single, "single", + "Single thread model"))); + +static cl::opt<llvm::CodeModel::Model> CMModel( + "code-model", cl::desc("Choose code model"), + cl::values(clEnumValN(CodeModel::Small, "small", "Small code model"), + clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"), + clEnumValN(CodeModel::Medium, "medium", "Medium code model"), + clEnumValN(CodeModel::Large, "large", "Large code model"))); + +LLVM_ATTRIBUTE_UNUSED static Optional<CodeModel::Model> getCodeModel() { + if (CMModel.getNumOccurrences()) { + CodeModel::Model M = CMModel; + return M; + } + return None; +} + +static cl::opt<llvm::ExceptionHandling> ExceptionModel( + "exception-model", cl::desc("exception model"), + cl::init(ExceptionHandling::None), + cl::values( + clEnumValN(ExceptionHandling::None, "default", + "default exception handling model"), + clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", + "DWARF-like CFI based exception handling"), + clEnumValN(ExceptionHandling::SjLj, "sjlj", "SjLj exception handling"), + clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"), + clEnumValN(ExceptionHandling::WinEH, "wineh", + "Windows exception model"))); + +static cl::opt<TargetMachine::CodeGenFileType> FileType( + "filetype", cl::init(TargetMachine::CGFT_AssemblyFile), + cl::desc( + "Choose a file type (not all types are supported by all targets):"), + cl::values(clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm", + "Emit an assembly ('.s') file"), + 
clEnumValN(TargetMachine::CGFT_ObjectFile, "obj", + "Emit a native object ('.o') file"), + clEnumValN(TargetMachine::CGFT_Null, "null", + "Emit nothing, for performance testing"))); + +static cl::opt<bool> + DisableFPElim("disable-fp-elim", + cl::desc("Disable frame pointer elimination optimization"), + cl::init(false)); + +static cl::opt<bool> EnableUnsafeFPMath( + "enable-unsafe-fp-math", + cl::desc("Enable optimizations that may decrease FP precision"), + cl::init(false)); + +static cl::opt<bool> EnableNoInfsFPMath( + "enable-no-infs-fp-math", + cl::desc("Enable FP math optimizations that assume no +-Infs"), + cl::init(false)); + +static cl::opt<bool> EnableNoNaNsFPMath( + "enable-no-nans-fp-math", + cl::desc("Enable FP math optimizations that assume no NaNs"), + cl::init(false)); + +static cl::opt<bool> EnableNoSignedZerosFPMath( + "enable-no-signed-zeros-fp-math", + cl::desc("Enable FP math optimizations that assume " + "the sign of 0 is insignificant"), + cl::init(false)); + +static cl::opt<bool> + EnableNoTrappingFPMath("enable-no-trapping-fp-math", + cl::desc("Enable setting the FP exceptions build " + "attribute not to use exceptions"), + cl::init(false)); + +static cl::opt<llvm::FPDenormal::DenormalMode> DenormalMode( + "denormal-fp-math", + cl::desc("Select which denormal numbers the code is permitted to require"), + cl::init(FPDenormal::IEEE), + cl::values(clEnumValN(FPDenormal::IEEE, "ieee", + "IEEE 754 denormal numbers"), + clEnumValN(FPDenormal::PreserveSign, "preserve-sign", + "the sign of a flushed-to-zero number is preserved " + "in the sign of 0"), + clEnumValN(FPDenormal::PositiveZero, "positive-zero", + "denormals are flushed to positive zero"))); + +static cl::opt<bool> EnableHonorSignDependentRoundingFPMath( + "enable-sign-dependent-rounding-fp-math", cl::Hidden, + cl::desc("Force codegen to assume rounding mode can change dynamically"), + cl::init(false)); + +static cl::opt<llvm::FloatABI::ABIType> FloatABIForCalls( + "float-abi", cl::desc("Choose float ABI type"), cl::init(FloatABI::Default), + cl::values(clEnumValN(FloatABI::Default, "default", + "Target default float ABI type"), + clEnumValN(FloatABI::Soft, "soft", + "Soft float ABI (implied by -soft-float)"), + clEnumValN(FloatABI::Hard, "hard", + "Hard float ABI (uses FP registers)"))); + +static cl::opt<llvm::FPOpFusion::FPOpFusionMode> FuseFPOps( + "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"), + cl::init(FPOpFusion::Standard), + cl::values( + clEnumValN(FPOpFusion::Fast, "fast", "Fuse FP ops whenever profitable"), + clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."), + clEnumValN(FPOpFusion::Strict, "off", + "Only fuse FP ops when the result won't be affected."))); + +static cl::opt<bool> DontPlaceZerosInBSS( + "nozero-initialized-in-bss", + cl::desc("Don't place zero-initialized symbols into bss section"), + cl::init(false)); + +static cl::opt<bool> EnableGuaranteedTailCallOpt( + "tailcallopt", + cl::desc( + "Turn fastcc calls into tail calls by (potentially) changing ABI."), + cl::init(false)); + +static cl::opt<bool> DisableTailCalls("disable-tail-calls", + cl::desc("Never emit tail calls"), + cl::init(false)); + +static cl::opt<bool> StackSymbolOrdering("stack-symbol-ordering", + cl::desc("Order local stack symbols."), + cl::init(true)); + +static cl::opt<unsigned> + OverrideStackAlignment("stack-alignment", + cl::desc("Override default stack alignment"), + cl::init(0)); + +static cl::opt<bool> + StackRealign("stackrealign", + cl::desc("Force align the stack to 
the minimum alignment"), + cl::init(false)); + +static cl::opt<std::string> TrapFuncName( + "trap-func", cl::Hidden, + cl::desc("Emit a call to trap function rather than a trap instruction"), + cl::init("")); + +static cl::opt<bool> UseCtors("use-ctors", + cl::desc("Use .ctors instead of .init_array."), + cl::init(false)); + +static cl::opt<bool> RelaxELFRelocations( + "relax-elf-relocations", + cl::desc("Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), + cl::init(false)); + +static cl::opt<bool> DataSections("data-sections", + cl::desc("Emit data into separate sections"), + cl::init(false)); + +static cl::opt<bool> + FunctionSections("function-sections", + cl::desc("Emit functions into separate sections"), + cl::init(false)); + +static cl::opt<bool> EmulatedTLS("emulated-tls", + cl::desc("Use emulated TLS model"), + cl::init(false)); + +static cl::opt<bool> + UniqueSectionNames("unique-section-names", + cl::desc("Give unique names to every section"), + cl::init(true)); + +static cl::opt<llvm::EABI> + EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"), + cl::init(EABI::Default), + cl::values(clEnumValN(EABI::Default, "default", + "Triple default EABI version"), + clEnumValN(EABI::EABI4, "4", "EABI version 4"), + clEnumValN(EABI::EABI5, "5", "EABI version 5"), + clEnumValN(EABI::GNU, "gnu", "EABI GNU"))); + +static cl::opt<DebuggerKind> DebuggerTuningOpt( + "debugger-tune", cl::desc("Tune debug info for a particular debugger"), + cl::init(DebuggerKind::Default), + cl::values(clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), + clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), + clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)"))); + +static cl::opt<bool> EnableStackSizeSection( + "stack-size-section", + cl::desc("Emit a section containing stack size metadata"), cl::init(false)); + +// Common utility function tightly tied to the options listed here. Initializes +// a TargetOptions object with CodeGen flags and returns it. +static TargetOptions InitTargetOptionsFromCodeGenFlags() { + TargetOptions Options; + Options.AllowFPOpFusion = FuseFPOps; + Options.UnsafeFPMath = EnableUnsafeFPMath; + Options.NoInfsFPMath = EnableNoInfsFPMath; + Options.NoNaNsFPMath = EnableNoNaNsFPMath; + Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath; + Options.NoTrappingFPMath = EnableNoTrappingFPMath; + Options.FPDenormalMode = DenormalMode; + Options.HonorSignDependentRoundingFPMathOption = + EnableHonorSignDependentRoundingFPMath; + if (FloatABIForCalls != FloatABI::Default) + Options.FloatABIType = FloatABIForCalls; + Options.NoZerosInBSS = DontPlaceZerosInBSS; + Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; + Options.StackAlignmentOverride = OverrideStackAlignment; + Options.StackSymbolOrdering = StackSymbolOrdering; + Options.UseInitArray = !UseCtors; + Options.RelaxELFRelocations = RelaxELFRelocations; + Options.DataSections = DataSections; + Options.FunctionSections = FunctionSections; + Options.UniqueSectionNames = UniqueSectionNames; + Options.EmulatedTLS = EmulatedTLS; + Options.ExceptionModel = ExceptionModel; + Options.EmitStackSizeSection = EnableStackSizeSection; + + Options.MCOptions = InitMCTargetOptionsFromFlags(); + + Options.ThreadModel = TMModel; + Options.EABIVersion = EABIVersion; + Options.DebuggerTuning = DebuggerTuningOpt; + + return Options; +} + +LLVM_ATTRIBUTE_UNUSED static std::string getCPUStr() { + // If user asked for the 'native' CPU, autodetect here. 
If autodection fails, + // this will set the CPU to an empty string which tells the target to + // pick a basic default. + if (MCPU == "native") + return sys::getHostCPUName(); + + return MCPU; +} + +LLVM_ATTRIBUTE_UNUSED static std::string getFeaturesStr() { + SubtargetFeatures Features; + + // If user asked for the 'native' CPU, we need to autodetect features. + // This is necessary for x86 where the CPU might not support all the + // features the autodetected CPU name lists in the target. For example, + // not all Sandybridge processors support AVX. + if (MCPU == "native") { + StringMap<bool> HostFeatures; + if (sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + } + + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + + return Features.getString(); +} + +/// \brief Set function attributes of functions in Module M based on CPU, +/// Features, and command line flags. +LLVM_ATTRIBUTE_UNUSED static void +setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) { + for (auto &F : M) { + auto &Ctx = F.getContext(); + AttributeList Attrs = F.getAttributes(); + AttrBuilder NewAttrs; + + if (!CPU.empty()) + NewAttrs.addAttribute("target-cpu", CPU); + if (!Features.empty()) + NewAttrs.addAttribute("target-features", Features); + if (DisableFPElim.getNumOccurrences() > 0) + NewAttrs.addAttribute("no-frame-pointer-elim", + DisableFPElim ? "true" : "false"); + if (DisableTailCalls.getNumOccurrences() > 0) + NewAttrs.addAttribute("disable-tail-calls", + toStringRef(DisableTailCalls)); + if (StackRealign) + NewAttrs.addAttribute("stackrealign"); + + if (TrapFuncName.getNumOccurrences() > 0) + for (auto &B : F) + for (auto &I : B) + if (auto *Call = dyn_cast<CallInst>(&I)) + if (const auto *F = Call->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::debugtrap || + F->getIntrinsicID() == Intrinsic::trap) + Call->addAttribute( + llvm::AttributeList::FunctionIndex, + Attribute::get(Ctx, "trap-func-name", TrapFuncName)); + + // Let NewAttrs override Attrs. + F.setAttributes( + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); + } +} diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h deleted file mode 100644 index 0d898827efc61..0000000000000 --- a/include/llvm/CodeGen/CommandFlags.h +++ /dev/null @@ -1,382 +0,0 @@ -//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains codegen-specific flags that are shared between different -// command line tools. The tools "llc" and "opt" both use this file to prevent -// flag duplication. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_COMMANDFLAGS_H -#define LLVM_CODEGEN_COMMANDFLAGS_H - -#include "llvm/ADT/StringExtras.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/MCTargetOptionsCommandFlags.h" -#include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Host.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include <string> -using namespace llvm; - -cl::opt<std::string> -MArch("march", cl::desc("Architecture to generate code for (see --version)")); - -cl::opt<std::string> -MCPU("mcpu", - cl::desc("Target a specific cpu type (-mcpu=help for details)"), - cl::value_desc("cpu-name"), - cl::init("")); - -cl::list<std::string> -MAttrs("mattr", - cl::CommaSeparated, - cl::desc("Target specific attributes (-mattr=help for details)"), - cl::value_desc("a1,+a2,-a3,...")); - -cl::opt<Reloc::Model> RelocModel( - "relocation-model", cl::desc("Choose relocation model"), - cl::values( - clEnumValN(Reloc::Static, "static", "Non-relocatable code"), - clEnumValN(Reloc::PIC_, "pic", - "Fully relocatable, position independent code"), - clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", - "Relocatable external references, non-relocatable code"), - clEnumValN(Reloc::ROPI, "ropi", - "Code and read-only data relocatable, accessed PC-relative"), - clEnumValN(Reloc::RWPI, "rwpi", - "Read-write data relocatable, accessed relative to static base"), - clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi", - "Combination of ropi and rwpi"))); - -static inline Optional<Reloc::Model> getRelocModel() { - if (RelocModel.getNumOccurrences()) { - Reloc::Model R = RelocModel; - return R; - } - return None; -} - -cl::opt<ThreadModel::Model> -TMModel("thread-model", - cl::desc("Choose threading model"), - cl::init(ThreadModel::POSIX), - cl::values(clEnumValN(ThreadModel::POSIX, "posix", - "POSIX thread model"), - clEnumValN(ThreadModel::Single, "single", - "Single thread model"))); - -cl::opt<llvm::CodeModel::Model> -CMModel("code-model", - cl::desc("Choose code model"), - cl::init(CodeModel::Default), - cl::values(clEnumValN(CodeModel::Default, "default", - "Target default code model"), - clEnumValN(CodeModel::Small, "small", - "Small code model"), - clEnumValN(CodeModel::Kernel, "kernel", - "Kernel code model"), - clEnumValN(CodeModel::Medium, "medium", - "Medium code model"), - clEnumValN(CodeModel::Large, "large", - "Large code model"))); - -cl::opt<llvm::ExceptionHandling> -ExceptionModel("exception-model", - cl::desc("exception model"), - cl::init(ExceptionHandling::None), - cl::values(clEnumValN(ExceptionHandling::None, "default", - "default exception handling model"), - clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", - "DWARF-like CFI based exception handling"), - clEnumValN(ExceptionHandling::SjLj, "sjlj", - "SjLj exception handling"), - clEnumValN(ExceptionHandling::ARM, "arm", - "ARM EHABI exceptions"), - clEnumValN(ExceptionHandling::WinEH, "wineh", - "Windows exception model"))); - -cl::opt<TargetMachine::CodeGenFileType> -FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile), - cl::desc("Choose a file type (not all types are supported by all targets):"), - cl::values( - clEnumValN(TargetMachine::CGFT_AssemblyFile, "asm", - "Emit an assembly ('.s') file"), - clEnumValN(TargetMachine::CGFT_ObjectFile, "obj", - "Emit a native object ('.o') file"), - 
clEnumValN(TargetMachine::CGFT_Null, "null", - "Emit nothing, for performance testing"))); - -cl::opt<bool> -DisableFPElim("disable-fp-elim", - cl::desc("Disable frame pointer elimination optimization"), - cl::init(false)); - -cl::opt<bool> -EnableUnsafeFPMath("enable-unsafe-fp-math", - cl::desc("Enable optimizations that may decrease FP precision"), - cl::init(false)); - -cl::opt<bool> -EnableNoInfsFPMath("enable-no-infs-fp-math", - cl::desc("Enable FP math optimizations that assume no +-Infs"), - cl::init(false)); - -cl::opt<bool> -EnableNoNaNsFPMath("enable-no-nans-fp-math", - cl::desc("Enable FP math optimizations that assume no NaNs"), - cl::init(false)); - -cl::opt<bool> -EnableNoSignedZerosFPMath("enable-no-signed-zeros-fp-math", - cl::desc("Enable FP math optimizations that assume " - "the sign of 0 is insignificant"), - cl::init(false)); - -cl::opt<bool> -EnableNoTrappingFPMath("enable-no-trapping-fp-math", - cl::desc("Enable setting the FP exceptions build " - "attribute not to use exceptions"), - cl::init(false)); - -cl::opt<llvm::FPDenormal::DenormalMode> -DenormalMode("denormal-fp-math", - cl::desc("Select which denormal numbers the code is permitted to require"), - cl::init(FPDenormal::IEEE), - cl::values( - clEnumValN(FPDenormal::IEEE, "ieee", - "IEEE 754 denormal numbers"), - clEnumValN(FPDenormal::PreserveSign, "preserve-sign", - "the sign of a flushed-to-zero number is preserved " - "in the sign of 0"), - clEnumValN(FPDenormal::PositiveZero, "positive-zero", - "denormals are flushed to positive zero"))); - -cl::opt<bool> -EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", - cl::Hidden, - cl::desc("Force codegen to assume rounding mode can change dynamically"), - cl::init(false)); - -cl::opt<llvm::FloatABI::ABIType> -FloatABIForCalls("float-abi", - cl::desc("Choose float ABI type"), - cl::init(FloatABI::Default), - cl::values( - clEnumValN(FloatABI::Default, "default", - "Target default float ABI type"), - clEnumValN(FloatABI::Soft, "soft", - "Soft float ABI (implied by -soft-float)"), - clEnumValN(FloatABI::Hard, "hard", - "Hard float ABI (uses FP registers)"))); - -cl::opt<llvm::FPOpFusion::FPOpFusionMode> -FuseFPOps("fp-contract", - cl::desc("Enable aggressive formation of fused FP ops"), - cl::init(FPOpFusion::Standard), - cl::values( - clEnumValN(FPOpFusion::Fast, "fast", - "Fuse FP ops whenever profitable"), - clEnumValN(FPOpFusion::Standard, "on", - "Only fuse 'blessed' FP ops."), - clEnumValN(FPOpFusion::Strict, "off", - "Only fuse FP ops when the result won't be affected."))); - -cl::opt<bool> -DontPlaceZerosInBSS("nozero-initialized-in-bss", - cl::desc("Don't place zero-initialized symbols into bss section"), - cl::init(false)); - -cl::opt<bool> -EnableGuaranteedTailCallOpt("tailcallopt", - cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), - cl::init(false)); - -cl::opt<bool> -DisableTailCalls("disable-tail-calls", - cl::desc("Never emit tail calls"), - cl::init(false)); - -cl::opt<bool> -StackSymbolOrdering("stack-symbol-ordering", - cl::desc("Order local stack symbols."), - cl::init(true)); - -cl::opt<unsigned> -OverrideStackAlignment("stack-alignment", - cl::desc("Override default stack alignment"), - cl::init(0)); - -cl::opt<bool> -StackRealign("stackrealign", - cl::desc("Force align the stack to the minimum alignment"), - cl::init(false)); - -cl::opt<std::string> -TrapFuncName("trap-func", cl::Hidden, - cl::desc("Emit a call to trap function rather than a trap instruction"), - cl::init("")); - 
-cl::opt<bool> -UseCtors("use-ctors", - cl::desc("Use .ctors instead of .init_array."), - cl::init(false)); - -cl::opt<bool> RelaxELFRelocations( - "relax-elf-relocations", - cl::desc("Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), - cl::init(false)); - -cl::opt<bool> DataSections("data-sections", - cl::desc("Emit data into separate sections"), - cl::init(false)); - -cl::opt<bool> -FunctionSections("function-sections", - cl::desc("Emit functions into separate sections"), - cl::init(false)); - -cl::opt<bool> EmulatedTLS("emulated-tls", - cl::desc("Use emulated TLS model"), - cl::init(false)); - -cl::opt<bool> UniqueSectionNames("unique-section-names", - cl::desc("Give unique names to every section"), - cl::init(true)); - -cl::opt<llvm::EABI> EABIVersion( - "meabi", cl::desc("Set EABI type (default depends on triple):"), - cl::init(EABI::Default), - cl::values(clEnumValN(EABI::Default, "default", - "Triple default EABI version"), - clEnumValN(EABI::EABI4, "4", "EABI version 4"), - clEnumValN(EABI::EABI5, "5", "EABI version 5"), - clEnumValN(EABI::GNU, "gnu", "EABI GNU"))); - -cl::opt<DebuggerKind> -DebuggerTuningOpt("debugger-tune", - cl::desc("Tune debug info for a particular debugger"), - cl::init(DebuggerKind::Default), - cl::values( - clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), - clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), - clEnumValN(DebuggerKind::SCE, "sce", - "SCE targets (e.g. PS4)"))); - -// Common utility function tightly tied to the options listed here. Initializes -// a TargetOptions object with CodeGen flags and returns it. -static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { - TargetOptions Options; - Options.AllowFPOpFusion = FuseFPOps; - Options.UnsafeFPMath = EnableUnsafeFPMath; - Options.NoInfsFPMath = EnableNoInfsFPMath; - Options.NoNaNsFPMath = EnableNoNaNsFPMath; - Options.NoSignedZerosFPMath = EnableNoSignedZerosFPMath; - Options.NoTrappingFPMath = EnableNoTrappingFPMath; - Options.FPDenormalMode = DenormalMode; - Options.HonorSignDependentRoundingFPMathOption = - EnableHonorSignDependentRoundingFPMath; - if (FloatABIForCalls != FloatABI::Default) - Options.FloatABIType = FloatABIForCalls; - Options.NoZerosInBSS = DontPlaceZerosInBSS; - Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; - Options.StackAlignmentOverride = OverrideStackAlignment; - Options.StackSymbolOrdering = StackSymbolOrdering; - Options.UseInitArray = !UseCtors; - Options.RelaxELFRelocations = RelaxELFRelocations; - Options.DataSections = DataSections; - Options.FunctionSections = FunctionSections; - Options.UniqueSectionNames = UniqueSectionNames; - Options.EmulatedTLS = EmulatedTLS; - Options.ExceptionModel = ExceptionModel; - - Options.MCOptions = InitMCTargetOptionsFromFlags(); - - Options.ThreadModel = TMModel; - Options.EABIVersion = EABIVersion; - Options.DebuggerTuning = DebuggerTuningOpt; - - return Options; -} - -static inline std::string getCPUStr() { - // If user asked for the 'native' CPU, autodetect here. If autodection fails, - // this will set the CPU to an empty string which tells the target to - // pick a basic default. - if (MCPU == "native") - return sys::getHostCPUName(); - - return MCPU; -} - -static inline std::string getFeaturesStr() { - SubtargetFeatures Features; - - // If user asked for the 'native' CPU, we need to autodetect features. - // This is necessary for x86 where the CPU might not support all the - // features the autodetected CPU name lists in the target. For example, - // not all Sandybridge processors support AVX. 
- if (MCPU == "native") { - StringMap<bool> HostFeatures; - if (sys::getHostCPUFeatures(HostFeatures)) - for (auto &F : HostFeatures) - Features.AddFeature(F.first(), F.second); - } - - for (unsigned i = 0; i != MAttrs.size(); ++i) - Features.AddFeature(MAttrs[i]); - - return Features.getString(); -} - -/// \brief Set function attributes of functions in Module M based on CPU, -/// Features, and command line flags. -static inline void setFunctionAttributes(StringRef CPU, StringRef Features, - Module &M) { - for (auto &F : M) { - auto &Ctx = F.getContext(); - AttributeList Attrs = F.getAttributes(); - AttrBuilder NewAttrs; - - if (!CPU.empty()) - NewAttrs.addAttribute("target-cpu", CPU); - if (!Features.empty()) - NewAttrs.addAttribute("target-features", Features); - if (DisableFPElim.getNumOccurrences() > 0) - NewAttrs.addAttribute("no-frame-pointer-elim", - DisableFPElim ? "true" : "false"); - if (DisableTailCalls.getNumOccurrences() > 0) - NewAttrs.addAttribute("disable-tail-calls", - toStringRef(DisableTailCalls)); - if (StackRealign) - NewAttrs.addAttribute("stackrealign"); - - if (TrapFuncName.getNumOccurrences() > 0) - for (auto &B : F) - for (auto &I : B) - if (auto *Call = dyn_cast<CallInst>(&I)) - if (const auto *F = Call->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::debugtrap || - F->getIntrinsicID() == Intrinsic::trap) - Call->addAttribute( - llvm::AttributeList::FunctionIndex, - Attribute::get(Ctx, "trap-func-name", TrapFuncName)); - - // Let NewAttrs override Attrs. - F.setAttributes( - Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); - } -} - -#endif diff --git a/include/llvm/CodeGen/CostTable.h b/include/llvm/CodeGen/CostTable.h new file mode 100644 index 0000000000000..5a6368c5a0f8a --- /dev/null +++ b/include/llvm/CodeGen/CostTable.h @@ -0,0 +1,69 @@ +//===-- CostTable.h - Instruction Cost Table handling -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Cost tables and simple lookup functions +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_COSTTABLE_H_ +#define LLVM_CODEGEN_COSTTABLE_H_ + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineValueType.h" + +namespace llvm { + +/// Cost Table Entry +struct CostTblEntry { + int ISD; + MVT::SimpleValueType Type; + unsigned Cost; +}; + +/// Find in cost table, TypeTy must be comparable to CompareTy by == +inline const CostTblEntry *CostTableLookup(ArrayRef<CostTblEntry> Tbl, + int ISD, MVT Ty) { + auto I = find_if(Tbl, [=](const CostTblEntry &Entry) { + return ISD == Entry.ISD && Ty == Entry.Type; + }); + if (I != Tbl.end()) + return I; + + // Could not find an entry. 
+ return nullptr; +} + +/// Type Conversion Cost Table +struct TypeConversionCostTblEntry { + int ISD; + MVT::SimpleValueType Dst; + MVT::SimpleValueType Src; + unsigned Cost; +}; + +/// Find in type conversion cost table, TypeTy must be comparable to CompareTy +/// by == +inline const TypeConversionCostTblEntry * +ConvertCostTableLookup(ArrayRef<TypeConversionCostTblEntry> Tbl, + int ISD, MVT Dst, MVT Src) { + auto I = find_if(Tbl, [=](const TypeConversionCostTblEntry &Entry) { + return ISD == Entry.ISD && Src == Entry.Src && Dst == Entry.Dst; + }); + if (I != Tbl.end()) + return I; + + // Could not find an entry. + return nullptr; +} + +} // namespace llvm + +#endif /* LLVM_CODEGEN_COSTTABLE_H_ */ diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index 77c37ac7abeae..d3aabe22f2165 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -208,6 +208,13 @@ public: // Add a DAG mutation to be done before the packetization begins. void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation); + + bool alias(const MachineInstr &MI1, const MachineInstr &MI2, + bool UseTBAA = true) const; + +private: + bool alias(const MachineMemOperand &Op1, const MachineMemOperand &Op2, + bool UseTBAA = true) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 74e4179e73e98..85bb826dcb8c4 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -27,7 +28,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Target/TargetLowering.h" #include <algorithm> #include <cstdint> #include <utility> diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h index 98ff526dfe946..55e25c9823b17 100644 --- a/include/llvm/CodeGen/FaultMaps.h +++ b/include/llvm/CodeGen/FaultMaps.h @@ -39,6 +39,9 @@ public: void recordFaultingOp(FaultKind FaultTy, const MCSymbol *HandlerLabel); void serializeToFaultMapSection(); + void reset() { + FunctionInfos.clear(); + } private: static const char *WFMP; diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index f32a58915118f..3b39d87ffb4a4 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -23,11 +23,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <utility> #include <vector> diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index e7ce1946889e3..ba84d76de1649 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -18,10 +18,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallSite.h" #include 
"llvm/IR/CallingConv.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetCallingConv.h" #include <cstdint> #include <functional> diff --git a/include/llvm/CodeGen/GlobalISel/GISelAccessor.h b/include/llvm/CodeGen/GlobalISel/GISelAccessor.h deleted file mode 100644 index 8dea38059ea47..0000000000000 --- a/include/llvm/CodeGen/GlobalISel/GISelAccessor.h +++ /dev/null @@ -1,39 +0,0 @@ -//===-- GISelAccessor.h - GISel Accessor ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// This file declares the API to access the various APIs related -/// to GlobalISel. -// -//===----------------------------------------------------------------------===/ - -#ifndef LLVM_CODEGEN_GLOBALISEL_GISELACCESSOR_H -#define LLVM_CODEGEN_GLOBALISEL_GISELACCESSOR_H - -namespace llvm { -class CallLowering; -class InstructionSelector; -class LegalizerInfo; -class RegisterBankInfo; - -/// The goal of this helper class is to gather the accessor to all -/// the APIs related to GlobalISel. -/// It should be derived to feature an actual accessor to the GISel APIs. -/// The reason why this is not simply done into the subtarget is to avoid -/// spreading ifdefs around. -struct GISelAccessor { - virtual ~GISelAccessor() {} - virtual const CallLowering *getCallLowering() const { return nullptr;} - virtual const InstructionSelector *getInstructionSelector() const { - return nullptr; - } - virtual const LegalizerInfo *getLegalizerInfo() const { return nullptr; } - virtual const RegisterBankInfo *getRegBankInfo() const { return nullptr;} -}; -} // End namespace llvm; -#endif diff --git a/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h new file mode 100644 index 0000000000000..167905dc9aa1a --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h @@ -0,0 +1,69 @@ +//===- GISelWorkList.h - Worklist for GISel passes ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_GISEL_WORKLIST_H +#define LLVM_GISEL_WORKLIST_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +namespace llvm { + +class MachineInstr; + +// Worklist which mostly works similar to InstCombineWorkList, but on MachineInstrs. +// The main difference with something like a SetVector is that erasing an element doesn't +// move all elements over one place - instead just nulls out the element of the vector. +// FIXME: Does it make sense to factor out common code with the instcombinerWorkList? +template<unsigned N> +class GISelWorkList { + SmallVector<MachineInstr*, N> Worklist; + DenseMap<MachineInstr*, unsigned> WorklistMap; + +public: + GISelWorkList() = default; + + bool empty() const { return WorklistMap.empty(); } + + unsigned size() const { return WorklistMap.size(); } + + /// Add - Add the specified instruction to the worklist if it isn't already + /// in it. + void insert(MachineInstr *I) { + if (WorklistMap.try_emplace(I, Worklist.size()).second) { + Worklist.push_back(I); + } + } + + /// Remove - remove I from the worklist if it exists. 
+ void remove(MachineInstr *I) { + auto It = WorklistMap.find(I); + if (It == WorklistMap.end()) return; // Not in worklist. + + // Don't bother moving everything down, just null out the slot. + Worklist[It->second] = nullptr; + + WorklistMap.erase(It); + } + + MachineInstr *pop_back_val() { + MachineInstr *I; + do { + I = Worklist.pop_back_val(); + } while(!I); + assert(I && "Pop back on empty worklist"); + WorklistMap.erase(I); + return I; + } +}; + +} // end namespace llvm. + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 1060d8fd667e6..e599a1b179ece 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -16,7 +16,10 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CodeGenCoverage.h" #include <bitset> #include <cstddef> #include <cstdint> @@ -26,9 +29,12 @@ namespace llvm { +class APInt; +class APFloat; class LLT; class MachineInstr; class MachineInstrBuilder; +class MachineFunction; class MachineOperand; class MachineRegisterInfo; class RegisterBankInfo; @@ -63,6 +69,18 @@ public: }; enum { + /// Begin a try-block to attempt a match and jump to OnFail if it is + /// unsuccessful. + /// - OnFail - The MatchTable entry at which to resume if the match fails. + /// + /// FIXME: This ought to take an argument indicating the number of try-blocks + /// to exit on failure. It's usually one but the last match attempt of + /// a block will need more. The (implemented) alternative is to tack a + /// GIM_Reject on the end of each try-block which is simpler but + /// requires an extra opcode and iteration in the interpreter on each + /// failed match. + GIM_Try, + /// Record the specified instruction /// - NewInsnID - Instruction ID to define /// - InsnID - Instruction ID @@ -81,12 +99,35 @@ enum { /// - InsnID - Instruction ID /// - Expected number of operands GIM_CheckNumOperands, + /// Check an immediate predicate on the specified instruction + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckI64ImmPredicate, + /// Check an immediate predicate on the specified instruction via an APInt. + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckAPIntImmPredicate, + /// Check a floating point immediate predicate on the specified instruction. + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckAPFloatImmPredicate, + /// Check a memory operation has the specified atomic ordering. + /// - InsnID - Instruction ID + /// - Ordering - The AtomicOrdering value + GIM_CheckAtomicOrdering, + GIM_CheckAtomicOrderingOrStrongerThan, + GIM_CheckAtomicOrderingWeakerThan, /// Check the type for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index /// - Expected type GIM_CheckType, + /// Check the type of a pointer to any address space. + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - SizeInBits - The size of the pointer value in bits. + GIM_CheckPointerToAny, /// Check the register bank for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -124,6 +165,17 @@ enum { /// - InsnID - Instruction ID GIM_CheckIsSafeToFold, + /// Check the specified operands are identical. 
+ /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - OtherInsnID - Other instruction ID + /// - OtherOpIdx - Other operand index + GIM_CheckIsSameOperand, + + /// Fail the current try-block, or completely fail to match if there is no + /// current try-block. + GIM_Reject, + //=== Renderers === /// Mutate an instruction @@ -141,6 +193,13 @@ enum { /// - OldInsnID - Instruction ID to copy from /// - OpIdx - The operand to copy GIR_Copy, + /// Copy an operand to the specified instruction or add a zero register if the + /// operand is a zero immediate. + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// - OpIdx - The operand to copy + /// - ZeroReg - The zero register to use + GIR_CopyOrAddZeroReg, /// Copy an operand to the specified instruction /// - NewInsnID - Instruction ID to modify /// - OldInsnID - Instruction ID to copy from @@ -159,6 +218,10 @@ enum { /// - InsnID - Instruction ID to modify /// - RegNum - The register to add GIR_AddRegister, + /// Add a a temporary register to the specified instruction + /// - InsnID - Instruction ID to modify + /// - TempRegID - The temporary register ID to add + GIR_AddTempRegister, /// Add an immediate to the specified instruction /// - InsnID - Instruction ID to modify /// - Imm - The immediate to add @@ -167,6 +230,17 @@ enum { /// - InsnID - Instruction ID to modify /// - RendererID - The renderer to call GIR_ComplexRenderer, + /// Render sub-operands of complex operands to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RendererID - The renderer to call + /// - RenderOpID - The suboperand to render. + GIR_ComplexSubOperandRenderer, + + /// Render a G_CONSTANT operator as a sign-extended immediate. + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// The operand index is implicitly 1. + GIR_CopyConstantAsSImm, /// Constrain an instruction operand to a register class. /// - InsnID - Instruction ID to modify @@ -179,18 +253,39 @@ enum { GIR_ConstrainSelectedInstOperands, /// Merge all memory operands into instruction. /// - InsnID - Instruction ID to modify + /// - MergeInsnID... - One or more Instruction ID to merge into the result. + /// - GIU_MergeMemOperands_EndOfList - Terminates the list of instructions to + /// merge. GIR_MergeMemOperands, /// Erase from parent. /// - InsnID - Instruction ID to erase GIR_EraseFromParent, + /// Create a new temporary register that's not constrained. + /// - TempRegID - The temporary register ID to initialize. + /// - Expected type + GIR_MakeTempReg, /// A successful emission GIR_Done, + + /// Increment the rule coverage counter. + /// - RuleID - The ID of the rule that was covered. + GIR_Coverage, +}; + +enum { + /// Indicates the end of the variable-length MergeInsnID list in a + /// GIR_MergeMemOperands opcode. + GIU_MergeMemOperands_EndOfList = -1, }; /// Provides the logic to select generic machine instructions. 
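These opcodes form a small bytecode that executeMatchTable() interprets; the tables are normally emitted by TableGen from the target's patterns. As a rough illustration of the encoding (the operand orders follow the documentation above, but the type index, the jump target, and the placeholder target opcode are invented for this sketch, and a generated table would carry additional checks such as operand counts, register banks, and feature bits):

#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include <cstdint>
using namespace llvm;

// Placeholder for a real target instruction opcode.
static const int64_t MYTGT_ADDrr = 0;

// One try-block: match a G_ADD whose operand 0 has TypeObjects[0]'s type,
// mutate it to MYTGT_ADDrr, and constrain its register operands. Index 16 is
// the GIM_Reject immediately after the block, so any failed check falls
// through to an overall rejection.
static const int64_t MatchTable0[] = {
    GIM_Try, /*OnFail*/ 16,
    /*2:*/  GIM_CheckOpcode, /*MI*/ 0, TargetOpcode::G_ADD,
    /*5:*/  GIM_CheckType, /*MI*/ 0, /*Op*/ 0, /*TypeID*/ 0,
    /*9:*/  GIR_MutateOpcode, /*OldMI*/ 0, /*NewMI*/ 0, MYTGT_ADDrr,
    /*13:*/ GIR_ConstrainSelectedInstOperands, /*MI*/ 0,
    /*15:*/ GIR_Done,
    /*16:*/ GIM_Reject,
};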
class InstructionSelector { public: + using I64ImmediatePredicateFn = bool (*)(int64_t); + using APIntImmediatePredicateFn = bool (*)(const APInt &); + using APFloatImmediatePredicateFn = bool (*)(const APFloat &); + virtual ~InstructionSelector() = default; /// Select the (possibly generic) instruction \p I to only use target-specific @@ -203,17 +298,18 @@ public: /// if returns true: /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) - /// - virtual bool select(MachineInstr &I) const = 0; + virtual bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const = 0; protected: - using ComplexRendererFn = std::function<void(MachineInstrBuilder &)>; + using ComplexRendererFns = + Optional<SmallVector<std::function<void(MachineInstrBuilder &)>, 4>>; using RecordedMIVector = SmallVector<MachineInstr *, 4>; using NewMIVector = SmallVector<MachineInstrBuilder, 4>; struct MatcherState { - std::vector<ComplexRendererFn> Renderers; + std::vector<ComplexRendererFns::value_type> Renderers; RecordedMIVector MIs; + DenseMap<unsigned, unsigned> TempRegisters; MatcherState(unsigned MaxRenderers); }; @@ -223,7 +319,10 @@ public: struct MatcherInfoTy { const LLT *TypeObjects; const PredicateBitset *FeatureBitsets; - const std::vector<ComplexMatcherMemFn> ComplexPredicates; + const I64ImmediatePredicateFn *I64ImmPredicateFns; + const APIntImmediatePredicateFn *APIntImmPredicateFns; + const APFloatImmediatePredicateFn *APFloatImmPredicateFns; + const ComplexMatcherMemFn *ComplexPredicates; }; protected: @@ -238,8 +337,8 @@ protected: const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI, - const PredicateBitset &AvailableFeatures) const; + const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, + CodeGenCoverage &CoverageInfo) const; /// Constrain a register operand of an instruction \p I to a specified /// register class. This could involve inserting COPYs before (for uses) or @@ -268,7 +367,16 @@ protected: bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; - bool isObviouslySafeToFold(MachineInstr &MI) const; + /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the + /// right-hand side. GlobalISel's separation of pointer and integer types + /// means that we don't need to worry about G_OR with equivalent semantics. + bool isBaseWithConstantOffset(const MachineOperand &Root, + const MachineRegisterInfo &MRI) const; + + /// Return true if MI can obviously be folded into IntoMI. + /// MI and IntoMI do not need to be in the same basic blocks, but MI must + /// preceed IntoMI. 
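The ComplexRendererFns type above replaces the old single std::function renderer: a complex-pattern handler in a target's selector returns None when the pattern does not apply, or a small list of per-operand renderer lambdas when it does (GIR_ComplexSubOperandRenderer then picks individual entries out of that list). A minimal sketch under the assumption of a hypothetical target selector; FooInstructionSelector and selectAddrModeFoo are illustrative names, not in-tree API:

#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;

class FooInstructionSelector : public InstructionSelector {
  // Reject non-register roots; otherwise render the operand as a base
  // register plus a zero immediate offset.
  ComplexRendererFns selectAddrModeFoo(MachineOperand &Root) const {
    if (!Root.isReg())
      return None;
    unsigned Base = Root.getReg();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addUse(Base); }, // base register
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // offset
    }};
  }
};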
+ bool isObviouslySafeToFold(MachineInstr &MI, MachineInstr &IntoMI) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index 98b6b859b9e26..ac2c055ab1452 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h ---------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,32 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstdint> + namespace llvm { + +/// GlobalISel PatFrag Predicates +enum { + GIPFP_I64_Invalid = 0, + GIPFP_APInt_Invalid = 0, + GIPFP_APFloat_Invalid = 0, +}; + template <class TgtInstructionSelector, class PredicateBitset, class ComplexMatcherMemFn> bool InstructionSelector::executeMatchTable( @@ -24,306 +49,687 @@ bool InstructionSelector::executeMatchTable( const MatcherInfoTy<PredicateBitset, ComplexMatcherMemFn> &MatcherInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI, - const PredicateBitset &AvailableFeatures) const { - const int64_t *Command = MatchTable; + const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, + CodeGenCoverage &CoverageInfo) const { + uint64_t CurrentIdx = 0; + SmallVector<uint64_t, 8> OnFailResumeAt; + + enum RejectAction { RejectAndGiveUp, RejectAndResume }; + auto handleReject = [&]() -> RejectAction { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Rejected\n"); + if (OnFailResumeAt.empty()) + return RejectAndGiveUp; + CurrentIdx = OnFailResumeAt.back(); + OnFailResumeAt.pop_back(); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Resume at " << CurrentIdx << " (" + << OnFailResumeAt.size() << " try-blocks remain)\n"); + return RejectAndResume; + }; + while (true) { - switch (*Command++) { + assert(CurrentIdx != ~0u && "Invalid MatchTable index"); + switch (MatchTable[CurrentIdx++]) { + case GIM_Try: { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Begin try-block\n"); + OnFailResumeAt.push_back(MatchTable[CurrentIdx++]); + break; + } + case GIM_RecordInsn: { - int64_t NewInsnID = *Command++; - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; // As an optimisation we require that MIs[0] is always the root. Refuse // any attempt to modify it. 
assert(NewInsnID != 0 && "Refusing to modify MIs[0]"); - (void)NewInsnID; MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); if (!MO.isReg()) { - DEBUG(dbgs() << "Rejected (not a register)\n"); - return false; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Not a register\n"); + if (handleReject() == RejectAndGiveUp) + return false; + break; } if (TRI.isPhysicalRegister(MO.getReg())) { - DEBUG(dbgs() << "Rejected (is a physical register)\n"); - return false; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": Is a physical register\n"); + if (handleReject() == RejectAndGiveUp) + return false; + break; } - assert((size_t)NewInsnID == State.MIs.size() && - "Expected to store MIs in order"); - State.MIs.push_back(MRI.getVRegDef(MO.getReg())); - DEBUG(dbgs() << "MIs[" << NewInsnID << "] = GIM_RecordInsn(" << InsnID - << ", " << OpIdx << ")\n"); + MachineInstr *NewMI = MRI.getVRegDef(MO.getReg()); + if ((size_t)NewInsnID < State.MIs.size()) + State.MIs[NewInsnID] = NewMI; + else { + assert((size_t)NewInsnID == State.MIs.size() && + "Expected to store MIs in order"); + State.MIs.push_back(NewMI); + } + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": MIs[" << NewInsnID + << "] = GIM_RecordInsn(" << InsnID << ", " << OpIdx + << ")\n"); break; } case GIM_CheckFeatures: { - int64_t ExpectedBitsetID = *Command++; - DEBUG(dbgs() << "GIM_CheckFeatures(ExpectedBitsetID=" << ExpectedBitsetID - << ")\n"); + int64_t ExpectedBitsetID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckFeatures(ExpectedBitsetID=" + << ExpectedBitsetID << ")\n"); if ((AvailableFeatures & MatcherInfo.FeatureBitsets[ExpectedBitsetID]) != MatcherInfo.FeatureBitsets[ExpectedBitsetID]) { - DEBUG(dbgs() << "Rejected\n"); - return false; + if (handleReject() == RejectAndGiveUp) + return false; } break; } case GIM_CheckOpcode: { - int64_t InsnID = *Command++; - int64_t Expected = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Expected = MatchTable[CurrentIdx++]; unsigned Opcode = State.MIs[InsnID]->getOpcode(); - DEBUG(dbgs() << "GIM_CheckOpcode(MIs[" << InsnID << "], ExpectedOpcode=" - << Expected << ") // Got=" << Opcode << "\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID + << "], ExpectedOpcode=" << Expected + << ") // Got=" << Opcode << "\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (Opcode != Expected) - return false; + if (Opcode != Expected) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckNumOperands: { - int64_t InsnID = *Command++; - int64_t Expected = *Command++; - DEBUG(dbgs() << "GIM_CheckNumOperands(MIs[" << InsnID - << "], Expected=" << Expected << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Expected = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckNumOperands(MIs[" + << InsnID << "], Expected=" << Expected << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (State.MIs[InsnID]->getNumOperands() != Expected) - return false; + if (State.MIs[InsnID]->getNumOperands() != Expected) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } + case GIM_CheckI64ImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = 
MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckI64ImmPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_CONSTANT && + "Expected G_CONSTANT"); + assert(Predicate > GIPFP_I64_Invalid && "Expected a valid predicate"); + int64_t Value = 0; + if (State.MIs[InsnID]->getOperand(1).isCImm()) + Value = State.MIs[InsnID]->getOperand(1).getCImm()->getSExtValue(); + else if (State.MIs[InsnID]->getOperand(1).isImm()) + Value = State.MIs[InsnID]->getOperand(1).getImm(); + else + llvm_unreachable("Expected Imm or CImm operand"); + + if (!MatcherInfo.I64ImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; break; } + case GIM_CheckAPIntImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckAPIntImmPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() && "Expected G_CONSTANT"); + assert(Predicate > GIPFP_APInt_Invalid && "Expected a valid predicate"); + APInt Value; + if (State.MIs[InsnID]->getOperand(1).isCImm()) + Value = State.MIs[InsnID]->getOperand(1).getCImm()->getValue(); + else + llvm_unreachable("Expected Imm or CImm operand"); + + if (!MatcherInfo.APIntImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAPFloatImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIM_CheckAPFloatImmPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_FCONSTANT && + "Expected G_FCONSTANT"); + assert(State.MIs[InsnID]->getOperand(1).isFPImm() && "Expected FPImm operand"); + assert(Predicate > GIPFP_APFloat_Invalid && "Expected a valid predicate"); + APFloat Value = State.MIs[InsnID]->getOperand(1).getFPImm()->getValueAPF(); + + if (!MatcherInfo.APFloatImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAtomicOrdering: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (MMO->getOrdering() != Ordering) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAtomicOrderingOrStrongerThan: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckAtomicOrderingOrStrongerThan(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + 
assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (!isAtLeastOrStrongerThan(MMO->getOrdering(), Ordering)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAtomicOrderingWeakerThan: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckAtomicOrderingWeakerThan(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (!isStrongerThan(Ordering, MMO->getOrdering())) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckType: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t TypeID = *Command++; - DEBUG(dbgs() << "GIM_CheckType(MIs[" << InsnID << "]->getOperand(" - << OpIdx << "), TypeID=" << TypeID << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t TypeID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckType(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), TypeID=" << TypeID << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); if (MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()) != - MatcherInfo.TypeObjects[TypeID]) - return false; + MatcherInfo.TypeObjects[TypeID]) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } + case GIM_CheckPointerToAny: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t SizeInBits = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), SizeInBits=" << SizeInBits << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + // iPTR must be looked up in the target. 
+ if (SizeInBits == 0) { + MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent(); + SizeInBits = MF->getDataLayout().getPointerSizeInBits(0); + } + + assert(SizeInBits != 0 && "Pointer size must be known"); + + const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); + if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } case GIM_CheckRegBankForClass: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t RCEnum = *Command++; - DEBUG(dbgs() << "GIM_CheckRegBankForClass(MIs[" << InsnID - << "]->getOperand(" << OpIdx << "), RCEnum=" << RCEnum - << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t RCEnum = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckRegBankForClass(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), RCEnum=" << RCEnum << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); if (&RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != - RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, TRI)) - return false; + RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, + TRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckComplexPattern: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t RendererID = *Command++; - int64_t ComplexPredicateID = *Command++; - DEBUG(dbgs() << "State.Renderers[" << RendererID - << "] = GIM_CheckComplexPattern(MIs[" << InsnID - << "]->getOperand(" << OpIdx - << "), ComplexPredicateID=" << ComplexPredicateID << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t RendererID = MatchTable[CurrentIdx++]; + int64_t ComplexPredicateID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": State.Renderers[" << RendererID + << "] = GIM_CheckComplexPattern(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), ComplexPredicateID=" << ComplexPredicateID + << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); // FIXME: Use std::invoke() when it's available. 
- if (!(State.Renderers[RendererID] = - (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( - State.MIs[InsnID]->getOperand(OpIdx)))) - return false; + ComplexRendererFns Renderer = + (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + State.MIs[InsnID]->getOperand(OpIdx)); + if (Renderer.hasValue()) + State.Renderers[RendererID] = Renderer.getValue(); + else + if (handleReject() == RejectAndGiveUp) + return false; break; } + case GIM_CheckConstantInt: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t Value = *Command++; - DEBUG(dbgs() << "GIM_CheckConstantInt(MIs[" << InsnID << "]->getOperand(" - << OpIdx << "), Value=" << Value << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckConstantInt(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, MRI)) - return false; + + // isOperandImmEqual() will sign-extend to 64-bits, so should we. + LLT Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); + Value = SignExtend64(Value, Ty.getSizeInBits()); + + if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, + MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckLiteralInt: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t Value = *Command++; - DEBUG(dbgs() << "GIM_CheckLiteralInt(MIs[" << InsnID << "]->getOperand(" << OpIdx - << "), Value=" << Value << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckLiteralInt(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); - if (!OM.isCImm() || !OM.getCImm()->equalsInt(Value)) - return false; + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isCImm() || !MO.getCImm()->equalsInt(Value)) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_CheckIntrinsicID: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t Value = *Command++; - DEBUG(dbgs() << "GIM_CheckIntrinsicID(MIs[" << InsnID << "]->getOperand(" << OpIdx - << "), Value=" << Value << ")\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t Value = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIntrinsicID(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); - if (!OM.isIntrinsicID() || OM.getIntrinsicID() != Value) - return false; + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isIntrinsicID() || MO.getIntrinsicID() != Value) + if (handleReject() == RejectAndGiveUp) + return false; break; } + case GIM_CheckIsMBB: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - DEBUG(dbgs() << "GIM_CheckIsMBB(MIs[" << InsnID << 
"]->getOperand(" - << OpIdx << "))\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsMBB(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "))\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!State.MIs[InsnID]->getOperand(OpIdx).isMBB()) - return false; + if (!State.MIs[InsnID]->getOperand(OpIdx).isMBB()) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } case GIM_CheckIsSafeToFold: { - int64_t InsnID = *Command++; - DEBUG(dbgs() << "GIM_CheckIsSafeToFold(MIs[" << InsnID << "])\n"); + int64_t InsnID = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsSafeToFold(MIs[" + << InsnID << "])\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - if (!isObviouslySafeToFold(*State.MIs[InsnID])) - return false; + if (!isObviouslySafeToFold(*State.MIs[InsnID], *State.MIs[0])) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } + case GIM_CheckIsSameOperand: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t OtherInsnID = MatchTable[CurrentIdx++]; + int64_t OtherOpIdx = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckIsSameOperand(MIs[" + << InsnID << "][" << OpIdx << "], MIs[" + << OtherInsnID << "][" << OtherOpIdx << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[OtherInsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isIdenticalTo( + State.MIs[OtherInsnID]->getOperand(OtherOpIdx))) { + if (handleReject() == RejectAndGiveUp) + return false; + } break; } + case GIM_Reject: + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_Reject"); + if (handleReject() == RejectAndGiveUp) + return false; + break; case GIR_MutateOpcode: { - int64_t OldInsnID = *Command++; - int64_t NewInsnID = *Command++; - int64_t NewOpcode = *Command++; - assert((size_t)NewInsnID == OutMIs.size() && - "Expected to store MIs in order"); - OutMIs.push_back( - MachineInstrBuilder(*State.MIs[OldInsnID]->getParent()->getParent(), - State.MIs[OldInsnID])); + int64_t OldInsnID = MatchTable[CurrentIdx++]; + uint64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t NewOpcode = MatchTable[CurrentIdx++]; + if (NewInsnID >= OutMIs.size()) + OutMIs.resize(NewInsnID + 1); + + OutMIs[NewInsnID] = MachineInstrBuilder(*State.MIs[OldInsnID]->getMF(), + State.MIs[OldInsnID]); OutMIs[NewInsnID]->setDesc(TII.get(NewOpcode)); - DEBUG(dbgs() << "GIR_MutateOpcode(OutMIs[" << NewInsnID << "], MIs[" - << OldInsnID << "], " << NewOpcode << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_MutateOpcode(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "], " + << NewOpcode << ")\n"); break; } + case GIR_BuildMI: { - int64_t InsnID = *Command++; - int64_t Opcode = *Command++; - assert((size_t)InsnID == OutMIs.size() && - "Expected to store MIs in order"); - (void)InsnID; - OutMIs.push_back(BuildMI(*State.MIs[0]->getParent(), State.MIs[0], - State.MIs[0]->getDebugLoc(), TII.get(Opcode))); - DEBUG(dbgs() << "GIR_BuildMI(OutMIs[" << InsnID << "], " << Opcode - << ")\n"); + uint64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t Opcode = MatchTable[CurrentIdx++]; + if 
(NewInsnID >= OutMIs.size()) + OutMIs.resize(NewInsnID + 1); + + OutMIs[NewInsnID] = BuildMI(*State.MIs[0]->getParent(), State.MIs[0], + State.MIs[0]->getDebugLoc(), TII.get(Opcode)); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_BuildMI(OutMIs[" + << NewInsnID << "], " << Opcode << ")\n"); break; } case GIR_Copy: { - int64_t NewInsnID = *Command++; - int64_t OldInsnID = *Command++; - int64_t OpIdx = *Command++; + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(OpIdx)); - DEBUG(dbgs() << "GIR_Copy(OutMIs[" << NewInsnID << "], MIs[" << OldInsnID - << "], " << OpIdx << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIR_Copy(OutMIs[" << NewInsnID + << "], MIs[" << OldInsnID << "], " << OpIdx << ")\n"); break; } + + case GIR_CopyOrAddZeroReg: { + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t ZeroReg = MatchTable[CurrentIdx++]; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + MachineOperand &MO = State.MIs[OldInsnID]->getOperand(OpIdx); + if (isOperandImmEqual(MO, 0, MRI)) + OutMIs[NewInsnID].addReg(ZeroReg); + else + OutMIs[NewInsnID].add(MO); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopyOrAddZeroReg(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "], " + << OpIdx << ", " << ZeroReg << ")\n"); + break; + } + case GIR_CopySubReg: { - int64_t NewInsnID = *Command++; - int64_t OldInsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t SubRegIdx = *Command++; + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t SubRegIdx = MatchTable[CurrentIdx++]; assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); OutMIs[NewInsnID].addReg(State.MIs[OldInsnID]->getOperand(OpIdx).getReg(), 0, SubRegIdx); - DEBUG(dbgs() << "GIR_CopySubReg(OutMIs[" << NewInsnID << "], MIs[" - << OldInsnID << "], " << OpIdx << ", " << SubRegIdx - << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopySubReg(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "], " + << OpIdx << ", " << SubRegIdx << ")\n"); break; } + case GIR_AddImplicitDef: { - int64_t InsnID = *Command++; - int64_t RegNum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RegNum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addDef(RegNum, RegState::Implicit); - DEBUG(dbgs() << "GIR_AddImplicitDef(OutMIs[" << InsnID << "], " << RegNum - << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddImplicitDef(OutMIs[" + << InsnID << "], " << RegNum << ")\n"); break; } + case GIR_AddImplicitUse: { - int64_t InsnID = *Command++; - int64_t RegNum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RegNum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addUse(RegNum, RegState::Implicit); - DEBUG(dbgs() << "GIR_AddImplicitUse(OutMIs[" << InsnID << "], " << RegNum - << ")\n"); + 
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddImplicitUse(OutMIs[" + << InsnID << "], " << RegNum << ")\n"); break; } + case GIR_AddRegister: { - int64_t InsnID = *Command++; - int64_t RegNum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RegNum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addReg(RegNum); - DEBUG(dbgs() << "GIR_AddRegister(OutMIs[" << InsnID << "], " << RegNum - << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs[" + << InsnID << "], " << RegNum << ")\n"); + break; + } + + case GIR_AddTempRegister: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t TempRegID = MatchTable[CurrentIdx++]; + uint64_t TempRegFlags = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addReg(State.TempRegisters[TempRegID], TempRegFlags); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddTempRegister(OutMIs[" + << InsnID << "], TempRegisters[" << TempRegID + << "], " << TempRegFlags << ")\n"); break; } + case GIR_AddImm: { - int64_t InsnID = *Command++; - int64_t Imm = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Imm = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); OutMIs[InsnID].addImm(Imm); - DEBUG(dbgs() << "GIR_AddImm(OutMIs[" << InsnID << "], " << Imm << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_AddImm(OutMIs[" << InsnID + << "], " << Imm << ")\n"); break; } + case GIR_ComplexRenderer: { - int64_t InsnID = *Command++; - int64_t RendererID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RendererID = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + for (const auto &RenderOpFn : State.Renderers[RendererID]) + RenderOpFn(OutMIs[InsnID]); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_ComplexRenderer(OutMIs[" + << InsnID << "], " << RendererID << ")\n"); + break; + } + case GIR_ComplexSubOperandRenderer: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t RendererID = MatchTable[CurrentIdx++]; + int64_t RenderOpID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - State.Renderers[RendererID](OutMIs[InsnID]); - DEBUG(dbgs() << "GIR_ComplexRenderer(OutMIs[" << InsnID << "], " - << RendererID << ")\n"); + State.Renderers[RendererID][RenderOpID](OutMIs[InsnID]); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIR_ComplexSubOperandRenderer(OutMIs[" + << InsnID << "], " << RendererID << ", " + << RenderOpID << ")\n"); + break; + } + + case GIR_CopyConstantAsSImm: { + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + if (State.MIs[OldInsnID]->getOperand(1).isCImm()) { + OutMIs[NewInsnID].addImm( + State.MIs[OldInsnID]->getOperand(1).getCImm()->getSExtValue()); + } else if (State.MIs[OldInsnID]->getOperand(1).isImm()) + OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(1)); + else + llvm_unreachable("Expected Imm or CImm operand"); + 
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CopyConstantAsSImm(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "])\n"); break; } case GIR_ConstrainOperandRC: { - int64_t InsnID = *Command++; - int64_t OpIdx = *Command++; - int64_t RCEnum = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t RCEnum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); constrainOperandRegToRegClass(*OutMIs[InsnID].getInstr(), OpIdx, *TRI.getRegClass(RCEnum), TII, TRI, RBI); - DEBUG(dbgs() << "GIR_ConstrainOperandRC(OutMIs[" << InsnID << "], " - << OpIdx << ", " << RCEnum << ")\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_ConstrainOperandRC(OutMIs[" + << InsnID << "], " << OpIdx << ", " << RCEnum + << ")\n"); break; } + case GIR_ConstrainSelectedInstOperands: { - int64_t InsnID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); constrainSelectedInstRegOperands(*OutMIs[InsnID].getInstr(), TII, TRI, RBI); - DEBUG(dbgs() << "GIR_ConstrainSelectedInstOperands(OutMIs[" << InsnID - << "])\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIR_ConstrainSelectedInstOperands(OutMIs[" + << InsnID << "])\n"); break; } + case GIR_MergeMemOperands: { - int64_t InsnID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - for (const auto *FromMI : State.MIs) - for (const auto &MMO : FromMI->memoperands()) + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_MergeMemOperands(OutMIs[" + << InsnID << "]"); + int64_t MergeInsnID = GIU_MergeMemOperands_EndOfList; + while ((MergeInsnID = MatchTable[CurrentIdx++]) != + GIU_MergeMemOperands_EndOfList) { + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << ", MIs[" << MergeInsnID << "]"); + for (const auto &MMO : State.MIs[MergeInsnID]->memoperands()) OutMIs[InsnID].addMemOperand(MMO); - DEBUG(dbgs() << "GIR_MergeMemOperands(OutMIs[" << InsnID << "])\n"); + } + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << ")\n"); break; } + case GIR_EraseFromParent: { - int64_t InsnID = *Command++; + int64_t InsnID = MatchTable[CurrentIdx++]; assert(State.MIs[InsnID] && "Attempted to erase an undefined instruction"); State.MIs[InsnID]->eraseFromParent(); - DEBUG(dbgs() << "GIR_EraseFromParent(MIs[" << InsnID << "])\n"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_EraseFromParent(MIs[" + << InsnID << "])\n"); + break; + } + + case GIR_MakeTempReg: { + int64_t TempRegID = MatchTable[CurrentIdx++]; + int64_t TypeID = MatchTable[CurrentIdx++]; + + State.TempRegisters[TempRegID] = + MRI.createGenericVirtualRegister(MatcherInfo.TypeObjects[TypeID]); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": TempRegs[" << TempRegID + << "] = GIR_MakeTempReg(" << TypeID << ")\n"); + break; + } + + case GIR_Coverage: { + int64_t RuleID = MatchTable[CurrentIdx++]; + CoverageInfo.setCovered(RuleID); + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() + << CurrentIdx << ": GIR_Coverage(" << RuleID << ")"); break; } case GIR_Done: - DEBUG(dbgs() << "GIR_Done"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_Done"); return true; default: diff --git 
a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h new file mode 100644 index 0000000000000..e7945ff5bf4f2 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -0,0 +1,287 @@ +//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h --===========// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file contains some helper functions which try to cleanup artifacts +// such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make +// the types match. This file also contains some combines of merges that happens +// at the end of the legalization. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "legalizer" + +namespace llvm { +class LegalizationArtifactCombiner { + MachineIRBuilder &Builder; + MachineRegisterInfo &MRI; + const LegalizerInfo &LI; + +public: + LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI, + const LegalizerInfo &LI) + : Builder(B), MRI(MRI), LI(LI) {} + + bool tryCombineAnyExt(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + if (MI.getOpcode() != TargetOpcode::G_ANYEXT) + return false; + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, + MI.getOperand(1).getReg(), MRI)) { + DEBUG(dbgs() << ".. Combine MI: " << MI;); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = DefMI->getOperand(1).getReg(); + Builder.setInstr(MI); + // We get a copy/trunc/extend depending on the sizes + Builder.buildAnyExtOrTrunc(DstReg, SrcReg); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts); + } + + bool tryCombineZExt(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + + if (MI.getOpcode() != TargetOpcode::G_ZEXT) + return false; + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, + MI.getOperand(1).getReg(), MRI)) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_AND, DstTy) || + isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + return false; + DEBUG(dbgs() << ".. 
Combine MI: " << MI;); + Builder.setInstr(MI); + unsigned ZExtSrc = MI.getOperand(1).getReg(); + LLT ZExtSrcTy = MRI.getType(ZExtSrc); + APInt Mask = APInt::getAllOnesValue(ZExtSrcTy.getSizeInBits()); + auto MaskCstMIB = Builder.buildConstant(DstTy, Mask.getZExtValue()); + unsigned TruncSrc = DefMI->getOperand(1).getReg(); + // We get a copy/trunc/extend depending on the sizes + auto SrcCopyOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); + Builder.buildAnd(DstReg, SrcCopyOrTrunc, MaskCstMIB); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts); + } + + bool tryCombineSExt(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + + if (MI.getOpcode() != TargetOpcode::G_SEXT) + return false; + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_TRUNC, + MI.getOperand(1).getReg(), MRI)) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_SHL, DstTy) || + isInstUnsupported(TargetOpcode::G_ASHR, DstTy) || + isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + return false; + DEBUG(dbgs() << ".. Combine MI: " << MI;); + Builder.setInstr(MI); + unsigned SExtSrc = MI.getOperand(1).getReg(); + LLT SExtSrcTy = MRI.getType(SExtSrc); + unsigned SizeDiff = DstTy.getSizeInBits() - SExtSrcTy.getSizeInBits(); + auto SizeDiffMIB = Builder.buildConstant(DstTy, SizeDiff); + unsigned TruncSrcReg = DefMI->getOperand(1).getReg(); + // We get a copy/trunc/extend depending on the sizes + auto SrcCopyExtOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrcReg); + auto ShlMIB = Builder.buildInstr(TargetOpcode::G_SHL, DstTy, + SrcCopyExtOrTrunc, SizeDiffMIB); + Builder.buildInstr(TargetOpcode::G_ASHR, DstReg, ShlMIB, SizeDiffMIB); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return tryFoldImplicitDef(MI, DeadInsts); + } + + /// Try to fold sb = EXTEND (G_IMPLICIT_DEF sa) -> sb = G_IMPLICIT_DEF + bool tryFoldImplicitDef(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + unsigned Opcode = MI.getOpcode(); + if (Opcode != TargetOpcode::G_ANYEXT && Opcode != TargetOpcode::G_ZEXT && + Opcode != TargetOpcode::G_SEXT) + return false; + + if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, + MI.getOperand(1).getReg(), MRI)) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_IMPLICIT_DEF, DstTy)) + return false; + DEBUG(dbgs() << ".. 
Combine EXT(IMPLICIT_DEF) " << MI;); + Builder.setInstr(MI); + Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, DstReg); + markInstAndDefDead(MI, *DefMI, DeadInsts); + return true; + } + return false; + } + + bool tryCombineMerges(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + + if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + return false; + + unsigned NumDefs = MI.getNumOperands() - 1; + unsigned SrcReg = MI.getOperand(NumDefs).getReg(); + MachineInstr *MergeI = MRI.getVRegDef(SrcReg); + if (!MergeI || (MergeI->getOpcode() != TargetOpcode::G_MERGE_VALUES)) + return false; + + const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; + + if (NumMergeRegs < NumDefs) { + if (NumDefs % NumMergeRegs != 0) + return false; + + Builder.setInstr(MI); + // Transform to UNMERGEs, for example + // %1 = G_MERGE_VALUES %4, %5 + // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 + // to + // %9, %10 = G_UNMERGE_VALUES %4 + // %11, %12 = G_UNMERGE_VALUES %5 + + const unsigned NewNumDefs = NumDefs / NumMergeRegs; + for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { + SmallVector<unsigned, 2> DstRegs; + for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; + ++j, ++DefIdx) + DstRegs.push_back(MI.getOperand(DefIdx).getReg()); + + Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); + } + + } else if (NumMergeRegs > NumDefs) { + if (NumMergeRegs % NumDefs != 0) + return false; + + Builder.setInstr(MI); + // Transform to MERGEs + // %6 = G_MERGE_VALUES %17, %18, %19, %20 + // %7, %8 = G_UNMERGE_VALUES %6 + // to + // %7 = G_MERGE_VALUES %17, %18 + // %8 = G_MERGE_VALUES %19, %20 + + const unsigned NumRegs = NumMergeRegs / NumDefs; + for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { + SmallVector<unsigned, 2> Regs; + for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; + ++j, ++Idx) + Regs.push_back(MergeI->getOperand(Idx).getReg()); + + Builder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs); + } + + } else { + // FIXME: is a COPY appropriate if the types mismatch? We know both + // registers are allocatable by now. + if (MRI.getType(MI.getOperand(0).getReg()) != + MRI.getType(MergeI->getOperand(1).getReg())) + return false; + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) + MRI.replaceRegWith(MI.getOperand(Idx).getReg(), + MergeI->getOperand(Idx + 1).getReg()); + } + + markInstAndDefDead(MI, *MergeI, DeadInsts); + return true; + } + + /// Try to combine away MI. + /// Returns true if it combined away the MI. + /// Adds instructions that are dead as a result of the combine + /// into DeadInsts, which can include MI. + bool tryCombineInstruction(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + switch (MI.getOpcode()) { + default: + return false; + case TargetOpcode::G_ANYEXT: + return tryCombineAnyExt(MI, DeadInsts); + case TargetOpcode::G_ZEXT: + return tryCombineZExt(MI, DeadInsts); + case TargetOpcode::G_SEXT: + return tryCombineSExt(MI, DeadInsts); + case TargetOpcode::G_UNMERGE_VALUES: + return tryCombineMerges(MI, DeadInsts); + case TargetOpcode::G_TRUNC: { + bool Changed = false; + for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg())) + Changed |= tryCombineInstruction(Use, DeadInsts); + return Changed; + } + } + } + +private: + /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be + /// dead due to MI being killed, then mark DefMI as dead too. 
+ /// Some of the combines (extends(trunc)), try to walk through redundant + /// copies in between the extends and the truncs, and this attempts to collect + /// the in between copies if they're dead. + void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, + SmallVectorImpl<MachineInstr *> &DeadInsts) { + DeadInsts.push_back(&MI); + + // Collect all the copy instructions that are made dead, due to deleting + // this instruction. Collect all of them until the Trunc(DefMI). + // Eg, + // %1(s1) = G_TRUNC %0(s32) + // %2(s1) = COPY %1(s1) + // %3(s1) = COPY %2(s1) + // %4(s32) = G_ANYEXT %3(s1) + // In this case, we would have replaced %4 with a copy of %0, + // and as a result, %3, %2, %1 are dead. + MachineInstr *PrevMI = &MI; + while (PrevMI != &DefMI) { + // If we're dealing with G_UNMERGE_VALUES, tryCombineMerges doesn't really try + // to fold copies in between and we can ignore them here. + if (PrevMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) + break; + unsigned PrevRegSrc = PrevMI->getOperand(1).getReg(); + MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); + if (MRI.hasOneUse(PrevRegSrc)) { + if (TmpDef != &DefMI) { + assert(TmpDef->getOpcode() == TargetOpcode::COPY && + "Expecting copy here"); + DeadInsts.push_back(TmpDef); + } + } else + break; + PrevMI = TmpDef; + } + if ((PrevMI == &DefMI || + DefMI.getOpcode() == TargetOpcode::G_MERGE_VALUES) && + MRI.hasOneUse(DefMI.getOperand(0).getReg())) + DeadInsts.push_back(&DefMI); + } + + /// Checks if the target legalizer info has specified anything about the + /// instruction, or if unsupported. + bool isInstUnsupported(unsigned Opcode, const LLT &DstTy) const { + auto Action = LI.getAction({Opcode, 0, DstTy}); + return Action.first == LegalizerInfo::LegalizeAction::Unsupported || + Action.first == LegalizerInfo::LegalizeAction::NotFound; + } +}; + +} // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/Legalizer.h b/include/llvm/CodeGen/GlobalISel/Legalizer.h index 9b9b8b563a30e..8284ab6dac65e 100644 --- a/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -58,9 +58,6 @@ public: bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII); - bool combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII, MachineIRBuilder &MIRBuilder); - bool runOnMachineFunction(MachineFunction &MF) override; }; } // End namespace llvm. diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 1fd45b52e3ac7..8bd8a9dcd0e24 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -89,6 +89,9 @@ public: /// functions MachineIRBuilder MIRBuilder; + /// Expose LegalizerInfo so the clients can re-use. 
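Taken together with the new LegalizerHelper accessor below, a client that owns a LegalizerHelper can feed the same LegalizerInfo into the artifact combiner and erase whatever it marks dead. A sketch of that driving code, where the surrounding function is illustrative rather than the actual Legalizer pass:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static void combineArtifactsAround(MachineFunction &MF, MachineInstr &MI) {
  LegalizerHelper Helper(MF);
  LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(),
                                           Helper.getLegalizerInfo());
  SmallVector<MachineInstr *, 4> DeadInsts;
  if (ArtCombiner.tryCombineInstruction(MI, DeadInsts))
    // DeadInsts may include MI itself; everything in it is safe to erase now.
    for (MachineInstr *Dead : DeadInsts)
      Dead->eraseFromParent();
}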
+ const LegalizerInfo &getLegalizerInfo() const { return LI; } + private: /// Helper function to split a wide generic register into bitwise blocks with diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index c259e93fdd366..b6735d538b37c 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -20,11 +20,12 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Target/TargetOpcodes.h" -#include <cstdint> #include <cassert> +#include <cstdint> #include <tuple> +#include <unordered_map> #include <utility> namespace llvm { @@ -120,27 +121,144 @@ public: } } + typedef std::pair<uint16_t, LegalizeAction> SizeAndAction; + typedef std::vector<SizeAndAction> SizeAndActionsVec; + using SizeChangeStrategy = + std::function<SizeAndActionsVec(const SizeAndActionsVec &v)>; + /// More friendly way to set an action for common types that have an LLT /// representation. + /// The LegalizeAction must be one for which NeedsLegalizingToDifferentSize + /// returns false. void setAction(const InstrAspect &Aspect, LegalizeAction Action) { + assert(!needsLegalizingToDifferentSize(Action)); TablesInitialized = false; - unsigned Opcode = Aspect.Opcode - FirstOp; - if (Actions[Opcode].size() <= Aspect.Idx) - Actions[Opcode].resize(Aspect.Idx + 1); - Actions[Aspect.Opcode - FirstOp][Aspect.Idx][Aspect.Type] = Action; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + if (SpecifiedActions[OpcodeIdx].size() <= Aspect.Idx) + SpecifiedActions[OpcodeIdx].resize(Aspect.Idx + 1); + SpecifiedActions[OpcodeIdx][Aspect.Idx][Aspect.Type] = Action; } - /// If an operation on a given vector type (say <M x iN>) isn't explicitly - /// specified, we proceed in 2 stages. First we legalize the underlying scalar - /// (so that there's at least one legal vector with that scalar), then we - /// adjust the number of elements in the vector so that it is legal. The - /// desired action in the first step is controlled by this function. - void setScalarInVectorAction(unsigned Opcode, LLT ScalarTy, - LegalizeAction Action) { - assert(!ScalarTy.isVector()); - ScalarInVectorActions[std::make_pair(Opcode, ScalarTy)] = Action; + /// The setAction calls record the non-size-changing legalization actions + /// to take on specificly-sized types. The SizeChangeStrategy defines what + /// to do when the size of the type needs to be changed to reach a legally + /// sized type (i.e., one that was defined through a setAction call). + /// e.g. + /// setAction ({G_ADD, 0, LLT::scalar(32)}, Legal); + /// setLegalizeScalarToDifferentSizeStrategy( + /// G_ADD, 0, widenToLargerTypesAndNarrowToLargest); + /// will end up defining getAction({G_ADD, 0, T}) to return the following + /// actions for different scalar types T: + /// LLT::scalar(1)..LLT::scalar(31): {WidenScalar, 0, LLT::scalar(32)} + /// LLT::scalar(32): {Legal, 0, LLT::scalar(32)} + /// LLT::scalar(33)..: {NarrowScalar, 0, LLT::scalar(32)} + /// + /// If no SizeChangeAction gets defined, through this function, + /// the default is unsupportedForDifferentSizes. 
+ void setLegalizeScalarToDifferentSizeStrategy(const unsigned Opcode, + const unsigned TypeIdx, + SizeChangeStrategy S) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (ScalarSizeChangeStrategies[OpcodeIdx].size() <= TypeIdx) + ScalarSizeChangeStrategies[OpcodeIdx].resize(TypeIdx + 1); + ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] = S; + } + + /// See also setLegalizeScalarToDifferentSizeStrategy. + /// This function allows to set the SizeChangeStrategy for vector elements. + void setLegalizeVectorElementToDifferentSizeStrategy(const unsigned Opcode, + const unsigned TypeIdx, + SizeChangeStrategy S) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (VectorElementSizeChangeStrategies[OpcodeIdx].size() <= TypeIdx) + VectorElementSizeChangeStrategies[OpcodeIdx].resize(TypeIdx + 1); + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] = S; + } + + /// A SizeChangeStrategy for the common case where legalization for a + /// particular operation consists of only supporting a specific set of type + /// sizes. E.g. + /// setAction ({G_DIV, 0, LLT::scalar(32)}, Legal); + /// setAction ({G_DIV, 0, LLT::scalar(64)}, Legal); + /// setLegalizeScalarToDifferentSizeStrategy( + /// G_DIV, 0, unsupportedForDifferentSizes); + /// will result in getAction({G_DIV, 0, T}) to return Legal for s32 and s64, + /// and Unsupported for all other scalar types T. + static SizeAndActionsVec + unsupportedForDifferentSizes(const SizeAndActionsVec &v) { + return increaseToLargerTypesAndDecreaseToLargest(v, Unsupported, + Unsupported); } + /// A SizeChangeStrategy for the common case where legalization for a + /// particular operation consists of widening the type to a large legal type, + /// unless there is no such type and then instead it should be narrowed to the + /// largest legal type. + static SizeAndActionsVec + widenToLargerTypesAndNarrowToLargest(const SizeAndActionsVec &v) { + assert(v.size() > 0 && + "At least one size that can be legalized towards is needed" + " for this SizeChangeStrategy"); + return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, + NarrowScalar); + } + + static SizeAndActionsVec + widenToLargerTypesUnsupportedOtherwise(const SizeAndActionsVec &v) { + return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar, + Unsupported); + } + + static SizeAndActionsVec + narrowToSmallerAndUnsupportedIfTooSmall(const SizeAndActionsVec &v) { + return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, + Unsupported); + } + + static SizeAndActionsVec + narrowToSmallerAndWidenToSmallest(const SizeAndActionsVec &v) { + assert(v.size() > 0 && + "At least one size that can be legalized towards is needed" + " for this SizeChangeStrategy"); + return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar, + WidenScalar); + } + + /// A SizeChangeStrategy for the common case where legalization for a + /// particular vector operation consists of having more elements in the + /// vector, to a type that is legal. Unless there is no such type and then + /// instead it should be legalized towards the widest vector that's still + /// legal. E.g. 
+ /// setAction({G_ADD, LLT::vector(8, 8)}, Legal); + /// setAction({G_ADD, LLT::vector(16, 8)}, Legal); + /// setAction({G_ADD, LLT::vector(2, 32)}, Legal); + /// setAction({G_ADD, LLT::vector(4, 32)}, Legal); + /// setLegalizeVectorElementToDifferentSizeStrategy( + /// G_ADD, 0, moreToWiderTypesAndLessToWidest); + /// will result in the following getAction results: + /// * getAction({G_ADD, LLT::vector(8,8)}) returns + /// (Legal, vector(8,8)). + /// * getAction({G_ADD, LLT::vector(9,8)}) returns + /// (MoreElements, vector(16,8)). + /// * getAction({G_ADD, LLT::vector(8,32)}) returns + /// (FewerElements, vector(4,32)). + static SizeAndActionsVec + moreToWiderTypesAndLessToWidest(const SizeAndActionsVec &v) { + return increaseToLargerTypesAndDecreaseToLargest(v, MoreElements, + FewerElements); + } + + /// Helper function to implement many typical SizeChangeStrategy functions. + static SizeAndActionsVec + increaseToLargerTypesAndDecreaseToLargest(const SizeAndActionsVec &v, + LegalizeAction IncreaseAction, + LegalizeAction DecreaseAction); + /// Helper function to implement many typical SizeChangeStrategy functions. + static SizeAndActionsVec + decreaseToSmallerTypesAndIncreaseToSmallest(const SizeAndActionsVec &v, + LegalizeAction DecreaseAction, + LegalizeAction IncreaseAction); + /// Determine what action should be taken to legalize the given generic /// instruction opcode, type-index and type. Requires computeTables to have /// been called. @@ -158,55 +276,6 @@ public: std::tuple<LegalizeAction, unsigned, LLT> getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; - /// Iterate the given function (typically something like doubling the width) - /// on Ty until we find a legal type for this operation. - Optional<LLT> findLegalizableSize(const InstrAspect &Aspect, - function_ref<LLT(LLT)> NextType) const { - LegalizeAction Action; - const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx]; - LLT Ty = Aspect.Type; - do { - Ty = NextType(Ty); - auto ActionIt = Map.find(Ty); - if (ActionIt == Map.end()) { - auto DefaultIt = DefaultActions.find(Aspect.Opcode); - if (DefaultIt == DefaultActions.end()) - return None; - Action = DefaultIt->second; - } else - Action = ActionIt->second; - } while (needsLegalizingToDifferentSize(Action)); - return Ty; - } - - /// Find what type it's actually OK to perform the given operation on, given - /// the general approach we've decided to take. - Optional<LLT> findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const; - - std::pair<LegalizeAction, LLT> findLegalAction(const InstrAspect &Aspect, - LegalizeAction Action) const { - auto LegalType = findLegalType(Aspect, Action); - if (!LegalType) - return std::make_pair(LegalizeAction::Unsupported, LLT()); - return std::make_pair(Action, *LegalType); - } - - /// Find the specified \p Aspect in the primary (explicitly set) Actions - /// table. Returns either the action the target requested or NotFound if there - /// was no setAction call. 
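Pulling the pieces above together, a sketch of how a target might combine setAction with these size-change strategies in its derived LegalizerInfo constructor. MyTargetLegalizerInfo is a hypothetical target class, and the sketch assumes computeTables() (referenced above) is called once all actions are recorded.

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

using namespace llvm;

struct MyTargetLegalizerInfo : public LegalizerInfo {
  MyTargetLegalizerInfo() {
    const LLT S32 = LLT::scalar(32);
    const LLT S64 = LLT::scalar(64);

    // Natively supported sizes for addition; any other scalar size is
    // widened to the next larger legal size, or narrowed to 64 bits.
    setAction({TargetOpcode::G_ADD, 0, S32}, Legal);
    setAction({TargetOpcode::G_ADD, 0, S64}, Legal);
    setLegalizeScalarToDifferentSizeStrategy(
        TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest);

    // Division is only available at exactly 32 and 64 bits; other scalar
    // sizes are reported as Unsupported (also the default strategy).
    setAction({TargetOpcode::G_SDIV, 0, S32}, Legal);
    setAction({TargetOpcode::G_SDIV, 0, S64}, Legal);
    setLegalizeScalarToDifferentSizeStrategy(
        TargetOpcode::G_SDIV, 0, unsupportedForDifferentSizes);

    computeTables();
  }
};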
- LegalizeAction findInActions(const InstrAspect &Aspect) const { - if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) - return NotFound; - if (Aspect.Idx >= Actions[Aspect.Opcode - FirstOp].size()) - return NotFound; - const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx]; - auto ActionIt = Map.find(Aspect.Type); - if (ActionIt == Map.end()) - return NotFound; - - return ActionIt->second; - } - bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; virtual bool legalizeCustom(MachineInstr &MI, @@ -214,20 +283,181 @@ public: MachineIRBuilder &MIRBuilder) const; private: - static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START; - static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; + /// The SizeAndActionsVec is a representation mapping between all natural + /// numbers and an Action. The natural number represents the bit size of + /// the InstrAspect. For example, for a target with native support for 32-bit + /// and 64-bit additions, you'd express that as: + /// setScalarAction(G_ADD, 0, + /// {{1, WidenScalar}, // bit sizes [ 1, 31[ + /// {32, Legal}, // bit sizes [32, 33[ + /// {33, WidenScalar}, // bit sizes [33, 64[ + /// {64, Legal}, // bit sizes [64, 65[ + /// {65, NarrowScalar} // bit sizes [65, +inf[ + /// }); + /// It may be that only 64-bit pointers are supported on your target: + /// setPointerAction(G_GEP, 0, LLT:pointer(1), + /// {{1, Unsupported}, // bit sizes [ 1, 63[ + /// {64, Legal}, // bit sizes [64, 65[ + /// {65, Unsupported}, // bit sizes [65, +inf[ + /// }); + void setScalarAction(const unsigned Opcode, const unsigned TypeIndex, + const SizeAndActionsVec &SizeAndActions) { + const unsigned OpcodeIdx = Opcode - FirstOp; + SmallVector<SizeAndActionsVec, 1> &Actions = ScalarActions[OpcodeIdx]; + setActions(TypeIndex, Actions, SizeAndActions); + } + void setPointerAction(const unsigned Opcode, const unsigned TypeIndex, + const unsigned AddressSpace, + const SizeAndActionsVec &SizeAndActions) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (AddrSpace2PointerActions[OpcodeIdx].find(AddressSpace) == + AddrSpace2PointerActions[OpcodeIdx].end()) + AddrSpace2PointerActions[OpcodeIdx][AddressSpace] = {{}}; + SmallVector<SizeAndActionsVec, 1> &Actions = + AddrSpace2PointerActions[OpcodeIdx].find(AddressSpace)->second; + setActions(TypeIndex, Actions, SizeAndActions); + } - using TypeMap = DenseMap<LLT, LegalizeAction>; - using SIVActionMap = DenseMap<std::pair<unsigned, LLT>, LegalizeAction>; + /// If an operation on a given vector type (say <M x iN>) isn't explicitly + /// specified, we proceed in 2 stages. First we legalize the underlying scalar + /// (so that there's at least one legal vector with that scalar), then we + /// adjust the number of elements in the vector so that it is legal. The + /// desired action in the first step is controlled by this function. + void setScalarInVectorAction(const unsigned Opcode, const unsigned TypeIndex, + const SizeAndActionsVec &SizeAndActions) { + unsigned OpcodeIdx = Opcode - FirstOp; + SmallVector<SizeAndActionsVec, 1> &Actions = + ScalarInVectorActions[OpcodeIdx]; + setActions(TypeIndex, Actions, SizeAndActions); + } + + /// See also setScalarInVectorAction. + /// This function let's you specify the number of elements in a vector that + /// are legal for a legal element size. 
+ void setVectorNumElementAction(const unsigned Opcode, + const unsigned TypeIndex, + const unsigned ElementSize, + const SizeAndActionsVec &SizeAndActions) { + const unsigned OpcodeIdx = Opcode - FirstOp; + if (NumElements2Actions[OpcodeIdx].find(ElementSize) == + NumElements2Actions[OpcodeIdx].end()) + NumElements2Actions[OpcodeIdx][ElementSize] = {{}}; + SmallVector<SizeAndActionsVec, 1> &Actions = + NumElements2Actions[OpcodeIdx].find(ElementSize)->second; + setActions(TypeIndex, Actions, SizeAndActions); + } - SmallVector<TypeMap, 1> Actions[LastOp - FirstOp + 1]; - SIVActionMap ScalarInVectorActions; - DenseMap<std::pair<unsigned, LLT>, uint16_t> MaxLegalVectorElts; - DenseMap<unsigned, LegalizeAction> DefaultActions; + /// A partial SizeAndActionsVec potentially doesn't cover all bit sizes, + /// i.e. it's OK if it doesn't start from size 1. + static void checkPartialSizeAndActionsVector(const SizeAndActionsVec& v) { +#ifndef NDEBUG + // The sizes should be in increasing order + int prev_size = -1; + for(auto SizeAndAction: v) { + assert(SizeAndAction.first > prev_size); + prev_size = SizeAndAction.first; + } + // - for every Widen action, there should be a larger bitsize that + // can be legalized towards (e.g. Legal, Lower, Libcall or Custom + // action). + // - for every Narrow action, there should be a smaller bitsize that + // can be legalized towards. + int SmallestNarrowIdx = -1; + int LargestWidenIdx = -1; + int SmallestLegalizableToSameSizeIdx = -1; + int LargestLegalizableToSameSizeIdx = -1; + for(size_t i=0; i<v.size(); ++i) { + switch (v[i].second) { + case FewerElements: + case NarrowScalar: + if (SmallestNarrowIdx == -1) + SmallestNarrowIdx = i; + break; + case WidenScalar: + case MoreElements: + LargestWidenIdx = i; + break; + case Unsupported: + break; + default: + if (SmallestLegalizableToSameSizeIdx == -1) + SmallestLegalizableToSameSizeIdx = i; + LargestLegalizableToSameSizeIdx = i; + } + } + if (SmallestNarrowIdx != -1) { + assert(SmallestLegalizableToSameSizeIdx != -1); + assert(SmallestNarrowIdx > SmallestLegalizableToSameSizeIdx); + } + if (LargestWidenIdx != -1) + assert(LargestWidenIdx < LargestLegalizableToSameSizeIdx); +#endif + } + + /// A full SizeAndActionsVec must cover all bit sizes, i.e. must start with + /// from size 1. + static void checkFullSizeAndActionsVector(const SizeAndActionsVec& v) { +#ifndef NDEBUG + // Data structure invariant: The first bit size must be size 1. + assert(v.size() >= 1); + assert(v[0].first == 1); + checkPartialSizeAndActionsVector(v); +#endif + } + + /// Sets actions for all bit sizes on a particular generic opcode, type + /// index and scalar or pointer type. + void setActions(unsigned TypeIndex, + SmallVector<SizeAndActionsVec, 1> &Actions, + const SizeAndActionsVec &SizeAndActions) { + checkFullSizeAndActionsVector(SizeAndActions); + if (Actions.size() <= TypeIndex) + Actions.resize(TypeIndex + 1); + Actions[TypeIndex] = SizeAndActions; + } + + static SizeAndAction findAction(const SizeAndActionsVec &Vec, + const uint32_t Size); + + /// Returns the next action needed to get the scalar or pointer type closer + /// to being legal + /// E.g. findLegalAction({G_REM, 13}) should return + /// (WidenScalar, 32). After that, findLegalAction({G_REM, 32}) will + /// probably be called, which should return (Lower, 32). 
+ /// This is assuming the setScalarAction on G_REM was something like: + /// setScalarAction(G_REM, 0, + /// {{1, WidenScalar}, // bit sizes [ 1, 31[ + /// {32, Lower}, // bit sizes [32, 33[ + /// {33, NarrowScalar} // bit sizes [65, +inf[ + /// }); + std::pair<LegalizeAction, LLT> + findScalarLegalAction(const InstrAspect &Aspect) const; + + /// Returns the next action needed towards legalizing the vector type. + std::pair<LegalizeAction, LLT> + findVectorLegalAction(const InstrAspect &Aspect) const; + + static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START; + static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; - bool TablesInitialized = false; + // Data structures used temporarily during construction of legality data: + typedef DenseMap<LLT, LegalizeAction> TypeMap; + SmallVector<TypeMap, 1> SpecifiedActions[LastOp - FirstOp + 1]; + SmallVector<SizeChangeStrategy, 1> + ScalarSizeChangeStrategies[LastOp - FirstOp + 1]; + SmallVector<SizeChangeStrategy, 1> + VectorElementSizeChangeStrategies[LastOp - FirstOp + 1]; + bool TablesInitialized; + + // Data structures used by getAction: + SmallVector<SizeAndActionsVec, 1> ScalarActions[LastOp - FirstOp + 1]; + SmallVector<SizeAndActionsVec, 1> ScalarInVectorActions[LastOp - FirstOp + 1]; + std::unordered_map<uint16_t, SmallVector<SizeAndActionsVec, 1>> + AddrSpace2PointerActions[LastOp - FirstOp + 1]; + std::unordered_map<uint16_t, SmallVector<SizeAndActionsVec, 1>> + NumElements2Actions[LastOp - FirstOp + 1]; }; -} // end namespace llvm +} // end namespace llvm. #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 85e6fef1f3c26..aa875c11d86fa 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -70,13 +70,33 @@ class MachineIRBuilder { return getMF().getRegInfo().createVirtualRegister(RC); } - unsigned getRegFromArg(unsigned Reg) { return Reg; } + void addUseFromArg(MachineInstrBuilder &MIB, unsigned Reg) { + MIB.addUse(Reg); + } + void addUseFromArg(MachineInstrBuilder &MIB, const MachineInstrBuilder &UseMIB) { + MIB.addUse(UseMIB->getOperand(0).getReg()); + } + + void addUsesFromArgs(MachineInstrBuilder &MIB) { } + template<typename UseArgTy, typename ... UseArgsTy> + void addUsesFromArgs(MachineInstrBuilder &MIB, UseArgTy &&Arg1, UseArgsTy &&... Args) { + addUseFromArg(MIB, Arg1); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); + } + unsigned getRegFromArg(unsigned Reg) { return Reg; } unsigned getRegFromArg(const MachineInstrBuilder &MIB) { return MIB->getOperand(0).getReg(); } public: + /// Some constructors for easy use. + MachineIRBuilder() = default; + MachineIRBuilder(MachineFunction &MF) { setMF(MF); } + MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { + setInstr(MI); + } + /// Getter for the function we currently build. MachineFunction &getMF() { assert(MF && "MachineFunction is not set"); @@ -146,9 +166,7 @@ public: MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, UseArgsTy &&... 
Args) { auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); - unsigned It[] = {(getRegFromArg(Args))...}; - for (const auto &i : It) - MIB.addUse(i); + addUsesFromArgs(MIB, std::forward<UseArgsTy>(Args)...); return MIB; } @@ -168,11 +186,12 @@ public: const MDNode *Expr); /// Build and insert a DBG_VALUE instruction expressing the fact that the - /// associated \p Variable lives in memory at \p Reg + \p Offset (suitably - /// modified by \p Expr). - MachineInstrBuilder buildIndirectDbgValue(unsigned Reg, unsigned Offset, + /// associated \p Variable lives in memory at \p Reg (suitably modified by \p + /// Expr). + MachineInstrBuilder buildIndirectDbgValue(unsigned Reg, const MDNode *Variable, const MDNode *Expr); + /// Build and insert a DBG_VALUE instruction expressing the fact that the /// associated \p Variable lives in the stack slot specified by \p FI /// (suitably modified by \p Expr). @@ -181,11 +200,11 @@ public: /// Build and insert a DBG_VALUE instructions specifying that \p Variable is /// given by \p C (suitably modified by \p Expr). - MachineInstrBuilder buildConstDbgValue(const Constant &C, unsigned Offset, + MachineInstrBuilder buildConstDbgValue(const Constant &C, const MDNode *Variable, const MDNode *Expr); - /// Build and insert \p Res<def> = G_FRAME_INDEX \p Idx + /// Build and insert \p Res = G_FRAME_INDEX \p Idx /// /// G_FRAME_INDEX materializes the address of an alloca value or other /// stack-based object. @@ -196,7 +215,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFrameIndex(unsigned Res, int Idx); - /// Build and insert \p Res<def> = G_GLOBAL_VALUE \p GV + /// Build and insert \p Res = G_GLOBAL_VALUE \p GV /// /// G_GLOBAL_VALUE materializes the address of the specified global /// into \p Res. @@ -208,7 +227,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV); - /// Build and insert \p Res<def> = G_ADD \p Op0, \p Op1 + /// Build and insert \p Res = G_ADD \p Op0, \p Op1 /// /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -226,7 +245,7 @@ public: return buildAdd(Res, (getRegFromArg(UseArgs))...); } - /// Build and insert \p Res<def> = G_SUB \p Op0, \p Op1 + /// Build and insert \p Res = G_SUB \p Op0, \p Op1 /// /// G_SUB sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -239,7 +258,7 @@ public: MachineInstrBuilder buildSub(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_MUL \p Op0, \p Op1 + /// Build and insert \p Res = G_MUL \p Op0, \p Op1 /// /// G_MUL sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -252,7 +271,7 @@ public: MachineInstrBuilder buildMul(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_GEP \p Op0, \p Op1 + /// Build and insert \p Res = G_GEP \p Op0, \p Op1 /// /// G_GEP adds \p Op1 bytes to the pointer specified by \p Op0, /// storing the resulting pointer in \p Res. @@ -266,7 +285,7 @@ public: MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1); - /// Materialize and insert \p Res<def> = G_GEP \p Op0, (G_CONSTANT \p Value) + /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value) /// /// G_GEP adds \p Value bytes to the pointer specified by \p Op0, /// storing the resulting pointer in \p Res. 
If \p Value is zero then no @@ -286,7 +305,7 @@ public: const LLT &ValueTy, uint64_t Value); - /// Build and insert \p Res<def> = G_PTR_MASK \p Op0, \p NumBits + /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits /// /// G_PTR_MASK clears the low bits of a pointer operand without destroying its /// pointer properties. This has the effect of rounding the address *down* to @@ -302,7 +321,7 @@ public: MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits); - /// Build and insert \p Res<def>, \p CarryOut<def> = G_UADDE \p Op0, + /// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0, /// \p Op1, \p CarryIn /// /// G_UADDE sets \p Res to \p Op0 + \p Op1 + \p CarryIn (truncated to the bit @@ -319,7 +338,7 @@ public: MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0, unsigned Op1, unsigned CarryIn); - /// Build and insert \p Res<def> = G_AND \p Op0, \p Op1 + /// Build and insert \p Res = G_AND \p Op0, \p Op1 /// /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p /// Op1. @@ -329,10 +348,14 @@ public: /// with the same (scalar or vector) type). /// /// \return a MachineInstrBuilder for the newly created instruction. + template <typename DstTy, typename... UseArgsTy> + MachineInstrBuilder buildAnd(DstTy &&Dst, UseArgsTy &&... UseArgs) { + return buildAnd(getDestFromArg(Dst), getRegFromArg(UseArgs)...); + } MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_OR \p Op0, \p Op1 + /// Build and insert \p Res = G_OR \p Op0, \p Op1 /// /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p /// Op1. @@ -344,7 +367,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_ANYEXT \p Op0 + /// Build and insert \p Res = G_ANYEXT \p Op0 /// /// G_ANYEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are unspecified @@ -357,9 +380,14 @@ public: /// \pre \p Op must be smaller than \p Res /// /// \return The newly created instruction. + MachineInstrBuilder buildAnyExt(unsigned Res, unsigned Op); + template <typename DstType, typename ArgType> + MachineInstrBuilder buildAnyExt(DstType &&Res, ArgType &&Arg) { + return buildAnyExt(getDestFromArg(Res), getRegFromArg(Arg)); + } - /// Build and insert \p Res<def> = G_SEXT \p Op + /// Build and insert \p Res = G_SEXT \p Op /// /// G_SEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are duplicated from the @@ -373,7 +401,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildSExt(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_ZEXT \p Op + /// Build and insert \p Res = G_ZEXT \p Op /// /// G_ZEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are 0. For a vector @@ -387,7 +415,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildZExt(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or + /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. /// /// /// \pre setBasicBlock or setMI must have been called. 
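A short sketch of how the new templated overloads and the MachineInstr-based constructor compose. It assumes Src0 and Src1 are generic virtual registers strictly narrower than WideTy; buildWidenedAnd is an illustrative helper name, not part of the API.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

static void buildWidenedAnd(MachineInstr &InsertPt, unsigned Src0,
                            unsigned Src1, LLT WideTy) {
  MachineIRBuilder B(InsertPt); // sets both the MF and the insertion point
  MachineRegisterInfo &MRI = B.getMF().getRegInfo();

  unsigned Wide0 = MRI.createGenericVirtualRegister(WideTy);
  unsigned Wide1 = MRI.createGenericVirtualRegister(WideTy);
  unsigned Dst = MRI.createGenericVirtualRegister(WideTy);

  auto Ext0 = B.buildAnyExt(Wide0, Src0);
  auto Ext1 = B.buildAnyExt(Wide1, Src1);
  // The templated buildAnd accepts the MachineInstrBuilders directly; their
  // operand 0 (the def) is used as the source register of the G_AND.
  B.buildAnd(Dst, Ext0, Ext1);
}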
@@ -397,7 +425,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or + /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. /// /// /// \pre setBasicBlock or setMI must have been called. @@ -407,6 +435,32 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op); + // Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or + /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. + /// /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with scalar or vector type. + /// \pre \p Op must be a generic virtual register with scalar or vector type. + /// + /// \return The newly created instruction. + template <typename DstTy, typename UseArgTy> + MachineInstrBuilder buildAnyExtOrTrunc(DstTy &&Dst, UseArgTy &&Use) { + return buildAnyExtOrTrunc(getDestFromArg(Dst), getRegFromArg(Use)); + } + MachineInstrBuilder buildAnyExtOrTrunc(unsigned Res, unsigned Op); + + /// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p + /// Op, or \p Res = COPY \p Op depending on the differing sizes of \p Res and + /// \p Op. + /// /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res must be a generic virtual register with scalar or vector type. + /// \pre \p Op must be a generic virtual register with scalar or vector type. + /// + /// \return The newly created instruction. + MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, unsigned Res, + unsigned Op); + /// Build and insert an appropriate cast between two registers of equal size. MachineInstrBuilder buildCast(unsigned Dst, unsigned Src); @@ -480,7 +534,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val); - /// Build and insert \p Res<def> = COPY Op + /// Build and insert \p Res = COPY Op /// /// Register-to-register COPY sets \p Res to \p Op. /// @@ -488,8 +542,12 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildCopy(unsigned Res, unsigned Op); + template <typename DstType, typename SrcType> + MachineInstrBuilder buildCopy(DstType &&Res, SrcType &&Src) { + return buildCopy(getDestFromArg(Res), getRegFromArg(Src)); + } - /// Build and insert `Res<def> = G_LOAD Addr, MMO`. + /// Build and insert `Res = G_LOAD Addr, MMO`. /// /// Loads the value stored at \p Addr. Puts the result in \p Res. /// @@ -513,7 +571,7 @@ public: MachineInstrBuilder buildStore(unsigned Val, unsigned Addr, MachineMemOperand &MMO); - /// Build and insert `Res0<def>, ... = G_EXTRACT Src, Idx0`. + /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`. /// /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res and \p Src must be generic virtual registers. @@ -540,7 +598,7 @@ public: void buildSequence(unsigned Res, ArrayRef<unsigned> Ops, ArrayRef<uint64_t> Indices); - /// Build and insert \p Res<def> = G_MERGE_VALUES \p Op0, ... + /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... /// /// G_MERGE_VALUES combines the input elements contiguously into a larger /// register. @@ -553,7 +611,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. 
MachineInstrBuilder buildMerge(unsigned Res, ArrayRef<unsigned> Ops); - /// Build and insert \p Res0<def>, ... = G_UNMERGE_VALUES \p Op + /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op /// /// G_UNMERGE_VALUES splits contiguous bits of the input into multiple /// @@ -581,7 +639,7 @@ public: MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res, bool HasSideEffects); - /// Build and insert \p Res<def> = G_FPTRUNC \p Op + /// Build and insert \p Res = G_FPTRUNC \p Op /// /// G_FPTRUNC converts a floating-point value into one with a smaller type. /// @@ -593,7 +651,7 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op); - /// Build and insert \p Res<def> = G_TRUNC \p Op + /// Build and insert \p Res = G_TRUNC \p Op /// /// G_TRUNC extracts the low bits of a type. For a vector type each element is /// truncated independently before being packed into the destination. @@ -605,6 +663,10 @@ public: /// /// \return The newly created instruction. MachineInstrBuilder buildTrunc(unsigned Res, unsigned Op); + template <typename DstType, typename SrcType> + MachineInstrBuilder buildTrunc(DstType &&Res, SrcType &&Src) { + return buildTrunc(getDestFromArg(Res), getRegFromArg(Src)); + } /// Build and insert a \p Res = G_ICMP \p Pred, \p Op0, \p Op1 /// @@ -649,7 +711,7 @@ public: MachineInstrBuilder buildSelect(unsigned Res, unsigned Tst, unsigned Op0, unsigned Op1); - /// Build and insert \p Res<def> = G_INSERT_VECTOR_ELT \p Val, + /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx /// /// \pre setBasicBlock or setMI must have been called. @@ -662,7 +724,7 @@ public: MachineInstrBuilder buildInsertVectorElement(unsigned Res, unsigned Val, unsigned Elt, unsigned Idx); - /// Build and insert \p Res<def> = G_EXTRACT_VECTOR_ELT \p Val, \p Idx + /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx /// /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res must be a generic virtual register with scalar type. @@ -672,6 +734,24 @@ public: /// \return The newly created instruction. MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val, unsigned Idx); + + /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, + /// MMO`. + /// + /// Atomically replace the value at \p Addr with \p NewVal if it is currently + /// \p CmpVal otherwise leaves it unchanged. Puts the original value from \p + /// Addr in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register of scalar type. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, \p CmpVal, and \p NewVal must be generic virtual + /// registers of the same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO); }; } // End namespace llvm. diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 60905c7ec226d..02868b220984d 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -407,6 +407,10 @@ protected: mutable DenseMap<unsigned, std::unique_ptr<const InstructionMapping>> MapOfInstructionMappings; + /// Getting the minimal register class of a physreg is expensive. 
+ /// Cache this information as we get it. + mutable DenseMap<unsigned, const TargetRegisterClass *> PhysRegMinimalRCs; + /// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks /// RegisterBank instances. RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks); @@ -427,6 +431,11 @@ protected: return *RegBanks[ID]; } + /// Get the MinimalPhysRegClass for Reg. + /// \pre Reg is a physical register. + const TargetRegisterClass & + getMinimalPhysRegClass(unsigned Reg, const TargetRegisterInfo &TRI) const; + /// Try to get the mapping of \p MI. /// See getInstrMapping for more details on what a mapping represents. /// @@ -699,8 +708,8 @@ public: /// virtual register. /// /// \pre \p Reg != 0 (NoRegister). - static unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI); + unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; /// Check that information hold by this instance make sense for the /// given \p TRI. diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 50ddbeb9432a3..5864c15cc8eb9 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -79,5 +79,11 @@ Optional<int64_t> getConstantVRegVal(unsigned VReg, const ConstantFP* getConstantFPVRegVal(unsigned VReg, const MachineRegisterInfo &MRI); +/// See if Reg is defined by an single def instruction that is +/// Opcode. Also try to do trivial folding if it's a COPY with +/// same types. Returns null otherwise. +MachineInstr *getOpcodeDef(unsigned Opcode, unsigned Reg, + const MachineRegisterInfo &MRI); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index bc5d2353f63e3..d256849be9afb 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -186,7 +186,8 @@ namespace ISD { /// BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways. /// Given two values of the same integer value type, this produces a value /// twice as big. Like EXTRACT_ELEMENT, this can only be used before - /// legalization. + /// legalization. The lower part of the composite value should be in + /// element 0 and the upper part should be in element 1. BUILD_PAIR, /// MERGE_VALUES - This node takes multiple discrete operands and returns @@ -263,6 +264,7 @@ namespace ISD { /// They are used to limit optimizations while the DAG is being /// optimized. STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM, + STRICT_FMA, /// Constrained versions of libm-equivalent floating point intrinsics. /// These will be lowered to the equivalent non-constrained pseudo-op @@ -637,6 +639,12 @@ namespace ISD { /// take a chain as input and return a chain. EH_LABEL, + /// ANNOTATION_LABEL - Represents a mid basic block label used by + /// annotations. This should remain within the basic block and be ordered + /// with respect to other call instructions, but loads and stores may float + /// past it. + ANNOTATION_LABEL, + /// CATCHPAD - Represents a catchpad instruction. CATCHPAD, @@ -831,7 +839,7 @@ namespace ISD { /// which do not reference a specific memory location should be less than /// this value. Those that do must not be less than this value, and can /// be used with SelectionDAG::getMemIntrinsicNode. 
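A small sketch of the kind of query getOpcodeDef is meant for, e.g. inside a combine that wants to look through same-type copies; isTruncatedValue is an illustrative name, not an existing helper.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

static bool isTruncatedValue(unsigned Reg, const MachineRegisterInfo &MRI) {
  // Walks through trivially foldable COPYs and returns the defining
  // instruction only if it is a G_TRUNC.
  return getOpcodeDef(TargetOpcode::G_TRUNC, Reg, MRI) != nullptr;
}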
- static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+300; + static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+400; //===--------------------------------------------------------------------===// /// MemIndexedMode enum - This enum defines the load / store indexed diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h index a404b9b70d3ac..597d684909c16 100644 --- a/include/llvm/CodeGen/IntrinsicLowering.h +++ b/include/llvm/CodeGen/IntrinsicLowering.h @@ -31,26 +31,22 @@ class IntrinsicLowering { public: explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {} - /// AddPrototypes - This method, if called, causes all of the prototypes - /// that might be needed by an intrinsic lowering implementation to be - /// inserted into the module specified. + /// Add all of the prototypes that might be needed by an intrinsic lowering + /// implementation to be inserted into the module specified. void AddPrototypes(Module &M); - /// LowerIntrinsicCall - This method replaces a call with the LLVM function - /// which should be used to implement the specified intrinsic function call. + /// Replace a call to the specified intrinsic function. /// If an intrinsic function must be implemented by the code generator /// (such as va_start), this function should print a message and abort. /// /// Otherwise, if an intrinsic function call can be lowered, the code to /// implement it (often a call to a non-intrinsic function) is inserted - /// _after_ the call instruction and the call is deleted. The caller must + /// _after_ the call instruction and the call is deleted. The caller must /// be capable of handling this kind of change. - /// void LowerIntrinsicCall(CallInst *CI); - /// LowerToByteSwap - Replace a call instruction into a call to bswap - /// intrinsic. Return false if it has determined the call is not a - /// simple integer bswap. + /// Try to replace a call instruction with a call to a bswap intrinsic. Return + /// false if the call is not a simple integer bswap. static bool LowerToByteSwap(CallInst *CI); }; } diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index f347f66e0981c..988e6d6cb3a3c 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -22,7 +22,7 @@ namespace llvm { class LatencyPriorityQueue; /// Sorting functions for the Available queue. 
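For reference, the usual call pattern for the IntrinsicLowering interface above, roughly what an execution engine does when it meets an intrinsic it cannot handle natively. As documented, LowerIntrinsicCall aborts for intrinsics that must be implemented by the code generator, so the caller is expected to filter those out; expandIntrinsicCall is an illustrative name.

#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static void expandIntrinsicCall(CallInst *CI) {
  Module *M = CI->getModule();
  IntrinsicLowering IL(M->getDataLayout());
  IL.AddPrototypes(*M);      // make sure any helper prototypes exist
  IL.LowerIntrinsicCall(CI); // inserts replacement code and deletes CI
}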
- struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct latency_sort { LatencyPriorityQueue *PQ; explicit latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {} diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervals.h index 820e883624837..1150f3c1c47b4 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -1,4 +1,4 @@ -//===- LiveIntervalAnalysis.h - Live Interval Analysis ----------*- C++ -*-===// +//===- LiveIntervals.h - Live Interval Analysis -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVEINTERVALANALYSIS_H -#define LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#ifndef LLVM_CODEGEN_LIVEINTERVALS_H +#define LLVM_CODEGEN_LIVEINTERVALS_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IndexedMap.h" @@ -28,11 +28,11 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstdint> #include <utility> @@ -107,6 +107,11 @@ class VirtRegMap; const MachineBlockFrequencyInfo *MBFI, const MachineInstr &Instr); + /// Calculate the spill weight to assign to a single instruction. + static float getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineBasicBlock *MBB); + LiveInterval &getInterval(unsigned Reg) { if (hasInterval(Reg)) return *VirtRegIntervals[Reg]; @@ -473,4 +478,4 @@ class VirtRegMap; } // end namespace llvm -#endif // LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#endif diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index f9c741dd75b2d..f9aab0d09e1f7 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -20,11 +20,11 @@ /// register. /// /// X86 Example: -/// %YMM0<def> = ... -/// %XMM0<def> = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) +/// %ymm0 = ... +/// %xmm0 = ... (Kills %xmm0, all %xmm0s sub-registers, and %ymm0) /// -/// %YMM0<def> = ... -/// %XMM0<def> = ..., %YMM0<imp-use> (%YMM0 and all its sub-registers are alive) +/// %ymm0 = ... +/// %xmm0 = ..., implicit %ymm0 (%ymm0 and all its sub-registers are alive) //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H @@ -32,8 +32,8 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <utility> @@ -108,6 +108,12 @@ public: /// Returns true if register \p Reg and no aliasing register is in the set. bool available(const MachineRegisterInfo &MRI, unsigned Reg) const; + /// Remove defined registers and regmask kills from the set. + void removeDefs(const MachineInstr &MI); + + /// Add uses to the set. + void addUses(const MachineInstr &MI); + /// Simulates liveness when stepping backwards over an instruction(bundle). /// Remove Defs, add uses. This is the recommended way of calculating /// liveness. 
@@ -152,6 +158,10 @@ private: /// \brief Adds live-in registers from basic block \p MBB, taking associated /// lane masks into consideration. void addBlockLiveIns(const MachineBasicBlock &MBB); + + /// Adds pristine registers. Pristine registers are callee saved registers + /// that are unused in the function. + void addPristines(const MachineFunction &MF); }; inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { @@ -159,12 +169,21 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { return OS; } -/// \brief Computes the live-in list for \p MBB assuming all of its successors -/// live-in lists are up-to-date. Uses the given LivePhysReg instance \p -/// LiveRegs; This is just here to avoid repeated heap allocations when calling -/// this multiple times in a pass. -void computeLiveIns(LivePhysRegs &LiveRegs, const MachineRegisterInfo &MRI, - MachineBasicBlock &MBB); +/// \brief Computes registers live-in to \p MBB assuming all of its successors +/// live-in lists are up-to-date. Puts the result into the given LivePhysReg +/// instance \p LiveRegs. +void computeLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB); + +/// Recomputes dead and kill flags in \p MBB. +void recomputeLivenessFlags(MachineBasicBlock &MBB); + +/// Adds registers contained in \p LiveRegs to the block live-in list of \p MBB. +/// Does not add reserved registers. +void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); + +/// Convenience function combining computeLiveIns() and addLiveIns(). +void computeAndAddLiveIns(LivePhysRegs &LiveRegs, + MachineBasicBlock &MBB); } // end namespace llvm diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index 362d9854a271a..84bccde0caa23 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -29,7 +29,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include <cassert> namespace llvm { diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index c28b1a06854fc..dc4956da9637c 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -16,9 +16,9 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cstdint> namespace llvm { @@ -51,7 +51,7 @@ public: void clear() { Units.reset(); } /// Returns true if the set is empty. - bool empty() const { return Units.empty(); } + bool empty() const { return Units.none(); } /// Adds register units covered by physical register \p Reg. void addReg(unsigned Reg) { @@ -123,6 +123,11 @@ public: const BitVector &getBitVector() const { return Units; } + +private: + /// Adds pristine registers. Pristine registers are callee saved registers + /// that are unused in the function. 
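A minimal sketch of the intended use of computeAndAddLiveIns after a block has been created or split; fixupNewBlockLiveIns is an illustrative name, and the sketch assumes the block already has its final successor list.

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

using namespace llvm;

static void fixupNewBlockLiveIns(MachineBasicBlock &NewMBB) {
  const MachineFunction &MF = *NewMBB.getParent();
  LivePhysRegs LiveRegs(*MF.getSubtarget().getRegisterInfo());
  // Derives liveness from the successors' live-in lists and records the
  // result as live-in registers of NewMBB (reserved registers are skipped).
  computeAndAddLiveIns(LiveRegs, NewMBB);
}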
+ void addPristines(const MachineFunction &MF); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h index d6e947c03dbdc..ed8da8662106d 100644 --- a/include/llvm/CodeGen/LiveVariables.h +++ b/include/llvm/CodeGen/LiveVariables.h @@ -36,7 +36,7 @@ #include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" namespace llvm { diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 1b1ba6a05837c..ba40e522e261f 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -12,12 +12,18 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_CODEGEN_MIRYAMLMAPPING_H -#define LLVM_LIB_CODEGEN_MIRYAMLMAPPING_H +#ifndef LLVM_CODEGEN_MIRYAMLMAPPING_H +#define LLVM_CODEGEN_MIRYAMLMAPPING_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdint> +#include <string> #include <vector> namespace llvm { @@ -29,7 +35,7 @@ struct StringValue { std::string Value; SMRange SourceRange; - StringValue() {} + StringValue() = default; StringValue(std::string Value) : Value(std::move(Value)) {} bool operator==(const StringValue &Other) const { @@ -38,7 +44,7 @@ struct StringValue { }; template <> struct ScalarTraits<StringValue> { - static void output(const StringValue &S, void *, llvm::raw_ostream &OS) { + static void output(const StringValue &S, void *, raw_ostream &OS) { OS << S.Value; } @@ -50,16 +56,16 @@ template <> struct ScalarTraits<StringValue> { return ""; } - static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; struct FlowStringValue : StringValue { - FlowStringValue() {} + FlowStringValue() = default; FlowStringValue(std::string Value) : StringValue(std::move(Value)) {} }; template <> struct ScalarTraits<FlowStringValue> { - static void output(const FlowStringValue &S, void *, llvm::raw_ostream &OS) { + static void output(const FlowStringValue &S, void *, raw_ostream &OS) { return ScalarTraits<StringValue>::output(S, nullptr, OS); } @@ -67,11 +73,12 @@ template <> struct ScalarTraits<FlowStringValue> { return ScalarTraits<StringValue>::input(Scalar, Ctx, S); } - static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; struct BlockStringValue { StringValue Value; + bool operator==(const BlockStringValue &Other) const { return Value == Other.Value; } @@ -90,10 +97,10 @@ template <> struct BlockScalarTraits<BlockStringValue> { /// A wrapper around unsigned which contains a source range that's being set /// during parsing. 
struct UnsignedValue { - unsigned Value; + unsigned Value = 0; SMRange SourceRange; - UnsignedValue() : Value(0) {} + UnsignedValue() = default; UnsignedValue(unsigned Value) : Value(Value) {} bool operator==(const UnsignedValue &Other) const { @@ -113,7 +120,7 @@ template <> struct ScalarTraits<UnsignedValue> { return ScalarTraits<unsigned>::input(Scalar, Ctx, Value.Value); } - static bool mustQuote(StringRef Scalar) { + static QuotingType mustQuote(StringRef Scalar) { return ScalarTraits<unsigned>::mustQuote(Scalar); } }; @@ -148,7 +155,9 @@ struct VirtualRegisterDefinition { UnsignedValue ID; StringValue Class; StringValue PreferredRegister; + // TODO: Serialize the target specific register hints. + bool operator==(const VirtualRegisterDefinition &Other) const { return ID == Other.ID && Class == Other.Class && PreferredRegister == Other.PreferredRegister; @@ -169,6 +178,7 @@ template <> struct MappingTraits<VirtualRegisterDefinition> { struct MachineFunctionLiveIn { StringValue Register; StringValue VirtualRegister; + bool operator==(const MachineFunctionLiveIn &Other) const { return Register == Other.Register && VirtualRegister == Other.VirtualRegister; @@ -202,16 +212,21 @@ struct MachineStackObject { int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; + uint8_t StackID = 0; StringValue CalleeSavedRegister; + bool CalleeSavedRestored = true; Optional<int64_t> LocalOffset; StringValue DebugVar; StringValue DebugExpr; StringValue DebugLoc; + bool operator==(const MachineStackObject &Other) const { return ID == Other.ID && Name == Other.Name && Type == Other.Type && Offset == Other.Offset && Size == Other.Size && Alignment == Other.Alignment && + StackID == Other.StackID && CalleeSavedRegister == Other.CalleeSavedRegister && + CalleeSavedRestored == Other.CalleeSavedRestored && LocalOffset == Other.LocalOffset && DebugVar == Other.DebugVar && DebugExpr == Other.DebugExpr && DebugLoc == Other.DebugLoc; } @@ -237,8 +252,11 @@ template <> struct MappingTraits<MachineStackObject> { if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); + YamlIO.mapOptional("stack-id", Object.StackID); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, + true); YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional<int64_t>()); YamlIO.mapOptional("di-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. 
@@ -260,14 +278,19 @@ struct FixedMachineStackObject { int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; + uint8_t StackID = 0; bool IsImmutable = false; bool IsAliased = false; StringValue CalleeSavedRegister; + bool CalleeSavedRestored = true; + bool operator==(const FixedMachineStackObject &Other) const { return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && Size == Other.Size && Alignment == Other.Alignment && + StackID == Other.StackID && IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && - CalleeSavedRegister == Other.CalleeSavedRegister; + CalleeSavedRegister == Other.CalleeSavedRegister && + CalleeSavedRestored == Other.CalleeSavedRestored; } }; @@ -289,12 +312,15 @@ template <> struct MappingTraits<FixedMachineStackObject> { YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); YamlIO.mapOptional("size", Object.Size, (uint64_t)0); YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); + YamlIO.mapOptional("stack-id", Object.StackID); if (Object.Type != FixedMachineStackObject::SpillSlot) { YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); YamlIO.mapOptional("isAliased", Object.IsAliased, false); } YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, + true); } static const bool flow = true; @@ -304,9 +330,12 @@ struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; unsigned Alignment = 0; + bool IsTargetSpecific = false; + bool operator==(const MachineConstantPoolValue &Other) const { return ID == Other.ID && Value == Other.Value && - Alignment == Other.Alignment; + Alignment == Other.Alignment && + IsTargetSpecific == Other.IsTargetSpecific; } }; @@ -315,6 +344,7 @@ template <> struct MappingTraits<MachineConstantPoolValue> { YamlIO.mapRequired("id", Constant.ID); YamlIO.mapOptional("value", Constant.Value, StringValue()); YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0); + YamlIO.mapOptional("isTargetSpecific", Constant.IsTargetSpecific, false); } }; @@ -322,6 +352,7 @@ struct MachineJumpTable { struct Entry { UnsignedValue ID; std::vector<FlowStringValue> Blocks; + bool operator==(const Entry &Other) const { return ID == Other.ID && Blocks == Other.Blocks; } @@ -329,6 +360,7 @@ struct MachineJumpTable { MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32; std::vector<Entry> Entries; + bool operator==(const MachineJumpTable &Other) const { return Kind == Other.Kind && Entries == Other.Entries; } @@ -387,6 +419,7 @@ struct MachineFrameInfo { bool HasMustTailInVarArgFunc = false; StringValue SavePoint; StringValue RestorePoint; + bool operator==(const MachineFrameInfo &Other) const { return IsFrameAddressTaken == Other.IsFrameAddressTaken && IsReturnAddressTaken == Other.IsReturnAddressTaken && @@ -485,4 +518,4 @@ template <> struct MappingTraits<MachineFunction> { } // end namespace yaml } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MIRYAMLMAPPING_H diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 97a49ce4dc4fa..0c9110cbaa876 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -25,6 +25,7 @@ #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Printable.h" #include <cassert> #include <cstdint> #include 
<functional> @@ -97,6 +98,8 @@ private: using const_probability_iterator = std::vector<BranchProbability>::const_iterator; + Optional<uint64_t> IrrLoopHeaderWeight; + /// Keep track of the physical registers that are livein of the basicblock. using LiveInVector = std::vector<RegisterMaskPair>; LiveInVector LiveIns; @@ -699,8 +702,8 @@ public: LQR_Unknown ///< Register liveness not decidable from local neighborhood. }; - /// Return whether (physical) register \p Reg has been <def>ined and not - /// <kill>ed as of just before \p Before. + /// Return whether (physical) register \p Reg has been defined and not + /// killed as of just before \p Before. /// /// Search is localised to a neighborhood of \p Neighborhood instructions /// before (searching for defs or kills) and \p Neighborhood instructions @@ -729,6 +732,14 @@ public: /// Return the MCSymbol for this basic block. MCSymbol *getSymbol() const; + Optional<uint64_t> getIrrLoopHeaderWeight() const { + return IrrLoopHeaderWeight; + } + + void setIrrLoopHeaderWeight(uint64_t Weight) { + IrrLoopHeaderWeight = Weight; + } + private: /// Return probability iterator corresponding to the I successor iterator. probability_iterator getProbabilityIterator(succ_iterator I); @@ -748,7 +759,7 @@ private: // Machine-CFG mutators - /// Remove Pred as a predecessor of this MachineBasicBlock. Don't do this + /// Add Pred as a predecessor of this MachineBasicBlock. Don't do this /// unless you know what you're doing, because it doesn't update Pred's /// successors list. Use Pred->addSuccessor instead. void addPredecessor(MachineBasicBlock *Pred); @@ -761,9 +772,17 @@ private: raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB); +/// Prints a machine basic block reference. +/// +/// The format is: +/// %bb.5 - a machine basic block with MBB.getNumber() == 5. +/// +/// Usage: OS << printMBBReference(MBB) << '\n'; +Printable printMBBReference(const MachineBasicBlock &MBB); + // This is useful when building IndexedMaps keyed on basic block pointers. 
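A tiny usage sketch for the new printMBBReference helper; dumpSuccessors is an illustrative name.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpSuccessors(const MachineBasicBlock &MBB) {
  // Prints lines such as "%bb.3 -> %bb.5" based on MBB.getNumber().
  for (const MachineBasicBlock *Succ : MBB.successors())
    errs() << printMBBReference(MBB) << " -> " << printMBBReference(*Succ)
           << '\n';
}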
-struct MBB2NumberFunctor : - public std::unary_function<const MachineBasicBlock*, unsigned> { +struct MBB2NumberFunctor { + using argument_type = const MachineBasicBlock *; unsigned operator()(const MachineBasicBlock *MBB) const { return MBB->getNumber(); } diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index cba79c818a761..5b4b99ca0a5d8 100644 --- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -62,6 +62,8 @@ public: Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const; Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const; + bool isIrrLoopHeader(const MachineBasicBlock *MBB); + const MachineFunction *getFunction() const; const MachineBranchProbabilityInfo *getMBPI() const; void view(const Twine &Name, bool isSimple = true) const; diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 8c54ae9254708..586535f771c28 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -68,12 +68,18 @@ enum class MachineCombinerPattern { FMLAv4i32_indexed_OP2, FMLSv1i32_indexed_OP2, FMLSv1i64_indexed_OP2, - FMLSv2i32_indexed_OP2, - FMLSv2i64_indexed_OP2, + FMLSv2f32_OP1, FMLSv2f32_OP2, + FMLSv2f64_OP1, FMLSv2f64_OP2, - FMLSv4i32_indexed_OP2, - FMLSv4f32_OP2 + FMLSv2i32_indexed_OP1, + FMLSv2i32_indexed_OP2, + FMLSv2i64_indexed_OP1, + FMLSv2i64_indexed_OP2, + FMLSv4f32_OP1, + FMLSv4f32_OP2, + FMLSv4i32_indexed_OP1, + FMLSv4i32_indexed_OP2 }; } // end namespace llvm diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h index 6efeefd9a7217..ffbcc62bfa36b 100644 --- a/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -39,7 +39,7 @@ public: DominanceFrontierBase<MachineBasicBlock, false> &getBase() { return Base; } - inline const std::vector<MachineBasicBlock *> &getRoots() const { + const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { return Base.getRoots(); } diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 8bf98f6064956..98fdb51aae2fd 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -93,7 +93,7 @@ public: /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). /// - inline const std::vector<MachineBasicBlock*> &getRoots() const { + inline const SmallVectorImpl<MachineBasicBlock*> &getRoots() const { applySplitCriticalEdges(); return DT->getRoots(); } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 689f3cd9fd12b..f887517217e18 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -31,15 +31,30 @@ class AllocaInst; class CalleeSavedInfo { unsigned Reg; int FrameIdx; + /// Flag indicating whether the register is actually restored in the epilog. + /// In most cases, if a register is saved, it is also restored. There are + /// some situations, though, when this is not the case. For example, the + /// LR register on ARM is usually saved, but on exit from the function its + /// saved value may be loaded directly into PC. 
Since liveness tracking of + /// physical registers treats callee-saved registers are live outside of + /// the function, LR would be treated as live-on-exit, even though in these + /// scenarios it is not. This flag is added to indicate that the saved + /// register described by this object is not restored in the epilog. + /// The long-term solution is to model the liveness of callee-saved registers + /// by implicit uses on the return instructions, however, the required + /// changes in the ARM backend would be quite extensive. + bool Restored; public: explicit CalleeSavedInfo(unsigned R, int FI = 0) - : Reg(R), FrameIdx(FI) {} + : Reg(R), FrameIdx(FI), Restored(true) {} // Accessors. unsigned getReg() const { return Reg; } int getFrameIdx() const { return FrameIdx; } void setFrameIdx(int FI) { FrameIdx = FI; } + bool isRestored() const { return Restored; } + void setRestored(bool R) { Restored = R; } }; /// The MachineFrameInfo class represents an abstract stack frame until @@ -99,8 +114,16 @@ class MachineFrameInfo { /// and/or GC related) over a statepoint. We know that the address of the /// slot can't alias any LLVM IR value. This is very similar to a Spill /// Slot, but is created by statepoint lowering is SelectionDAG, not the - /// register allocator. - bool isStatepointSpillSlot; + /// register allocator. + bool isStatepointSpillSlot = false; + + /// Identifier for stack memory type analagous to address space. If this is + /// non-0, the meaning is target defined. Offsets cannot be directly + /// compared between objects with different stack IDs. The object may not + /// necessarily reside in the same contiguous memory block as other stack + /// objects. Objects with differing stack IDs should not be merged or + /// replaced substituted for each other. + uint8_t StackID; /// If this stack object is originated from an Alloca instruction /// this value saves the original IR allocation. Can be NULL. @@ -108,7 +131,7 @@ class MachineFrameInfo { // If true, the object was mapped into the local frame // block and doesn't need additional handling for allocation beyond that. - bool PreAllocated; + bool PreAllocated = false; // If true, an LLVM IR value might point to this object. // Normally, spill slots and fixed-offset objects don't alias IR-accessible @@ -117,16 +140,17 @@ class MachineFrameInfo { bool isAliased; /// If true, the object has been zero-extended. - bool isZExt; + bool isZExt = false; /// If true, the object has been zero-extended. - bool isSExt; - - StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, - bool isSS, const AllocaInst *Val, bool A) - : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), - isSpillSlot(isSS), isStatepointSpillSlot(false), Alloca(Val), - PreAllocated(false), isAliased(A), isZExt(false), isSExt(false) {} + bool isSExt = false; + + StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, + bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, + bool IsAliased, uint8_t StackID = 0) + : SPOffset(SPOffset), Size(Size), Alignment(Alignment), + isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), + StackID(StackID), Alloca(Alloca), isAliased(IsAliased) {} }; /// The alignment of the stack. @@ -549,13 +573,13 @@ public: /// All fixed objects should be created before other objects are created for /// efficiency. By default, fixed objects are not pointed to by LLVM IR /// values. This returns an index with a negative value. 
- int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, + int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased = false); /// Create a spill slot at a fixed location on the stack. /// Returns an index with a negative value. int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, - bool Immutable = false); + bool IsImmutable = false); /// Returns true if the specified index corresponds to a fixed stack object. bool isFixedObjectIndex(int ObjectIdx) const { @@ -581,10 +605,10 @@ public: } /// Marks the immutability of an object. - void setIsImmutableObjectIndex(int ObjectIdx, bool Immutable) { + void setIsImmutableObjectIndex(int ObjectIdx, bool IsImmutable) { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - Objects[ObjectIdx+NumFixedObjects].isImmutable = Immutable; + Objects[ObjectIdx+NumFixedObjects].isImmutable = IsImmutable; } /// Returns true if the specified index corresponds to a spill slot. @@ -600,6 +624,18 @@ public: return Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot; } + /// \see StackID + uint8_t getStackID(int ObjectIdx) const { + return Objects[ObjectIdx+NumFixedObjects].StackID; + } + + /// \see StackID + void setStackID(int ObjectIdx, uint8_t ID) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].StackID = ID; + } + /// Returns true if the specified index corresponds to a dead object. bool isDeadObjectIndex(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && @@ -624,8 +660,8 @@ public: /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. - int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, - const AllocaInst *Alloca = nullptr); + int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, + const AllocaInst *Alloca = nullptr, uint8_t ID = 0); /// Create a new statically sized stack object that represents a spill slot, /// returning a nonnegative identifier to represent it. @@ -646,6 +682,8 @@ public: const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const { return CSInfo; } + /// \copydoc getCalleeSavedInfo() + std::vector<CalleeSavedInfo> &getCalleeSavedInfo() { return CSInfo; } /// Used by prolog/epilog inserter to set the function's callee saved /// information. diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 010d7032c516a..7d8b7ebe8d629 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -223,7 +223,7 @@ struct LandingPadInfo { }; class MachineFunction { - const Function *Fn; + const Function &F; const TargetMachine &Target; const TargetSubtargetInfo *STI; MCContext &Ctx; @@ -314,6 +314,9 @@ class MachineFunction { /// Map of invoke call site index values to associated begin EH_LABEL. DenseMap<MCSymbol*, unsigned> CallSiteMap; + /// CodeView label annotations. 
+ std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations; + bool CallsEHReturn = false; bool CallsUnwindInit = false; bool HasEHFunclets = false; @@ -356,8 +359,9 @@ public: using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>; VariableDbgInfoMapTy VariableDbgInfos; - MachineFunction(const Function *Fn, const TargetMachine &TM, - unsigned FunctionNum, MachineModuleInfo &MMI); + MachineFunction(const Function &F, const TargetMachine &TM, + const TargetSubtargetInfo &STI, unsigned FunctionNum, + MachineModuleInfo &MMI); MachineFunction(const MachineFunction &) = delete; MachineFunction &operator=(const MachineFunction &) = delete; ~MachineFunction(); @@ -376,8 +380,8 @@ public: /// Return the DataLayout attached to the Module associated to this MF. const DataLayout &getDataLayout() const; - /// getFunction - Return the LLVM function that this machine code represents - const Function *getFunction() const { return Fn; } + /// Return the LLVM function that this machine code represents + const Function &getFunction() const { return F; } /// getName - Return the name of the corresponding LLVM function. StringRef getName() const; @@ -625,14 +629,23 @@ public: MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp = false); - /// CloneMachineInstr - Create a new MachineInstr which is a copy of the - /// 'Orig' instruction, identical in all ways except the instruction - /// has no parent, prev, or next. + /// Create a new MachineInstr which is a copy of \p Orig, identical in all + /// ways except the instruction has no parent, prev, or next. Bundling flags + /// are reset. /// - /// See also TargetInstrInfo::duplicate() for target-specific fixes to cloned - /// instructions. + /// Note: Clones a single instruction, not whole instruction bundles. + /// Does not perform target specific adjustments; consider using + /// TargetInstrInfo::duplicate() instead. MachineInstr *CloneMachineInstr(const MachineInstr *Orig); + /// Clones instruction or the whole instruction bundle \p Orig and insert + /// into \p MBB before \p InsertBefore. + /// + /// Note: Does not perform target specific adjustments; consider using + /// TargetInstrInfo::duplicate() intead. + MachineInstr &CloneMachineInstrBundle(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig); + /// DeleteMachineInstr - Delete the given MachineInstr. void DeleteMachineInstr(MachineInstr *MI); @@ -823,6 +836,15 @@ public: return CallSiteMap.count(BeginLabel); } + /// Record annotations associated with a particular label. + void addCodeViewAnnotation(MCSymbol *Label, MDNode *MD) { + CodeViewAnnotations.push_back({Label, MD}); + } + + ArrayRef<std::pair<MCSymbol *, MDNode *>> getCodeViewAnnotations() const { + return CodeViewAnnotations; + } + /// Return a reference to the C++ typeinfo for the current function. 
const std::vector<const GlobalValue *> &getTypeInfos() const { return TypeInfos; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index b87aff102d478..3c1c1bb14f426 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -22,11 +22,11 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/ArrayRecycler.h" -#include "llvm/Target/TargetOpcodes.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -44,6 +44,7 @@ class MachineRegisterInfo; class ModuleSlotTracker; class raw_ostream; template <typename T> class SmallVectorImpl; +class SmallBitVector; class StringRef; class TargetInstrInfo; class TargetRegisterClass; @@ -67,7 +68,9 @@ public: /// otherwise easily derivable from the IR text. /// enum CommentFlag { - ReloadReuse = 0x1 // higher bits are reserved for target dep comments. + ReloadReuse = 0x1, // higher bits are reserved for target dep comments. + NoSchedComment = 0x2, + TAsmComments = 0x4 // Target Asm comments should start from this value. }; enum MIFlag { @@ -139,6 +142,17 @@ public: const MachineBasicBlock* getParent() const { return Parent; } MachineBasicBlock* getParent() { return Parent; } + /// Return the function that contains the basic block that this instruction + /// belongs to. + /// + /// Note: this is undefined behaviour if the instruction does not have a + /// parent. + const MachineFunction *getMF() const; + MachineFunction *getMF() { + return const_cast<MachineFunction *>( + static_cast<const MachineInstr *>(this)->getMF()); + } + /// Return the asm printer flags bitvector. uint8_t getAsmPrinterFlags() const { return AsmPrinterFlags; } @@ -290,6 +304,21 @@ public: return Operands[i]; } + /// Return true if operand \p OpIdx is a subregister index. + bool isOperandSubregIdx(unsigned OpIdx) const { + assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate && + "Expected MO_Immediate operand type."); + if (isExtractSubreg() && OpIdx == 2) + return true; + if (isInsertSubreg() && OpIdx == 3) + return true; + if (isRegSequence() && OpIdx > 1 && (OpIdx % 2) == 0) + return true; + if (isSubregToReg() && OpIdx == 3) + return true; + return false; + } + /// Returns the number of non-implicit operands. unsigned getNumExplicitOperands() const; @@ -771,9 +800,14 @@ public: bool isEHLabel() const { return getOpcode() == TargetOpcode::EH_LABEL; } bool isGCLabel() const { return getOpcode() == TargetOpcode::GC_LABEL; } + bool isAnnotationLabel() const { + return getOpcode() == TargetOpcode::ANNOTATION_LABEL; + } /// Returns true if the MachineInstr represents a label. 
- bool isLabel() const { return isEHLabel() || isGCLabel(); } + bool isLabel() const { + return isEHLabel() || isGCLabel() || isAnnotationLabel(); + } bool isCFIInstruction() const { return getOpcode() == TargetOpcode::CFI_INSTRUCTION; @@ -792,7 +826,10 @@ public: && getOperand(1).isImm(); } - bool isPHI() const { return getOpcode() == TargetOpcode::PHI; } + bool isPHI() const { + return getOpcode() == TargetOpcode::PHI || + getOpcode() == TargetOpcode::G_PHI; + } bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; } @@ -869,6 +906,7 @@ public: return isMetaInstruction(); // Copy-like instructions are usually eliminated during register allocation. case TargetOpcode::PHI: + case TargetOpcode::G_PHI: case TargetOpcode::COPY: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: @@ -1185,6 +1223,15 @@ public: /// Debugging support /// @{ + /// Determine the generic type to be printed (if needed) on uses and defs. + LLT getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes, + const MachineRegisterInfo &MRI) const; + + /// Return true when an instruction has tied register that can't be determined + /// by the instruction's descriptor. This is useful for MIR printing, to + /// determine whether we need to print the ties or not. + bool hasComplexRegisterTies() const; + /// Print this MI to \p OS. /// Only print the defs and the opcode if \p SkipOpers is true. /// Otherwise, also print operands if \p SkipDebugLoc is true. diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 412c55d542ea6..e4f3976ec9504 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/ErrorHandling.h" @@ -48,6 +49,7 @@ namespace RegState { EarlyClobber = 0x40, Debug = 0x80, InternalRead = 0x100, + Renamable = 0x200, DefineNoRead = Define | Undef, ImplicitDefine = Implicit | Define, ImplicitKill = Implicit | Kill @@ -91,7 +93,8 @@ public: flags & RegState::EarlyClobber, SubReg, flags & RegState::Debug, - flags & RegState::InternalRead)); + flags & RegState::InternalRead, + flags & RegState::Renamable)); return *this; } @@ -396,28 +399,32 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, const DebugLoc &DL, } /// This version of the builder builds a DBG_VALUE intrinsic -/// for either a value in a register or a register-indirect+offset +/// for either a value in a register or a register-indirect /// address. The convention is that a DBG_VALUE is indirect iff the /// second operand is an immediate. MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, unsigned Offset, - const MDNode *Variable, const MDNode *Expr); + unsigned Reg, const MDNode *Variable, + const MDNode *Expr); /// This version of the builder builds a DBG_VALUE intrinsic -/// for either a value in a register or a register-indirect+offset +/// for either a value in a register or a register-indirect /// address and inserts it at position I. 
MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const DebugLoc &DL, const MCInstrDesc &MCID, bool IsIndirect, - unsigned Reg, unsigned Offset, - const MDNode *Variable, const MDNode *Expr); + unsigned Reg, const MDNode *Variable, + const MDNode *Expr); /// Clone a DBG_VALUE whose value has been spilled to FrameIndex. MachineInstr *buildDbgValueForSpill(MachineBasicBlock &BB, MachineBasicBlock::iterator I, const MachineInstr &Orig, int FrameIndex); +/// Update a DBG_VALUE whose value has been spilled to FrameIndex. Useful when +/// modifying an instruction in place while iterating over a basic block. +void updateDbgValueForSpill(MachineInstr &Orig, int FrameIndex); + inline unsigned getDefRegState(bool B) { return B ? RegState::Define : 0; } @@ -439,6 +446,9 @@ inline unsigned getInternalReadRegState(bool B) { inline unsigned getDebugRegState(bool B) { return B ? RegState::Debug : 0; } +inline unsigned getRenamableRegState(bool B) { + return B ? RegState::Renamable : 0; +} /// Get all register state flags from machine operand \p RegOp. inline unsigned getRegState(const MachineOperand &RegOp) { @@ -449,7 +459,10 @@ inline unsigned getRegState(const MachineOperand &RegOp) { getDeadRegState(RegOp.isDead()) | getUndefRegState(RegOp.isUndef()) | getInternalReadRegState(RegOp.isInternalRead()) | - getDebugRegState(RegOp.isDebug()); + getDebugRegState(RegOp.isDebug()) | + getRenamableRegState( + TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) && + RegOp.isRenamable()); } /// Helper class for constructing bundles of MachineInstrs. diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 995c7001d9282..b5341fd1ae496 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -150,7 +150,7 @@ public: /// struct VirtRegInfo { /// Reads - One of the operands read the virtual register. This does not - /// include <undef> or <internal> use operands, see MO::readsReg(). + /// include undef or internal use operands, see MO::readsReg(). bool Reads; /// Writes - One of the operands writes the virtual register. diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h index adcd1d0de63d3..25a3e6b556a3a 100644 --- a/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -20,6 +20,7 @@ #ifndef LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H #define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H +#include "llvm/Support/Printable.h" #include <cassert> #include <vector> @@ -125,6 +126,15 @@ public: void dump() const; }; + +/// Prints a jump table entry reference. +/// +/// The format is: +/// %jump-table.5 - a jump table entry with index == 5. +/// +/// Usage: OS << printJumpTableEntryReference(Idx) << '\n'; +Printable printJumpTableEntryReference(unsigned Idx); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 58cffaade9d2a..104655e455246 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -44,8 +44,6 @@ extern template class LoopBase<MachineBasicBlock, MachineLoop>; class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> { public: - MachineLoop(); - /// Return the "top" block in the loop, which is the first block in the linear /// layout, ignoring any parts of the loop not contiguous with the part that /// contains the header. 
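The RegState changes in the MachineInstrBuilder.h hunks above add a Renamable flag and fold it into getRegState(), so a pass that rebuilds an instruction can carry an existing operand's full register state across. The helper below is a minimal sketch of that pattern against the post-patch headers; buildCopyLike and its parameters are illustrative names, not part of the diff.

#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Rebuild a single-def instruction, copying the source operand together with
// all of its register state. getRegState() now also returns the Renamable bit
// for physical registers, so the rebuilt use keeps the original operand's
// renaming freedom.
static MachineInstr *buildCopyLike(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, const MCInstrDesc &Desc,
                                   unsigned DstReg,
                                   const MachineOperand &SrcMO) {
  return BuildMI(MBB, I, DL, Desc, DstReg)
      .addReg(SrcMO.getReg(), getRegState(SrcMO), SrcMO.getSubReg());
}

The DBG_VALUE overloads above drop their Offset parameter for a related reason: the offset now travels inside the DIExpression operand rather than as a separate immediate.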
@@ -76,6 +74,8 @@ private: explicit MachineLoop(MachineBasicBlock *MBB) : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {} + + MachineLoop() = default; }; // Implementation in LoopInfoImpl.h diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index a9de0db05d72c..c5b204a79f040 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -45,18 +45,46 @@ struct MachinePointerInfo { /// Offset - This is an offset from the base Value*. int64_t Offset; - explicit MachinePointerInfo(const Value *v = nullptr, int64_t offset = 0) - : V(v), Offset(offset) {} + uint8_t StackID; - explicit MachinePointerInfo(const PseudoSourceValue *v, - int64_t offset = 0) - : V(v), Offset(offset) {} + unsigned AddrSpace = 0; + + explicit MachinePointerInfo(const Value *v, int64_t offset = 0, + uint8_t ID = 0) + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v ? v->getType()->getPointerAddressSpace() : 0; + } + + explicit MachinePointerInfo(const PseudoSourceValue *v, int64_t offset = 0, + uint8_t ID = 0) + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v ? v->getAddressSpace() : 0; + } + + explicit MachinePointerInfo(unsigned AddressSpace = 0) + : V((const Value *)nullptr), Offset(0), StackID(0), + AddrSpace(AddressSpace) {} + + explicit MachinePointerInfo( + PointerUnion<const Value *, const PseudoSourceValue *> v, + int64_t offset = 0, + uint8_t ID = 0) + : V(v), Offset(offset), StackID(ID) { + if (V) { + if (const auto *ValPtr = V.dyn_cast<const Value*>()) + AddrSpace = ValPtr->getType()->getPointerAddressSpace(); + else + AddrSpace = V.get<const PseudoSourceValue*>()->getAddressSpace(); + } + } MachinePointerInfo getWithOffset(int64_t O) const { - if (V.isNull()) return MachinePointerInfo(); + if (V.isNull()) + return MachinePointerInfo(AddrSpace); if (V.is<const Value*>()) - return MachinePointerInfo(V.get<const Value*>(), Offset+O); - return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O); + return MachinePointerInfo(V.get<const Value*>(), Offset+O, StackID); + return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O, + StackID); } /// Return true if memory region [V, V+Offset+Size) is known to be @@ -82,7 +110,11 @@ struct MachinePointerInfo { static MachinePointerInfo getGOT(MachineFunction &MF); /// Stack pointer relative access. - static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset); + static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, + uint8_t ID = 0); + + /// Stack memory without other information. + static MachinePointerInfo getUnknownStack(MachineFunction &MF); }; diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index d64941a9e725a..6be304fa368bb 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -125,6 +125,16 @@ class MachineModuleInfo : public ImmutablePass { /// comments in lib/Target/X86/X86FrameLowering.cpp for more details. bool UsesMorestackAddr; + /// True if the module contains split-stack functions. This is used to + /// emit .note.GNU-split-stack section as required by the linker for + /// special handling split-stack function calling no-split-stack function. + bool HasSplitStack; + + /// True if the module contains no-split-stack functions. This is used to + /// emit .note.GNU-no-split-stack section when it also contains split-stack + /// functions. 
+ bool HasNosplitStack; + /// Maps IR Functions to their corresponding MachineFunctions. DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions; /// Next unique number available for a MachineFunction. @@ -145,7 +155,6 @@ public: const MCContext &getContext() const { return Context; } MCContext &getContext() { return Context; } - void setModule(const Module *M) { TheModule = M; } const Module *getModule() const { return TheModule; } /// Returns the MachineFunction constructed for the IR function \p F. @@ -194,6 +203,22 @@ public: UsesMorestackAddr = b; } + bool hasSplitStack() const { + return HasSplitStack; + } + + void setHasSplitStack(bool b) { + HasSplitStack = b; + } + + bool hasNosplitStack() const { + return HasNosplitStack; + } + + void setHasNosplitStack(bool b) { + HasNosplitStack = b; + } + /// Return the symbol to be used for the specified basic block when its /// address is taken. This cannot be its normal LBB label because the block /// may be accessed outside its containing function. diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index 34b21ceddd434..6a87fa2fbf009 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineModuleInfoImpls.h -------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineModuleInfoImpls.h --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,11 +15,12 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H #define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H -#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/ValueTypes.h" +#include <cassert> namespace llvm { + class MCSymbol; /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation @@ -36,6 +37,7 @@ class MachineModuleInfoMachO : public MachineModuleInfoImpl { DenseMap<MCSymbol *, StubValueTy> ThreadLocalGVStubs; virtual void anchor(); // Out of line virtual method. + public: MachineModuleInfoMachO(const MachineModuleInfo &) {} @@ -64,6 +66,7 @@ class MachineModuleInfoELF : public MachineModuleInfoImpl { DenseMap<MCSymbol *, StubValueTy> GVStubs; virtual void anchor(); // Out of line virtual method. + public: MachineModuleInfoELF(const MachineModuleInfo &) {} @@ -79,4 +82,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 2560399bcf545..ccf0917ed0851 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -14,8 +14,10 @@ #ifndef LLVM_CODEGEN_MACHINEOPERAND_H #define LLVM_CODEGEN_MACHINEOPERAND_H +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include <cassert> namespace llvm { @@ -65,6 +67,7 @@ public: MO_CFIIndex, ///< MCCFIInstruction index. MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel + MO_Last = MO_Predicate, }; private: @@ -83,24 +86,30 @@ private: /// before MachineInstr::tieOperands(). unsigned char TiedTo : 4; - /// IsDef/IsImp/IsKill/IsDead flags - These are only valid for MO_Register - /// operands. - /// IsDef - True if this is a def, false if this is a use of the register. + /// This is only valid on register operands. 
/// bool IsDef : 1; /// IsImp - True if this is an implicit def or use, false if it is explicit. + /// This is only valid on register opderands. /// bool IsImp : 1; - /// IsKill - True if this instruction is the last use of the register on this - /// path through the function. This is only valid on uses of registers. - bool IsKill : 1; - - /// IsDead - True if this register is never used by a subsequent instruction. - /// This is only valid on definitions of registers. - bool IsDead : 1; + /// IsDeadOrKill + /// For uses: IsKill - True if this instruction is the last use of the + /// register on this path through the function. + /// For defs: IsDead - True if this register is never used by a subsequent + /// instruction. + /// This is only valid on register operands. + bool IsDeadOrKill : 1; + + /// IsRenamable - True if this register may be renamed, i.e. it does not + /// generate a value that is somehow read in a way that is not represented by + /// the Machine IR (e.g. to meet an ABI or ISA requirement). This is only + /// valid on physical register operands. Virtual registers are assumed to + /// always be renamable regardless of the value of this field. + bool IsRenamable : 1; /// IsUndef - True if this register operand reads an "undef" value, i.e. the /// read value doesn't matter. This flag can be set on both use and def @@ -114,9 +123,9 @@ private: /// the same register. In that case, the instruction may depend on those /// operands reading the same dont-care value. For example: /// - /// %vreg1<def> = XOR %vreg2<undef>, %vreg2<undef> + /// %1 = XOR undef %2, undef %2 /// - /// Any register can be used for %vreg2, and its value doesn't matter, but + /// Any register can be used for %2, and its value doesn't matter, but /// the two operands must be the same register. /// bool IsUndef : 1; @@ -224,11 +233,50 @@ public: /// void clearParent() { ParentMI = nullptr; } + /// Print a subreg index operand. + /// MO_Immediate operands can also be subreg idices. If it's the case, the + /// subreg index name will be printed. MachineInstr::isOperandSubregIdx can be + /// called to check this. + static void printSubregIdx(raw_ostream &OS, uint64_t Index, + const TargetRegisterInfo *TRI); + + /// Print operand target flags. + static void printTargetFlags(raw_ostream& OS, const MachineOperand &Op); + + /// Print a MCSymbol as an operand. + static void printSymbol(raw_ostream &OS, MCSymbol &Sym); + + /// Print a stack object reference. + static void printStackObjectReference(raw_ostream &OS, unsigned FrameIndex, + bool IsFixed, StringRef Name); + + /// Print the MachineOperand to \p os. + /// Providing a valid \p TRI and \p IntrinsicInfo results in a more + /// target-specific printing. If \p TRI and \p IntrinsicInfo are null, the + /// function will try to pick it up from the parent. void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr, const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; - void print(raw_ostream &os, ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI = nullptr, - const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; + + /// More complex way of printing a MachineOperand. + /// \param TypeToPrint specifies the generic type to be printed on uses and + /// defs. It can be determined using MachineInstr::getTypeToPrint. + /// \param PrintDef - whether we want to print `def` on an operand which + /// isDef. Sometimes, if the operand is printed before '=', we don't print + /// `def`. 
+ /// \param ShouldPrintRegisterTies - whether we want to print register ties. + /// Sometimes they are easily determined by the instruction's descriptor + /// (MachineInstr::hasComplexRegiterTies can determine if it's needed). + /// \param TiedOperandIdx - if we need to print register ties this needs to + /// provide the index of the tied register. If not, it will be ignored. + /// \param TRI - provide more target-specific information to the printer. + /// Unlike the previous function, this one will not try and get the + /// information from it's parent. + /// \param IntrinsicInfo - same as \p TRI. + void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, + bool PrintDef, bool ShouldPrintRegisterTies, + unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const; + void dump() const; //===--------------------------------------------------------------------===// @@ -301,12 +349,12 @@ public: bool isDead() const { assert(isReg() && "Wrong MachineOperand accessor"); - return IsDead; + return IsDeadOrKill & IsDef; } bool isKill() const { assert(isReg() && "Wrong MachineOperand accessor"); - return IsKill; + return IsDeadOrKill & !IsDef; } bool isUndef() const { @@ -314,6 +362,8 @@ public: return IsUndef; } + bool isRenamable() const; + bool isInternalRead() const { assert(isReg() && "Wrong MachineOperand accessor"); return IsInternalRead; @@ -369,12 +419,13 @@ public: /// substPhysReg - Substitute the current register with the physical register /// Reg, taking any existing SubReg into account. For instance, - /// substPhysReg(%EAX) will change %reg1024:sub_8bit to %AL. + /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al. /// void substPhysReg(unsigned Reg, const TargetRegisterInfo&); void setIsUse(bool Val = true) { setIsDef(!Val); } + /// Change a def to a use, or a use to a def. void setIsDef(bool Val = true); void setImplicit(bool Val = true) { @@ -385,12 +436,12 @@ public: void setIsKill(bool Val = true) { assert(isReg() && !IsDef && "Wrong MachineOperand mutator"); assert((!Val || !isDebug()) && "Marking a debug operation as kill"); - IsKill = Val; + IsDeadOrKill = Val; } void setIsDead(bool Val = true) { assert(isReg() && IsDef && "Wrong MachineOperand mutator"); - IsDead = Val; + IsDeadOrKill = Val; } void setIsUndef(bool Val = true) { @@ -398,6 +449,12 @@ public: IsUndef = Val; } + void setIsRenamable(bool Val = true); + + /// Set IsRenamable to true if there are no extra register allocation + /// requirements placed on this operand by the parent instruction's opcode. + void setIsRenamableIfNoExtraRegAllocReq(); + void setIsInternalRead(bool Val = true) { assert(isReg() && "Wrong MachineOperand mutator"); IsInternalRead = Val; @@ -549,6 +606,11 @@ public: Contents.OffsetedInfo.Val.Index = Idx; } + void setMetadata(const MDNode *MD) { + assert(isMetadata() && "Wrong MachineOperand mutator"); + Contents.MD = MD; + } + void setMBB(MachineBasicBlock *MBB) { assert(isMBB() && "Wrong MachineOperand mutator"); Contents.MBB = MBB; @@ -568,14 +630,16 @@ public: //===--------------------------------------------------------------------===// /// Returns true if this operand is identical to the specified operand except - /// for liveness related flags (isKill, isUndef and isDead). + /// for liveness related flags (isKill, isUndef and isDead). Note that this + /// should stay in sync with the hash_value overload below. bool isIdenticalTo(const MachineOperand &Other) const; /// \brief MachineOperand hash_value overload. 
/// /// Note that this includes the same information in the hash that /// isIdenticalTo uses for comparison. It is thus suited for use in hash - /// tables which use that function for equality comparisons only. + /// tables which use that function for equality comparisons only. This must + /// stay exactly in sync with isIdenticalTo above. friend hash_code hash_value(const MachineOperand &MO); /// ChangeToImmediate - Replace this operand with a new immediate operand of @@ -597,6 +661,10 @@ public: /// Replace this operand with a frame index. void ChangeToFrameIndex(int Idx); + /// Replace this operand with a target index. + void ChangeToTargetIndex(unsigned Idx, int64_t Offset, + unsigned char TargetFlags = 0); + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. @@ -630,16 +698,16 @@ public: bool isKill = false, bool isDead = false, bool isUndef = false, bool isEarlyClobber = false, - unsigned SubReg = 0, - bool isDebug = false, - bool isInternalRead = false) { + unsigned SubReg = 0, bool isDebug = false, + bool isInternalRead = false, + bool isRenamable = false) { assert(!(isDead && !isDef) && "Dead flag on non-def"); assert(!(isKill && isDef) && "Kill flag on def"); MachineOperand Op(MachineOperand::MO_Register); Op.IsDef = isDef; Op.IsImp = isImp; - Op.IsKill = isKill; - Op.IsDead = isDead; + Op.IsDeadOrKill = isKill | isDead; + Op.IsRenamable = isRenamable; Op.IsUndef = isUndef; Op.IsInternalRead = isInternalRead; Op.IsEarlyClobber = isEarlyClobber; @@ -679,8 +747,7 @@ public: Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateJTI(unsigned Idx, - unsigned char TargetFlags = 0) { + static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_JumpTableIndex); Op.setIndex(Idx); Op.setTargetFlags(TargetFlags); @@ -711,12 +778,12 @@ public: return Op; } /// CreateRegMask - Creates a register mask operand referencing Mask. The - /// operand does not take ownership of the memory referenced by Mask, it must - /// remain valid for the lifetime of the operand. + /// operand does not take ownership of the memory referenced by Mask, it + /// must remain valid for the lifetime of the operand. /// - /// A RegMask operand represents a set of non-clobbered physical registers on - /// an instruction that clobbers many registers, typically a call. The bit - /// mask has a bit set for each physreg that is preserved by this + /// A RegMask operand represents a set of non-clobbered physical registers + /// on an instruction that clobbers many registers, typically a call. The + /// bit mask has a bit set for each physreg that is preserved by this /// instruction, as described in the documentation for /// TargetRegisterInfo::getCallPreservedMask(). /// @@ -769,30 +836,63 @@ public: friend class MachineInstr; friend class MachineRegisterInfo; + private: + // If this operand is currently a register operand, and if this is in a + // function, deregister the operand from the register's use/def list. void removeRegFromUses(); + /// Artificial kinds for DenseMap usage. + enum : unsigned char { + MO_Empty = MO_Last + 1, + MO_Tombstone, + }; + + friend struct DenseMapInfo<MachineOperand>; + //===--------------------------------------------------------------------===// // Methods for handling register use/def lists. 
//===--------------------------------------------------------------------===// - /// isOnRegUseList - Return true if this operand is on a register use/def list - /// or false if not. This can only be called for register operands that are - /// part of a machine instruction. + /// isOnRegUseList - Return true if this operand is on a register use/def + /// list or false if not. This can only be called for register operands + /// that are part of a machine instruction. bool isOnRegUseList() const { assert(isReg() && "Can only add reg operand to use lists"); return Contents.Reg.Prev != nullptr; } }; -inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) { - MO.print(OS, nullptr); +template <> struct DenseMapInfo<MachineOperand> { + static MachineOperand getEmptyKey() { + return MachineOperand(static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Empty)); + } + static MachineOperand getTombstoneKey() { + return MachineOperand(static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Tombstone)); + } + static unsigned getHashValue(const MachineOperand &MO) { + return hash_value(MO); + } + static bool isEqual(const MachineOperand &LHS, const MachineOperand &RHS) { + if (LHS.getType() == static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Empty) || + LHS.getType() == static_cast<MachineOperand::MachineOperandType>( + MachineOperand::MO_Tombstone)) + return LHS.getType() == RHS.getType(); + return LHS.isIdenticalTo(RHS); + } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand &MO) { + MO.print(OS); return OS; } - // See friend declaration above. This additional declaration is required in - // order to compile LLVM with IBM xlC compiler. - hash_code hash_value(const MachineOperand &MO); -} // End llvm namespace +// See friend declaration above. This additional declaration is required in +// order to compile LLVM with IBM xlC compiler. +hash_code hash_value(const MachineOperand &MO); +} // namespace llvm #endif diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 6ad5de533d13d..2fdefbed37ce0 100644 --- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -16,7 +16,7 @@ #ifndef LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H #define LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -33,7 +33,7 @@ public: const DiagnosticLocation &Loc, const MachineBasicBlock *MBB) : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, RemarkName, - *MBB->getParent()->getFunction(), Loc), + MBB->getParent()->getFunction(), Loc), MBB(MBB) {} /// MI-specific kinds of diagnostic Arguments. @@ -73,7 +73,9 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. bool isEnabled() const override { - return OptimizationRemark::isEnabled(getPassName()); + const Function &Fn = getFunction(); + LLVMContext &Ctx = Fn.getContext(); + return Ctx.getDiagHandlerPtr()->isPassedOptRemarkEnabled(getPassName()); } }; @@ -97,7 +99,9 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. 
bool isEnabled() const override { - return OptimizationRemarkMissed::isEnabled(getPassName()); + const Function &Fn = getFunction(); + LLVMContext &Ctx = Fn.getContext(); + return Ctx.getDiagHandlerPtr()->isMissedOptRemarkEnabled(getPassName()); } }; @@ -121,7 +125,9 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. bool isEnabled() const override { - return OptimizationRemarkAnalysis::isEnabled(getPassName()); + const Function &Fn = getFunction(); + LLVMContext &Ctx = Fn.getContext(); + return Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(getPassName()); } }; @@ -152,10 +158,25 @@ public: /// that are normally too noisy. In this mode, we can use the extra analysis /// (1) to filter trivial false positives or (2) to provide more context so /// that non-trivial false positives can be quickly detected by the user. - bool allowExtraAnalysis() const { - // For now, only allow this with -fsave-optimization-record since the -Rpass - // options are handled in the front-end. - return MF.getFunction()->getContext().getDiagnosticsOutputFile(); + bool allowExtraAnalysis(StringRef PassName) const { + return (MF.getFunction().getContext().getDiagnosticsOutputFile() || + MF.getFunction().getContext() + .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); + } + + /// \brief Take a lambda that returns a remark which will be emitted. Second + /// argument is only used to restrict this to functions. + template <typename T> + void emit(T RemarkBuilder, decltype(RemarkBuilder()) * = nullptr) { + // Avoid building the remark unless we know there are at least *some* + // remarks enabled. We can't currently check whether remarks are requested + // for the calling pass since that requires actually building the remark. + + if (MF.getFunction().getContext().getDiagnosticsOutputFile() || + MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + auto R = RemarkBuilder(); + emit((DiagnosticInfoOptimizationBase &)R); + } } private: diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h index d29d2d85cb0a5..c6a41598ce32c 100644 --- a/include/llvm/CodeGen/MachinePostDominators.h +++ b/include/llvm/CodeGen/MachinePostDominators.h @@ -37,7 +37,7 @@ public: FunctionPass *createMachinePostDominatorTreePass(); - const std::vector<MachineBasicBlock *> &getRoots() const { + const SmallVectorImpl<MachineBasicBlock *> &getRoots() const { return DT->getRoots(); } diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 5ef0ac90e3c2a..3be94f8021701 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -27,9 +27,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> #include <cstddef> #include <cstdint> @@ -84,14 +84,15 @@ private: /// all registers that were disabled are removed from the list. SmallVector<MCPhysReg, 16> UpdatedCSRs; - /// RegAllocHints - This vector records register allocation hints for virtual - /// registers. For each virtual register, it keeps a register and hint type - /// pair making up the allocation hint. 
Hint type is target specific except - /// for the value 0 which means the second value of the pair is the preferred - /// register for allocation. For example, if the hint is <0, 1024>, it means - /// the allocator should prefer the physical register allocated to the virtual - /// register of the hint. - IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints; + /// RegAllocHints - This vector records register allocation hints for + /// virtual registers. For each virtual register, it keeps a pair of hint + /// type and hints vector making up the allocation hints. Only the first + /// hint may be target specific, and in that case this is reflected by the + /// first member of the pair being non-zero. If the hinted register is + /// virtual, it means the allocator should prefer the physical register + /// allocated to it if any. + IndexedMap<std::pair<unsigned, SmallVector<unsigned, 4>>, + VirtReg2IndexFunctor> RegAllocHints; /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. @@ -575,14 +576,16 @@ public: /// preserve conservative kill flag information. void clearKillFlags(unsigned Reg) const; -#ifndef NDEBUG void dumpUses(unsigned RegNo) const; -#endif /// Returns true if PhysReg is unallocatable and constant throughout the /// function. Writing to a constant register has no effect. bool isConstantPhysReg(unsigned PhysReg) const; + /// Returns true if either isConstantPhysReg or TRI->isCallerPreservedPhysReg + /// returns true. This is a utility member function. + bool isCallerPreservedOrConstPhysReg(unsigned PhysReg) const; + /// Get an iterator over the pressure sets affected by the given physical or /// virtual register. If RegUnit is physical, it must be a register unit (from /// MCRegUnitIterator). @@ -704,35 +707,61 @@ public: void clearVirtRegs(); /// setRegAllocationHint - Specify a register allocation hint for the - /// specified virtual register. + /// specified virtual register. This is typically used by target, and in case + /// of an earlier hint it will be overwritten. void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) { assert(TargetRegisterInfo::isVirtualRegister(VReg)); RegAllocHints[VReg].first = Type; - RegAllocHints[VReg].second = PrefReg; + RegAllocHints[VReg].second.clear(); + RegAllocHints[VReg].second.push_back(PrefReg); } - /// Specify the preferred register allocation hint for the specified virtual - /// register. + /// addRegAllocationHint - Add a register allocation hint to the hints + /// vector for VReg. + void addRegAllocationHint(unsigned VReg, unsigned PrefReg) { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + RegAllocHints[VReg].second.push_back(PrefReg); + } + + /// Specify the preferred (target independent) register allocation hint for + /// the specified virtual register. void setSimpleHint(unsigned VReg, unsigned PrefReg) { setRegAllocationHint(VReg, /*Type=*/0, PrefReg); } + void clearSimpleHint(unsigned VReg) { + assert (RegAllocHints[VReg].first == 0 && + "Expected to clear a non-target hint!"); + RegAllocHints[VReg].second.clear(); + } + /// getRegAllocationHint - Return the register allocation hint for the - /// specified virtual register. + /// specified virtual register. If there are many hints, this returns the + /// one with the greatest weight. 
std::pair<unsigned, unsigned> getRegAllocationHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); - return RegAllocHints[VReg]; + unsigned BestHint = (RegAllocHints[VReg].second.size() ? + RegAllocHints[VReg].second[0] : 0); + return std::pair<unsigned, unsigned>(RegAllocHints[VReg].first, BestHint); } - /// getSimpleHint - Return the preferred register allocation hint, or 0 if a - /// standard simple hint (Type == 0) is not set. + /// getSimpleHint - same as getRegAllocationHint except it will only return + /// a target independent hint. unsigned getSimpleHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); std::pair<unsigned, unsigned> Hint = getRegAllocationHint(VReg); return Hint.first ? 0 : Hint.second; } + /// getRegAllocationHints - Return a reference to the vector of all + /// register allocation hints for VReg. + const std::pair<unsigned, SmallVector<unsigned, 4>> + &getRegAllocationHints(unsigned VReg) const { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + return RegAllocHints[VReg]; + } + /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. @@ -844,6 +873,10 @@ public: livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } + ArrayRef<std::pair<unsigned, unsigned>> liveins() const { + return LiveIns; + } + bool isLiveIn(unsigned Reg) const; /// getLiveInPhysReg - If VReg is a live-in virtual register, return the diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h index 50a7d90bf25b0..b5ea2080444d5 100644 --- a/include/llvm/CodeGen/MachineSSAUpdater.h +++ b/include/llvm/CodeGen/MachineSSAUpdater.h @@ -1,4 +1,4 @@ -//===-- MachineSSAUpdater.h - Unstructured SSA Update Tool ------*- C++ -*-===// +//===- MachineSSAUpdater.h - Unstructured SSA Update Tool -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,18 +14,17 @@ #ifndef LLVM_CODEGEN_MACHINESSAUPDATER_H #define LLVM_CODEGEN_MACHINESSAUPDATER_H -#include "llvm/Support/Compiler.h" - namespace llvm { - class MachineBasicBlock; - class MachineFunction; - class MachineInstr; - class MachineOperand; - class MachineRegisterInfo; - class TargetInstrInfo; - class TargetRegisterClass; - template<typename T> class SmallVectorImpl; - template<typename T> class SSAUpdaterTraits; + +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterClass; +template<typename T> class SmallVectorImpl; +template<typename T> class SSAUpdaterTraits; /// MachineSSAUpdater - This class updates SSA form for a set of virtual /// registers defined in multiple blocks. This is used when code duplication @@ -38,7 +37,7 @@ private: /// AvailableVals - This keeps track of which value to use on a per-block /// basis. When we insert PHI nodes, we keep track of them here. //typedef DenseMap<MachineBasicBlock*, unsigned > AvailableValsTy; - void *AV; + void *AV = nullptr; /// VR - Current virtual register whose uses are being updated. unsigned VR; @@ -52,11 +51,14 @@ private: const TargetInstrInfo *TII; MachineRegisterInfo *MRI; + public: /// MachineSSAUpdater constructor. If InsertedPHIs is specified, it will be /// filled in with all PHI Nodes created by rewriting. 
explicit MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *InsertedPHIs = nullptr); + MachineSSAUpdater(const MachineSSAUpdater &) = delete; + MachineSSAUpdater &operator=(const MachineSSAUpdater &) = delete; ~MachineSSAUpdater(); /// Initialize - Reset this object to get ready for a new set of SSA @@ -93,7 +95,6 @@ public: /// their respective blocks. However, the use of X happens in the *middle* of /// a block. Because of this, we need to insert a new PHI node in SomeBB to /// merge the appropriate values, and this value isn't live out of the block. - /// unsigned GetValueInMiddleOfBlock(MachineBasicBlock *BB); /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, @@ -105,11 +106,8 @@ public: private: unsigned GetValueAtEndOfBlockInternal(MachineBasicBlock *BB); - - void operator=(const MachineSSAUpdater&) = delete; - MachineSSAUpdater(const MachineSSAUpdater&) = delete; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINESSAUPDATER_H diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 8590b7a348cfc..e327881de13aa 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -26,7 +26,7 @@ // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list // scheduling while updating the instruction stream, register pressure, and live // intervals. Most targets don't need to override the DAG builder and list -// schedulier, but subtargets that require custom scheduling heuristics may +// scheduler, but subtargets that require custom scheduling heuristics may // plugin an alternate MachineSchedStrategy. The strategy is responsible for // selecting the highest priority node from the list: // @@ -214,9 +214,20 @@ public: /// This has to be enabled in combination with shouldTrackPressure(). virtual bool shouldTrackLaneMasks() const { return false; } + // If this method returns true, handling of the scheduling regions + // themselves (in case of a scheduling boundary in MBB) will be done + // beginning with the topmost region of MBB. + virtual bool doMBBSchedRegionsTopDown() const { return false; } + /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; + /// Tell the strategy that MBB is about to be processed. + virtual void enterMBB(MachineBasicBlock *MBB) {}; + + /// Tell the strategy that current MBB is done. + virtual void leaveMBB() {}; + /// Notify this strategy that all roots have been released (including those /// that depend on EntrySU or ExitSU). virtual void registerRoots() {} @@ -284,6 +295,13 @@ public: // Provide a vtable anchor ~ScheduleDAGMI() override; + /// If this method returns true, handling of the scheduling regions + /// themselves (in case of a scheduling boundary in MBB) will be done + /// beginning with the topmost region of MBB. + bool doMBBSchedRegionsTopDown() const override { + return SchedImpl->doMBBSchedRegionsTopDown(); + } + // Returns LiveIntervals instance for use in DAG mutators and such. LiveIntervals *getLIS() const { return LIS; } @@ -326,6 +344,9 @@ public: /// reorderable instructions. void schedule() override; + void startBlock(MachineBasicBlock *bb) override; + void finishBlock() override; + /// Change the position of an instruction within the basic block and update /// live ranges and region boundary iterators. 
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); @@ -755,9 +776,7 @@ public: /// available instruction, or NULL if there are multiple candidates. SUnit *pickOnlyChoice(); -#ifndef NDEBUG void dumpScheduledState() const; -#endif }; /// Base class for GenericScheduler. This class maintains information about diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h index 284f8c1976076..9d8db393ca92a 100644 --- a/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/include/llvm/CodeGen/MachineTraceMetrics.h @@ -47,17 +47,18 @@ #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H #define LLVM_CODEGEN_MACHINETRACEMETRICS_H +#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetSchedule.h" namespace llvm { class AnalysisUsage; -class MachineBasicBlock; class MachineFunction; class MachineInstr; class MachineLoop; @@ -68,6 +69,22 @@ class raw_ostream; class TargetInstrInfo; class TargetRegisterInfo; +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle = 0; + const MachineInstr *MI = nullptr; + unsigned Op = 0; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU) {} +}; + + class MachineTraceMetrics : public MachineFunctionPass { const MachineFunction *MF = nullptr; const TargetInstrInfo *TII = nullptr; @@ -343,6 +360,18 @@ public: /// Get the trace that passes through MBB. /// The trace is computed on demand. Trace getTrace(const MachineBasicBlock *MBB); + + /// Updates the depth of a machine instruction, given RegUnits. + void updateDepth(TraceBlockInfo &TBI, const MachineInstr&, + SparseSet<LiveRegUnit> &RegUnits); + void updateDepth(const MachineBasicBlock *, const MachineInstr&, + SparseSet<LiveRegUnit> &RegUnits); + + /// Updates the depth of the instructions from Start to End. + void updateDepths(MachineBasicBlock::iterator Start, + MachineBasicBlock::iterator End, + SparseSet<LiveRegUnit> &RegUnits); + }; /// Strategies for selecting traces.
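As an aside, the MachineScheduler.h hunk above adds three per-block hooks to MachineSchedStrategy (doMBBSchedRegionsTopDown, enterMBB and leaveMBB). Below is a minimal sketch, not part of this commit, of how a target-specific strategy might use them; it assumes the header as modified here, and the class and member names (TopDownRegionStrategy, CurMBB) are invented purely for illustration.

#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

namespace {
// Hypothetical strategy: reuse GenericScheduler's heuristics, but ask the
// scheduler driver to visit the regions of each basic block top-down and
// keep track of which block is currently being scheduled.
class TopDownRegionStrategy : public GenericScheduler {
public:
  TopDownRegionStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}

  // Process the scheduling regions of an MBB starting with its topmost region.
  bool doMBBSchedRegionsTopDown() const override { return true; }

  // Called by the driver when it enters and leaves a basic block.
  void enterMBB(MachineBasicBlock *MBB) override { CurMBB = MBB; }
  void leaveMBB() override { CurMBB = nullptr; }

private:
  MachineBasicBlock *CurMBB = nullptr;
};
} // end anonymous namespace

A target would typically hand such a strategy to the scheduler through its usual creation hook (for example TargetPassConfig::createMachineScheduler); nothing in the sketch relies on anything beyond the three new virtual methods.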
diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h index 0bdb38bfcbec8..b452684757f63 100644 --- a/include/llvm/CodeGen/MachineValueType.h +++ b/include/llvm/CodeGen/MachineValueType.h @@ -64,80 +64,81 @@ namespace llvm { v16i1 = 18, // 16 x i1 v32i1 = 19, // 32 x i1 v64i1 = 20, // 64 x i1 - v512i1 = 21, // 512 x i1 - v1024i1 = 22, // 1024 x i1 - - v1i8 = 23, // 1 x i8 - v2i8 = 24, // 2 x i8 - v4i8 = 25, // 4 x i8 - v8i8 = 26, // 8 x i8 - v16i8 = 27, // 16 x i8 - v32i8 = 28, // 32 x i8 - v64i8 = 29, // 64 x i8 - v128i8 = 30, //128 x i8 - v256i8 = 31, //256 x i8 - - v1i16 = 32, // 1 x i16 - v2i16 = 33, // 2 x i16 - v4i16 = 34, // 4 x i16 - v8i16 = 35, // 8 x i16 - v16i16 = 36, // 16 x i16 - v32i16 = 37, // 32 x i16 - v64i16 = 38, // 64 x i16 - v128i16 = 39, //128 x i16 - - v1i32 = 40, // 1 x i32 - v2i32 = 41, // 2 x i32 - v4i32 = 42, // 4 x i32 - v8i32 = 43, // 8 x i32 - v16i32 = 44, // 16 x i32 - v32i32 = 45, // 32 x i32 - v64i32 = 46, // 64 x i32 - - v1i64 = 47, // 1 x i64 - v2i64 = 48, // 2 x i64 - v4i64 = 49, // 4 x i64 - v8i64 = 50, // 8 x i64 - v16i64 = 51, // 16 x i64 - v32i64 = 52, // 32 x i64 - - v1i128 = 53, // 1 x i128 + v128i1 = 21, // 128 x i1 + v512i1 = 22, // 512 x i1 + v1024i1 = 23, // 1024 x i1 + + v1i8 = 24, // 1 x i8 + v2i8 = 25, // 2 x i8 + v4i8 = 26, // 4 x i8 + v8i8 = 27, // 8 x i8 + v16i8 = 28, // 16 x i8 + v32i8 = 29, // 32 x i8 + v64i8 = 30, // 64 x i8 + v128i8 = 31, //128 x i8 + v256i8 = 32, //256 x i8 + + v1i16 = 33, // 1 x i16 + v2i16 = 34, // 2 x i16 + v4i16 = 35, // 4 x i16 + v8i16 = 36, // 8 x i16 + v16i16 = 37, // 16 x i16 + v32i16 = 38, // 32 x i16 + v64i16 = 39, // 64 x i16 + v128i16 = 40, //128 x i16 + + v1i32 = 41, // 1 x i32 + v2i32 = 42, // 2 x i32 + v4i32 = 43, // 4 x i32 + v8i32 = 44, // 8 x i32 + v16i32 = 45, // 16 x i32 + v32i32 = 46, // 32 x i32 + v64i32 = 47, // 64 x i32 + + v1i64 = 48, // 1 x i64 + v2i64 = 49, // 2 x i64 + v4i64 = 50, // 4 x i64 + v8i64 = 51, // 8 x i64 + v16i64 = 52, // 16 x i64 + v32i64 = 53, // 32 x i64 + + v1i128 = 54, // 1 x i128 // Scalable integer types - nxv1i1 = 54, // n x 1 x i1 - nxv2i1 = 55, // n x 2 x i1 - nxv4i1 = 56, // n x 4 x i1 - nxv8i1 = 57, // n x 8 x i1 - nxv16i1 = 58, // n x 16 x i1 - nxv32i1 = 59, // n x 32 x i1 - - nxv1i8 = 60, // n x 1 x i8 - nxv2i8 = 61, // n x 2 x i8 - nxv4i8 = 62, // n x 4 x i8 - nxv8i8 = 63, // n x 8 x i8 - nxv16i8 = 64, // n x 16 x i8 - nxv32i8 = 65, // n x 32 x i8 - - nxv1i16 = 66, // n x 1 x i16 - nxv2i16 = 67, // n x 2 x i16 - nxv4i16 = 68, // n x 4 x i16 - nxv8i16 = 69, // n x 8 x i16 - nxv16i16 = 70, // n x 16 x i16 - nxv32i16 = 71, // n x 32 x i16 - - nxv1i32 = 72, // n x 1 x i32 - nxv2i32 = 73, // n x 2 x i32 - nxv4i32 = 74, // n x 4 x i32 - nxv8i32 = 75, // n x 8 x i32 - nxv16i32 = 76, // n x 16 x i32 - nxv32i32 = 77, // n x 32 x i32 - - nxv1i64 = 78, // n x 1 x i64 - nxv2i64 = 79, // n x 2 x i64 - nxv4i64 = 80, // n x 4 x i64 - nxv8i64 = 81, // n x 8 x i64 - nxv16i64 = 82, // n x 16 x i64 - nxv32i64 = 83, // n x 32 x i64 + nxv1i1 = 55, // n x 1 x i1 + nxv2i1 = 56, // n x 2 x i1 + nxv4i1 = 57, // n x 4 x i1 + nxv8i1 = 58, // n x 8 x i1 + nxv16i1 = 59, // n x 16 x i1 + nxv32i1 = 60, // n x 32 x i1 + + nxv1i8 = 61, // n x 1 x i8 + nxv2i8 = 62, // n x 2 x i8 + nxv4i8 = 63, // n x 4 x i8 + nxv8i8 = 64, // n x 8 x i8 + nxv16i8 = 65, // n x 16 x i8 + nxv32i8 = 66, // n x 32 x i8 + + nxv1i16 = 67, // n x 1 x i16 + nxv2i16 = 68, // n x 2 x i16 + nxv4i16 = 69, // n x 4 x i16 + nxv8i16 = 70, // n x 8 x i16 + nxv16i16 = 71, // n x 16 x i16 + 
nxv32i16 = 72, // n x 32 x i16 + + nxv1i32 = 73, // n x 1 x i32 + nxv2i32 = 74, // n x 2 x i32 + nxv4i32 = 75, // n x 4 x i32 + nxv8i32 = 76, // n x 8 x i32 + nxv16i32 = 77, // n x 16 x i32 + nxv32i32 = 78, // n x 32 x i32 + + nxv1i64 = 79, // n x 1 x i64 + nxv2i64 = 80, // n x 2 x i64 + nxv4i64 = 81, // n x 4 x i64 + nxv8i64 = 82, // n x 8 x i64 + nxv16i64 = 83, // n x 16 x i64 + nxv32i64 = 84, // n x 32 x i64 FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, @@ -145,31 +146,31 @@ namespace llvm { FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - v2f16 = 84, // 2 x f16 - v4f16 = 85, // 4 x f16 - v8f16 = 86, // 8 x f16 - v1f32 = 87, // 1 x f32 - v2f32 = 88, // 2 x f32 - v4f32 = 89, // 4 x f32 - v8f32 = 90, // 8 x f32 - v16f32 = 91, // 16 x f32 - v1f64 = 92, // 1 x f64 - v2f64 = 93, // 2 x f64 - v4f64 = 94, // 4 x f64 - v8f64 = 95, // 8 x f64 - - nxv2f16 = 96, // n x 2 x f16 - nxv4f16 = 97, // n x 4 x f16 - nxv8f16 = 98, // n x 8 x f16 - nxv1f32 = 99, // n x 1 x f32 - nxv2f32 = 100, // n x 2 x f32 - nxv4f32 = 101, // n x 4 x f32 - nxv8f32 = 102, // n x 8 x f32 - nxv16f32 = 103, // n x 16 x f32 - nxv1f64 = 104, // n x 1 x f64 - nxv2f64 = 105, // n x 2 x f64 - nxv4f64 = 106, // n x 4 x f64 - nxv8f64 = 107, // n x 8 x f64 + v2f16 = 85, // 2 x f16 + v4f16 = 86, // 4 x f16 + v8f16 = 87, // 8 x f16 + v1f32 = 88, // 1 x f32 + v2f32 = 89, // 2 x f32 + v4f32 = 90, // 4 x f32 + v8f32 = 91, // 8 x f32 + v16f32 = 92, // 16 x f32 + v1f64 = 93, // 1 x f64 + v2f64 = 94, // 2 x f64 + v4f64 = 95, // 4 x f64 + v8f64 = 96, // 8 x f64 + + nxv2f16 = 97, // n x 2 x f16 + nxv4f16 = 98, // n x 4 x f16 + nxv8f16 = 99, // n x 8 x f16 + nxv1f32 = 100, // n x 1 x f32 + nxv2f32 = 101, // n x 2 x f32 + nxv4f32 = 102, // n x 4 x f32 + nxv8f32 = 103, // n x 8 x f32 + nxv16f32 = 104, // n x 16 x f32 + nxv1f64 = 105, // n x 1 x f64 + nxv2f64 = 106, // n x 2 x f64 + nxv4f64 = 107, // n x 4 x f64 + nxv8f64 = 108, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = nxv8f64, @@ -180,18 +181,18 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 108, // This is an X86 MMX value + x86mmx = 109, // This is an X86 MMX value - Glue = 109, // This glues nodes together during pre-RA sched + Glue = 110, // This glues nodes together during pre-RA sched - isVoid = 110, // This has no value + isVoid = 111, // This has no value - Untyped = 111, // This value takes a register, but has + Untyped = 112, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 112, // This always remains at the end of the list. + LAST_VALUETYPE = 113, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -346,10 +347,11 @@ namespace llvm { /// Return true if this is a 128-bit vector type. 
bool is128BitVector() const { - return (SimpleTy == MVT::v16i8 || SimpleTy == MVT::v8i16 || - SimpleTy == MVT::v4i32 || SimpleTy == MVT::v2i64 || - SimpleTy == MVT::v1i128 || SimpleTy == MVT::v8f16 || - SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64); + return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 || + SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 || + SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 || + SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 || + SimpleTy == MVT::v2f64); } /// Return true if this is a 256-bit vector type. @@ -420,6 +422,7 @@ namespace llvm { case v16i1: case v32i1: case v64i1: + case v128i1: case v512i1: case v1024i1: case nxv1i1: @@ -517,6 +520,7 @@ namespace llvm { case v1024i1: return 1024; case v512i1: return 512; case v256i8: return 256; + case v128i1: case v128i8: case v128i16: return 128; case v64i1: @@ -690,6 +694,7 @@ namespace llvm { case f128: case ppcf128: case i128: + case v128i1: case v16i8: case v8i16: case v4i32: @@ -828,6 +833,7 @@ namespace llvm { if (NumElements == 16) return MVT::v16i1; if (NumElements == 32) return MVT::v32i1; if (NumElements == 64) return MVT::v64i1; + if (NumElements == 128) return MVT::v128i1; if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; break; diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h index 8d5d2374679d3..6a247277fdfab 100644 --- a/include/llvm/CodeGen/PBQP/Solution.h +++ b/include/llvm/CodeGen/PBQP/Solution.h @@ -29,11 +29,6 @@ namespace PBQP { using SelectionsMap = std::map<GraphBase::NodeId, unsigned>; SelectionsMap selections; - unsigned r0Reductions = 0; - unsigned r1Reductions = 0; - unsigned r2Reductions = 0; - unsigned rNReductions = 0; - public: /// \brief Initialise an empty solution. Solution() = default; diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 96cfce5b84dfe..4370d116e08c3 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -43,9 +43,6 @@ namespace llvm { /// the entry block. FunctionPass *createUnreachableBlockEliminationPass(); - /// Insert mcount-like function calls. - FunctionPass *createCountingFunctionInserterPass(); - /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool. MachineFunctionPass * @@ -409,17 +406,17 @@ namespace llvm { /// This pass frees the memory occupied by the MachineFunction. FunctionPass *createFreeMachineFunctionPass(); - /// This pass combine basic blocks guarded by the same branch. - extern char &BranchCoalescingID; - /// This pass performs outlining on machine instructions directly before /// printing assembly. - ModulePass *createMachineOutlinerPass(); + ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs = false); /// This pass expands the experimental reduction intrinsics into sequences of /// shuffles. FunctionPass *createExpandReductionsPass(); + // This pass expands memcmp() to load/stores. 
+ FunctionPass *createExpandMemCmpPass(); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/include/llvm/CodeGen/PreISelIntrinsicLowering.h index 765ca085244aa..7a007eb8bceac 100644 --- a/include/llvm/CodeGen/PreISelIntrinsicLowering.h +++ b/include/llvm/CodeGen/PreISelIntrinsicLowering.h @@ -1,4 +1,4 @@ -//===--- PreISelIntrinsicLowering.h - Pre-ISel intrinsic lowering pass ----===// +//===- PreISelIntrinsicLowering.h - Pre-ISel intrinsic lowering pass ------===// // // The LLVM Compiler Infrastructure // @@ -17,10 +17,13 @@ namespace llvm { +class Module; + struct PreISelIntrinsicLoweringPass : PassInfoMixin<PreISelIntrinsicLoweringPass> { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -} + +} // end namespace llvm #endif // LLVM_CODEGEN_PREISELINTRINSICLOWERING_H diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h index f5aedb07e4d2b..bdf0bb7315402 100644 --- a/include/llvm/CodeGen/PseudoSourceValue.h +++ b/include/llvm/CodeGen/PseudoSourceValue.h @@ -25,6 +25,7 @@ namespace llvm { class MachineFrameInfo; class MachineMemOperand; class raw_ostream; +class TargetInstrInfo; raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO); class PseudoSourceValue; @@ -48,6 +49,7 @@ public: private: PSVKind Kind; + unsigned AddressSpace; friend raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV); @@ -58,7 +60,7 @@ private: virtual void printCustom(raw_ostream &O) const; public: - explicit PseudoSourceValue(PSVKind Kind); + explicit PseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII); virtual ~PseudoSourceValue(); @@ -68,6 +70,9 @@ public: bool isGOT() const { return Kind == GOT; } bool isConstantPool() const { return Kind == ConstantPool; } bool isJumpTable() const { return Kind == JumpTable; } + + unsigned getAddressSpace() const { return AddressSpace; } + unsigned getTargetCustom() const { return (Kind >= TargetCustom) ? ((Kind+1) - TargetCustom) : 0; } @@ -91,8 +96,8 @@ class FixedStackPseudoSourceValue : public PseudoSourceValue { const int FI; public: - explicit FixedStackPseudoSourceValue(int FI) - : PseudoSourceValue(FixedStack), FI(FI) {} + explicit FixedStackPseudoSourceValue(int FI, const TargetInstrInfo &TII) + : PseudoSourceValue(FixedStack, TII), FI(FI) {} static bool classof(const PseudoSourceValue *V) { return V->kind() == FixedStack; @@ -111,7 +116,7 @@ public: class CallEntryPseudoSourceValue : public PseudoSourceValue { protected: - CallEntryPseudoSourceValue(PSVKind Kind); + CallEntryPseudoSourceValue(PSVKind Kind, const TargetInstrInfo &TII); public: bool isConstant(const MachineFrameInfo *) const override; @@ -124,7 +129,8 @@ class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue { const GlobalValue *GV; public: - GlobalValuePseudoSourceValue(const GlobalValue *GV); + GlobalValuePseudoSourceValue(const GlobalValue *GV, + const TargetInstrInfo &TII); static bool classof(const PseudoSourceValue *V) { return V->kind() == GlobalValueCallEntry; @@ -138,7 +144,7 @@ class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue { const char *ES; public: - ExternalSymbolPseudoSourceValue(const char *ES); + ExternalSymbolPseudoSourceValue(const char *ES, const TargetInstrInfo &TII); static bool classof(const PseudoSourceValue *V) { return V->kind() == ExternalSymbolCallEntry; @@ -149,6 +155,7 @@ public: /// Manages creation of pseudo source values. 
class PseudoSourceValueManager { + const TargetInstrInfo &TII; const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV; std::map<int, std::unique_ptr<FixedStackPseudoSourceValue>> FSValues; StringMap<std::unique_ptr<const ExternalSymbolPseudoSourceValue>> @@ -158,7 +165,7 @@ class PseudoSourceValueManager { GlobalCallEntries; public: - PseudoSourceValueManager(); + PseudoSourceValueManager(const TargetInstrInfo &TII); /// Return a pseudo source value referencing the area below the stack frame of /// a function, e.g., the argument space. diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h index 355c9f9b2f1e6..97113c575815b 100644 --- a/include/llvm/CodeGen/RegisterClassInfo.h +++ b/include/llvm/CodeGen/RegisterClassInfo.h @@ -20,8 +20,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstdint> #include <memory> diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index e997aaf269e31..2b14b78d621d7 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -20,8 +20,8 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstddef> #include <cstdint> diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index 0a04bc6a89f4d..eabadd8d784a8 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -20,6 +20,7 @@ #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include <cstdint> #include <vector> diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h index 9c8f5f487d382..03166ccdfe384 100644 --- a/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -20,15 +20,15 @@ #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ResourcePriorityQueue; /// Sorting functions for the Available queue. - struct resource_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct resource_sort { ResourcePriorityQueue *PQ; explicit resource_sort(ResourcePriorityQueue *pq) : PQ(pq) {} diff --git a/include/llvm/CodeGen/RuntimeLibcalls.def b/include/llvm/CodeGen/RuntimeLibcalls.def new file mode 100644 index 0000000000000..e042ae982e86c --- /dev/null +++ b/include/llvm/CodeGen/RuntimeLibcalls.def @@ -0,0 +1,492 @@ +//===-- llvm/RuntimeLibcalls.def - File that describes libcalls -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines all of the runtime library calls the backend can emit. +// The various long double types cannot be merged, because 80-bit library +// functions use "xf" and 128-bit use "tf". +// +// When adding PPCF128 functions here, note that their names generally need +// to be overridden for Darwin with the xxx$LDBL128 form. See +// PPCISelLowering.cpp. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +// Provide definitions of macros so that users of this file do not have to +// define everything to use it... + +// Declare the enumerator for each libcall, along with its default name. Some +// libcalls have different names on particular OSes or architectures. These +// are set in InitLibcallNames() in TargetLoweringBase.cpp and/or by targets +// using TargetLoweringBase::setLibcallName() +#ifndef HANDLE_LIBCALL +#error "HANDLE_LIBCALL must be defined" +#endif + +// Integer +HANDLE_LIBCALL(SHL_I16, "__ashlhi3") +HANDLE_LIBCALL(SHL_I32, "__ashlsi3") +HANDLE_LIBCALL(SHL_I64, "__ashldi3") +HANDLE_LIBCALL(SHL_I128, "__ashlti3") +HANDLE_LIBCALL(SRL_I16, "__lshrhi3") +HANDLE_LIBCALL(SRL_I32, "__lshrsi3") +HANDLE_LIBCALL(SRL_I64, "__lshrdi3") +HANDLE_LIBCALL(SRL_I128, "__lshrti3") +HANDLE_LIBCALL(SRA_I16, "__ashrhi3") +HANDLE_LIBCALL(SRA_I32, "__ashrsi3") +HANDLE_LIBCALL(SRA_I64, "__ashrdi3") +HANDLE_LIBCALL(SRA_I128, "__ashrti3") +HANDLE_LIBCALL(MUL_I8, "__mulqi3") +HANDLE_LIBCALL(MUL_I16, "__mulhi3") +HANDLE_LIBCALL(MUL_I32, "__mulsi3") +HANDLE_LIBCALL(MUL_I64, "__muldi3") +HANDLE_LIBCALL(MUL_I128, "__multi3") +HANDLE_LIBCALL(MULO_I32, "__mulosi4") +HANDLE_LIBCALL(MULO_I64, "__mulodi4") +HANDLE_LIBCALL(MULO_I128, "__muloti4") +HANDLE_LIBCALL(SDIV_I8, "__divqi3") +HANDLE_LIBCALL(SDIV_I16, "__divhi3") +HANDLE_LIBCALL(SDIV_I32, "__divsi3") +HANDLE_LIBCALL(SDIV_I64, "__divdi3") +HANDLE_LIBCALL(SDIV_I128, "__divti3") +HANDLE_LIBCALL(UDIV_I8, "__udivqi3") +HANDLE_LIBCALL(UDIV_I16, "__udivhi3") +HANDLE_LIBCALL(UDIV_I32, "__udivsi3") +HANDLE_LIBCALL(UDIV_I64, "__udivdi3") +HANDLE_LIBCALL(UDIV_I128, "__udivti3") +HANDLE_LIBCALL(SREM_I8, "__modqi3") +HANDLE_LIBCALL(SREM_I16, "__modhi3") +HANDLE_LIBCALL(SREM_I32, "__modsi3") +HANDLE_LIBCALL(SREM_I64, "__moddi3") +HANDLE_LIBCALL(SREM_I128, "__modti3") +HANDLE_LIBCALL(UREM_I8, "__umodqi3") +HANDLE_LIBCALL(UREM_I16, "__umodhi3") +HANDLE_LIBCALL(UREM_I32, "__umodsi3") +HANDLE_LIBCALL(UREM_I64, "__umoddi3") +HANDLE_LIBCALL(UREM_I128, "__umodti3") +HANDLE_LIBCALL(SDIVREM_I8, nullptr) +HANDLE_LIBCALL(SDIVREM_I16, nullptr) +HANDLE_LIBCALL(SDIVREM_I32, nullptr) +HANDLE_LIBCALL(SDIVREM_I64, nullptr) +HANDLE_LIBCALL(SDIVREM_I128, nullptr) +HANDLE_LIBCALL(UDIVREM_I8, nullptr) +HANDLE_LIBCALL(UDIVREM_I16, nullptr) +HANDLE_LIBCALL(UDIVREM_I32, nullptr) +HANDLE_LIBCALL(UDIVREM_I64, nullptr) +HANDLE_LIBCALL(UDIVREM_I128, nullptr) +HANDLE_LIBCALL(NEG_I32, "__negsi2") +HANDLE_LIBCALL(NEG_I64, "__negdi2") + +// Floating-point +HANDLE_LIBCALL(ADD_F32, "__addsf3") +HANDLE_LIBCALL(ADD_F64, "__adddf3") +HANDLE_LIBCALL(ADD_F80, "__addxf3") +HANDLE_LIBCALL(ADD_F128, "__addtf3") +HANDLE_LIBCALL(ADD_PPCF128, "__gcc_qadd") +HANDLE_LIBCALL(SUB_F32, "__subsf3") +HANDLE_LIBCALL(SUB_F64, "__subdf3") +HANDLE_LIBCALL(SUB_F80, "__subxf3") +HANDLE_LIBCALL(SUB_F128, "__subtf3") +HANDLE_LIBCALL(SUB_PPCF128, "__gcc_qsub") +HANDLE_LIBCALL(MUL_F32, "__mulsf3") +HANDLE_LIBCALL(MUL_F64, "__muldf3") +HANDLE_LIBCALL(MUL_F80, 
"__mulxf3") +HANDLE_LIBCALL(MUL_F128, "__multf3") +HANDLE_LIBCALL(MUL_PPCF128, "__gcc_qmul") +HANDLE_LIBCALL(DIV_F32, "__divsf3") +HANDLE_LIBCALL(DIV_F64, "__divdf3") +HANDLE_LIBCALL(DIV_F80, "__divxf3") +HANDLE_LIBCALL(DIV_F128, "__divtf3") +HANDLE_LIBCALL(DIV_PPCF128, "__gcc_qdiv") +HANDLE_LIBCALL(REM_F32, "fmodf") +HANDLE_LIBCALL(REM_F64, "fmod") +HANDLE_LIBCALL(REM_F80, "fmodl") +HANDLE_LIBCALL(REM_F128, "fmodl") +HANDLE_LIBCALL(REM_PPCF128, "fmodl") +HANDLE_LIBCALL(FMA_F32, "fmaf") +HANDLE_LIBCALL(FMA_F64, "fma") +HANDLE_LIBCALL(FMA_F80, "fmal") +HANDLE_LIBCALL(FMA_F128, "fmal") +HANDLE_LIBCALL(FMA_PPCF128, "fmal") +HANDLE_LIBCALL(POWI_F32, "__powisf2") +HANDLE_LIBCALL(POWI_F64, "__powidf2") +HANDLE_LIBCALL(POWI_F80, "__powixf2") +HANDLE_LIBCALL(POWI_F128, "__powitf2") +HANDLE_LIBCALL(POWI_PPCF128, "__powitf2") +HANDLE_LIBCALL(SQRT_F32, "sqrtf") +HANDLE_LIBCALL(SQRT_F64, "sqrt") +HANDLE_LIBCALL(SQRT_F80, "sqrtl") +HANDLE_LIBCALL(SQRT_F128, "sqrtl") +HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl") +HANDLE_LIBCALL(LOG_F32, "logf") +HANDLE_LIBCALL(LOG_F64, "log") +HANDLE_LIBCALL(LOG_F80, "logl") +HANDLE_LIBCALL(LOG_F128, "logl") +HANDLE_LIBCALL(LOG_PPCF128, "logl") +HANDLE_LIBCALL(LOG2_F32, "log2f") +HANDLE_LIBCALL(LOG2_F64, "log2") +HANDLE_LIBCALL(LOG2_F80, "log2l") +HANDLE_LIBCALL(LOG2_F128, "log2l") +HANDLE_LIBCALL(LOG2_PPCF128, "log2l") +HANDLE_LIBCALL(LOG10_F32, "log10f") +HANDLE_LIBCALL(LOG10_F64, "log10") +HANDLE_LIBCALL(LOG10_F80, "log10l") +HANDLE_LIBCALL(LOG10_F128, "log10l") +HANDLE_LIBCALL(LOG10_PPCF128, "log10l") +HANDLE_LIBCALL(EXP_F32, "expf") +HANDLE_LIBCALL(EXP_F64, "exp") +HANDLE_LIBCALL(EXP_F80, "expl") +HANDLE_LIBCALL(EXP_F128, "expl") +HANDLE_LIBCALL(EXP_PPCF128, "expl") +HANDLE_LIBCALL(EXP2_F32, "exp2f") +HANDLE_LIBCALL(EXP2_F64, "exp2") +HANDLE_LIBCALL(EXP2_F80, "exp2l") +HANDLE_LIBCALL(EXP2_F128, "exp2l") +HANDLE_LIBCALL(EXP2_PPCF128, "exp2l") +HANDLE_LIBCALL(SIN_F32, "sinf") +HANDLE_LIBCALL(SIN_F64, "sin") +HANDLE_LIBCALL(SIN_F80, "sinl") +HANDLE_LIBCALL(SIN_F128, "sinl") +HANDLE_LIBCALL(SIN_PPCF128, "sinl") +HANDLE_LIBCALL(COS_F32, "cosf") +HANDLE_LIBCALL(COS_F64, "cos") +HANDLE_LIBCALL(COS_F80, "cosl") +HANDLE_LIBCALL(COS_F128, "cosl") +HANDLE_LIBCALL(COS_PPCF128, "cosl") +HANDLE_LIBCALL(SINCOS_F32, nullptr) +HANDLE_LIBCALL(SINCOS_F64, nullptr) +HANDLE_LIBCALL(SINCOS_F80, nullptr) +HANDLE_LIBCALL(SINCOS_F128, nullptr) +HANDLE_LIBCALL(SINCOS_PPCF128, nullptr) +HANDLE_LIBCALL(POW_F32, "powf") +HANDLE_LIBCALL(POW_F64, "pow") +HANDLE_LIBCALL(POW_F80, "powl") +HANDLE_LIBCALL(POW_F128, "powl") +HANDLE_LIBCALL(POW_PPCF128, "powl") +HANDLE_LIBCALL(CEIL_F32, "ceilf") +HANDLE_LIBCALL(CEIL_F64, "ceil") +HANDLE_LIBCALL(CEIL_F80, "ceill") +HANDLE_LIBCALL(CEIL_F128, "ceill") +HANDLE_LIBCALL(CEIL_PPCF128, "ceill") +HANDLE_LIBCALL(TRUNC_F32, "truncf") +HANDLE_LIBCALL(TRUNC_F64, "trunc") +HANDLE_LIBCALL(TRUNC_F80, "truncl") +HANDLE_LIBCALL(TRUNC_F128, "truncl") +HANDLE_LIBCALL(TRUNC_PPCF128, "truncl") +HANDLE_LIBCALL(RINT_F32, "rintf") +HANDLE_LIBCALL(RINT_F64, "rint") +HANDLE_LIBCALL(RINT_F80, "rintl") +HANDLE_LIBCALL(RINT_F128, "rintl") +HANDLE_LIBCALL(RINT_PPCF128, "rintl") +HANDLE_LIBCALL(NEARBYINT_F32, "nearbyintf") +HANDLE_LIBCALL(NEARBYINT_F64, "nearbyint") +HANDLE_LIBCALL(NEARBYINT_F80, "nearbyintl") +HANDLE_LIBCALL(NEARBYINT_F128, "nearbyintl") +HANDLE_LIBCALL(NEARBYINT_PPCF128, "nearbyintl") +HANDLE_LIBCALL(ROUND_F32, "roundf") +HANDLE_LIBCALL(ROUND_F64, "round") +HANDLE_LIBCALL(ROUND_F80, "roundl") +HANDLE_LIBCALL(ROUND_F128, "roundl") +HANDLE_LIBCALL(ROUND_PPCF128, "roundl") 
+HANDLE_LIBCALL(FLOOR_F32, "floorf") +HANDLE_LIBCALL(FLOOR_F64, "floor") +HANDLE_LIBCALL(FLOOR_F80, "floorl") +HANDLE_LIBCALL(FLOOR_F128, "floorl") +HANDLE_LIBCALL(FLOOR_PPCF128, "floorl") +HANDLE_LIBCALL(COPYSIGN_F32, "copysignf") +HANDLE_LIBCALL(COPYSIGN_F64, "copysign") +HANDLE_LIBCALL(COPYSIGN_F80, "copysignl") +HANDLE_LIBCALL(COPYSIGN_F128, "copysignl") +HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl") +HANDLE_LIBCALL(FMIN_F32, "fminf") +HANDLE_LIBCALL(FMIN_F64, "fmin") +HANDLE_LIBCALL(FMIN_F80, "fminl") +HANDLE_LIBCALL(FMIN_F128, "fminl") +HANDLE_LIBCALL(FMIN_PPCF128, "fminl") +HANDLE_LIBCALL(FMAX_F32, "fmaxf") +HANDLE_LIBCALL(FMAX_F64, "fmax") +HANDLE_LIBCALL(FMAX_F80, "fmaxl") +HANDLE_LIBCALL(FMAX_F128, "fmaxl") +HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") + +// Conversion +HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") +HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") +HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2") +HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2") +HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") +HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee") +HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee") +HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2") +HANDLE_LIBCALL(FPROUND_F80_F16, "__truncxfhf2") +HANDLE_LIBCALL(FPROUND_F128_F16, "__trunctfhf2") +HANDLE_LIBCALL(FPROUND_PPCF128_F16, "__trunctfhf2") +HANDLE_LIBCALL(FPROUND_F64_F32, "__truncdfsf2") +HANDLE_LIBCALL(FPROUND_F80_F32, "__truncxfsf2") +HANDLE_LIBCALL(FPROUND_F128_F32, "__trunctfsf2") +HANDLE_LIBCALL(FPROUND_PPCF128_F32, "__gcc_qtos") +HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2") +HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2") +HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod") +HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi") +HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi") +HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti") +HANDLE_LIBCALL(FPTOSINT_F64_I32, "__fixdfsi") +HANDLE_LIBCALL(FPTOSINT_F64_I64, "__fixdfdi") +HANDLE_LIBCALL(FPTOSINT_F64_I128, "__fixdfti") +HANDLE_LIBCALL(FPTOSINT_F80_I32, "__fixxfsi") +HANDLE_LIBCALL(FPTOSINT_F80_I64, "__fixxfdi") +HANDLE_LIBCALL(FPTOSINT_F80_I128, "__fixxfti") +HANDLE_LIBCALL(FPTOSINT_F128_I32, "__fixtfsi") +HANDLE_LIBCALL(FPTOSINT_F128_I64, "__fixtfdi") +HANDLE_LIBCALL(FPTOSINT_F128_I128, "__fixtfti") +HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou") +HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi") +HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti") +HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi") +HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi") +HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti") +HANDLE_LIBCALL(FPTOUINT_F64_I32, "__fixunsdfsi") +HANDLE_LIBCALL(FPTOUINT_F64_I64, "__fixunsdfdi") +HANDLE_LIBCALL(FPTOUINT_F64_I128, "__fixunsdfti") +HANDLE_LIBCALL(FPTOUINT_F80_I32, "__fixunsxfsi") +HANDLE_LIBCALL(FPTOUINT_F80_I64, "__fixunsxfdi") +HANDLE_LIBCALL(FPTOUINT_F80_I128, "__fixunsxfti") +HANDLE_LIBCALL(FPTOUINT_F128_I32, "__fixunstfsi") +HANDLE_LIBCALL(FPTOUINT_F128_I64, "__fixunstfdi") +HANDLE_LIBCALL(FPTOUINT_F128_I128, "__fixunstfti") +HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi") +HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi") +HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti") +HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf") +HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf") +HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf") +HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf") +HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq") +HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf") +HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf") +HANDLE_LIBCALL(SINTTOFP_I64_F80, 
"__floatdixf") +HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf") +HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf") +HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf") +HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf") +HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf") +HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf") +HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf") +HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf") +HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf") +HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf") +HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf") +HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq") +HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf") +HANDLE_LIBCALL(UINTTOFP_I64_F64, "__floatundidf") +HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf") +HANDLE_LIBCALL(UINTTOFP_I64_F128, "__floatunditf") +HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf") +HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf") +HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf") +HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf") +HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf") +HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf") + +// Comparison +HANDLE_LIBCALL(OEQ_F32, "__eqsf2") +HANDLE_LIBCALL(OEQ_F64, "__eqdf2") +HANDLE_LIBCALL(OEQ_F128, "__eqtf2") +HANDLE_LIBCALL(OEQ_PPCF128, "__gcc_qeq") +HANDLE_LIBCALL(UNE_F32, "__nesf2") +HANDLE_LIBCALL(UNE_F64, "__nedf2") +HANDLE_LIBCALL(UNE_F128, "__netf2") +HANDLE_LIBCALL(UNE_PPCF128, "__gcc_qne") +HANDLE_LIBCALL(OGE_F32, "__gesf2") +HANDLE_LIBCALL(OGE_F64, "__gedf2") +HANDLE_LIBCALL(OGE_F128, "__getf2") +HANDLE_LIBCALL(OGE_PPCF128, "__gcc_qge") +HANDLE_LIBCALL(OLT_F32, "__ltsf2") +HANDLE_LIBCALL(OLT_F64, "__ltdf2") +HANDLE_LIBCALL(OLT_F128, "__lttf2") +HANDLE_LIBCALL(OLT_PPCF128, "__gcc_qlt") +HANDLE_LIBCALL(OLE_F32, "__lesf2") +HANDLE_LIBCALL(OLE_F64, "__ledf2") +HANDLE_LIBCALL(OLE_F128, "__letf2") +HANDLE_LIBCALL(OLE_PPCF128, "__gcc_qle") +HANDLE_LIBCALL(OGT_F32, "__gtsf2") +HANDLE_LIBCALL(OGT_F64, "__gtdf2") +HANDLE_LIBCALL(OGT_F128, "__gttf2") +HANDLE_LIBCALL(OGT_PPCF128, "__gcc_qgt") +HANDLE_LIBCALL(UO_F32, "__unordsf2") +HANDLE_LIBCALL(UO_F64, "__unorddf2") +HANDLE_LIBCALL(UO_F128, "__unordtf2") +HANDLE_LIBCALL(UO_PPCF128, "__gcc_qunord") +HANDLE_LIBCALL(O_F32, "__unordsf2") +HANDLE_LIBCALL(O_F64, "__unorddf2") +HANDLE_LIBCALL(O_F128, "__unordtf2") +HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord") + +// Memory +HANDLE_LIBCALL(MEMCPY, "memcpy") +HANDLE_LIBCALL(MEMMOVE, "memmove") +HANDLE_LIBCALL(MEMSET, "memset") + +// Element-wise unordered-atomic memory of different sizes +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memcpy_element_unordered_atomic_1") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memcpy_element_unordered_atomic_2") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memcpy_element_unordered_atomic_4") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memcpy_element_unordered_atomic_8") +HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memcpy_element_unordered_atomic_16") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memmove_element_unordered_atomic_1") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memmove_element_unordered_atomic_2") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memmove_element_unordered_atomic_4") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memmove_element_unordered_atomic_8") +HANDLE_LIBCALL(MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memmove_element_unordered_atomic_16") 
+HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memset_element_unordered_atomic_1") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_2, "__llvm_memset_element_unordered_atomic_2") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_4, "__llvm_memset_element_unordered_atomic_4") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_8, "__llvm_memset_element_unordered_atomic_8") +HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unordered_atomic_16") + +// Exception handling +HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume") + +// Note: there are two sets of atomics libcalls; see +// <https://llvm.org/docs/Atomics.html> for more info on the +// difference between them. + +// Atomic '__sync_*' libcalls. +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_1, "__sync_val_compare_and_swap_1") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_2, "__sync_val_compare_and_swap_2") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_4, "__sync_val_compare_and_swap_4") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_8, "__sync_val_compare_and_swap_8") +HANDLE_LIBCALL(SYNC_VAL_COMPARE_AND_SWAP_16, "__sync_val_compare_and_swap_16") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_1, "__sync_lock_test_and_set_1") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_2, "__sync_lock_test_and_set_2") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_4, "__sync_lock_test_and_set_4") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_8, "__sync_lock_test_and_set_8") +HANDLE_LIBCALL(SYNC_LOCK_TEST_AND_SET_16, "__sync_lock_test_and_set_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_1, "__sync_fetch_and_add_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_2, "__sync_fetch_and_add_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_4, "__sync_fetch_and_add_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_8, "__sync_fetch_and_add_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_ADD_16, "__sync_fetch_and_add_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_1, "__sync_fetch_and_sub_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_2, "__sync_fetch_and_sub_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_4, "__sync_fetch_and_sub_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_8, "__sync_fetch_and_sub_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_SUB_16, "__sync_fetch_and_sub_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_1, "__sync_fetch_and_and_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_2, "__sync_fetch_and_and_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_4, "__sync_fetch_and_and_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_8, "__sync_fetch_and_and_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_AND_16, "__sync_fetch_and_and_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_1, "__sync_fetch_and_or_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_2, "__sync_fetch_and_or_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_4, "__sync_fetch_and_or_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_8, "__sync_fetch_and_or_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_OR_16, "__sync_fetch_and_or_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_1, "__sync_fetch_and_xor_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_2, "__sync_fetch_and_xor_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_4, "__sync_fetch_and_xor_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_8, "__sync_fetch_and_xor_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_XOR_16, "__sync_fetch_and_xor_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_1, "__sync_fetch_and_nand_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_2, "__sync_fetch_and_nand_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_4, "__sync_fetch_and_nand_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_8, "__sync_fetch_and_nand_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_NAND_16, "__sync_fetch_and_nand_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_1, "__sync_fetch_and_max_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_2, "__sync_fetch_and_max_2") 
+HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_4, "__sync_fetch_and_max_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_8, "__sync_fetch_and_max_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_MAX_16, "__sync_fetch_and_max_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_1, "__sync_fetch_and_umax_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_2, "__sync_fetch_and_umax_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_4, "__sync_fetch_and_umax_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_8, "__sync_fetch_and_umax_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMAX_16, "__sync_fetch_and_umax_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_1, "__sync_fetch_and_min_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_2, "__sync_fetch_and_min_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_4, "__sync_fetch_and_min_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_8, "__sync_fetch_and_min_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_MIN_16, "__sync_fetch_and_min_16") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_1, "__sync_fetch_and_umin_1") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_2, "__sync_fetch_and_umin_2") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_4, "__sync_fetch_and_umin_4") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_8, "__sync_fetch_and_umin_8") +HANDLE_LIBCALL(SYNC_FETCH_AND_UMIN_16, "__sync_fetch_and_umin_16") + +// Atomic `__atomic_*' libcalls. +HANDLE_LIBCALL(ATOMIC_LOAD, "__atomic_load") +HANDLE_LIBCALL(ATOMIC_LOAD_1, "__atomic_load_1") +HANDLE_LIBCALL(ATOMIC_LOAD_2, "__atomic_load_2") +HANDLE_LIBCALL(ATOMIC_LOAD_4, "__atomic_load_4") +HANDLE_LIBCALL(ATOMIC_LOAD_8, "__atomic_load_8") +HANDLE_LIBCALL(ATOMIC_LOAD_16, "__atomic_load_16") + +HANDLE_LIBCALL(ATOMIC_STORE, "__atomic_store") +HANDLE_LIBCALL(ATOMIC_STORE_1, "__atomic_store_1") +HANDLE_LIBCALL(ATOMIC_STORE_2, "__atomic_store_2") +HANDLE_LIBCALL(ATOMIC_STORE_4, "__atomic_store_4") +HANDLE_LIBCALL(ATOMIC_STORE_8, "__atomic_store_8") +HANDLE_LIBCALL(ATOMIC_STORE_16, "__atomic_store_16") + +HANDLE_LIBCALL(ATOMIC_EXCHANGE, "__atomic_exchange") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_1, "__atomic_exchange_1") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_2, "__atomic_exchange_2") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_4, "__atomic_exchange_4") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_8, "__atomic_exchange_8") +HANDLE_LIBCALL(ATOMIC_EXCHANGE_16, "__atomic_exchange_16") + +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE, "__atomic_compare_exchange") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_1, "__atomic_compare_exchange_1") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_2, "__atomic_compare_exchange_2") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_4, "__atomic_compare_exchange_4") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_8, "__atomic_compare_exchange_8") +HANDLE_LIBCALL(ATOMIC_COMPARE_EXCHANGE_16, "__atomic_compare_exchange_16") + +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_1, "__atomic_fetch_add_1") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_2, "__atomic_fetch_add_2") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_4, "__atomic_fetch_add_4") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_8, "__atomic_fetch_add_8") +HANDLE_LIBCALL(ATOMIC_FETCH_ADD_16, "__atomic_fetch_add_16") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_1, "__atomic_fetch_sub_1") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_2, "__atomic_fetch_sub_2") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_4, "__atomic_fetch_sub_4") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_8, "__atomic_fetch_sub_8") +HANDLE_LIBCALL(ATOMIC_FETCH_SUB_16, "__atomic_fetch_sub_16") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_1, "__atomic_fetch_and_1") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_2, "__atomic_fetch_and_2") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_4, "__atomic_fetch_and_4") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_8, "__atomic_fetch_and_8") +HANDLE_LIBCALL(ATOMIC_FETCH_AND_16, "__atomic_fetch_and_16") 
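As an aside, not part of the RuntimeLibcalls.def listing above or of this commit: the new .def file is meant to be consumed with the usual X-macro pattern, exactly as the RuntimeLibcalls.h hunk further below does to regenerate the RTLIB::Libcall enum, that is, define HANDLE_LIBCALL, include the file, then undefine it. Here is a minimal standalone sketch that builds a table of the default libcall names the same way; the array name DefaultLibcallNames is invented for this example.

#include "llvm/CodeGen/RuntimeLibcalls.h"

// Expand every HANDLE_LIBCALL(code, name) entry to its default name string
// (nullptr where no default exists), indexed by the RTLIB::Libcall value.
static const char *const DefaultLibcallNames[RTLIB::UNKNOWN_LIBCALL + 1] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/CodeGen/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
};

Because the enum and such a table are generated from the same list, they stay in sync automatically; per-target overrides of these default names are still installed separately, as the file's header comment notes.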
+HANDLE_LIBCALL(ATOMIC_FETCH_OR_1, "__atomic_fetch_or_1") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_2, "__atomic_fetch_or_2") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_4, "__atomic_fetch_or_4") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_8, "__atomic_fetch_or_8") +HANDLE_LIBCALL(ATOMIC_FETCH_OR_16, "__atomic_fetch_or_16") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_1, "__atomic_fetch_xor_1") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_2, "__atomic_fetch_xor_2") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_4, "__atomic_fetch_xor_4") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_8, "__atomic_fetch_xor_8") +HANDLE_LIBCALL(ATOMIC_FETCH_XOR_16, "__atomic_fetch_xor_16") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_1, "__atomic_fetch_nand_1") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_2, "__atomic_fetch_nand_2") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8") +HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16") + +// Stack Protector Fail +HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail") + +// Deoptimization +HANDLE_LIBCALL(DEOPTIMIZE, "__llvm_deoptimize") + +HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr) + +#undef HANDLE_LIBCALL diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 08151be110832..016bef1702c4c 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -28,471 +28,9 @@ namespace RTLIB { /// PPCISelLowering.cpp. /// enum Libcall { - // Integer - SHL_I16, - SHL_I32, - SHL_I64, - SHL_I128, - SRL_I16, - SRL_I32, - SRL_I64, - SRL_I128, - SRA_I16, - SRA_I32, - SRA_I64, - SRA_I128, - MUL_I8, - MUL_I16, - MUL_I32, - MUL_I64, - MUL_I128, - MULO_I32, - MULO_I64, - MULO_I128, - SDIV_I8, - SDIV_I16, - SDIV_I32, - SDIV_I64, - SDIV_I128, - UDIV_I8, - UDIV_I16, - UDIV_I32, - UDIV_I64, - UDIV_I128, - SREM_I8, - SREM_I16, - SREM_I32, - SREM_I64, - SREM_I128, - UREM_I8, - UREM_I16, - UREM_I32, - UREM_I64, - UREM_I128, - SDIVREM_I8, - SDIVREM_I16, - SDIVREM_I32, - SDIVREM_I64, - SDIVREM_I128, - UDIVREM_I8, - UDIVREM_I16, - UDIVREM_I32, - UDIVREM_I64, - UDIVREM_I128, - NEG_I32, - NEG_I64, - - // FLOATING POINT - ADD_F32, - ADD_F64, - ADD_F80, - ADD_F128, - ADD_PPCF128, - SUB_F32, - SUB_F64, - SUB_F80, - SUB_F128, - SUB_PPCF128, - MUL_F32, - MUL_F64, - MUL_F80, - MUL_F128, - MUL_PPCF128, - DIV_F32, - DIV_F64, - DIV_F80, - DIV_F128, - DIV_PPCF128, - REM_F32, - REM_F64, - REM_F80, - REM_F128, - REM_PPCF128, - FMA_F32, - FMA_F64, - FMA_F80, - FMA_F128, - FMA_PPCF128, - POWI_F32, - POWI_F64, - POWI_F80, - POWI_F128, - POWI_PPCF128, - SQRT_F32, - SQRT_F64, - SQRT_F80, - SQRT_F128, - SQRT_PPCF128, - LOG_F32, - LOG_F64, - LOG_F80, - LOG_F128, - LOG_PPCF128, - LOG2_F32, - LOG2_F64, - LOG2_F80, - LOG2_F128, - LOG2_PPCF128, - LOG10_F32, - LOG10_F64, - LOG10_F80, - LOG10_F128, - LOG10_PPCF128, - EXP_F32, - EXP_F64, - EXP_F80, - EXP_F128, - EXP_PPCF128, - EXP2_F32, - EXP2_F64, - EXP2_F80, - EXP2_F128, - EXP2_PPCF128, - SIN_F32, - SIN_F64, - SIN_F80, - SIN_F128, - SIN_PPCF128, - COS_F32, - COS_F64, - COS_F80, - COS_F128, - COS_PPCF128, - SINCOS_F32, - SINCOS_F64, - SINCOS_F80, - SINCOS_F128, - SINCOS_PPCF128, - POW_F32, - POW_F64, - POW_F80, - POW_F128, - POW_PPCF128, - CEIL_F32, - CEIL_F64, - CEIL_F80, - CEIL_F128, - CEIL_PPCF128, - TRUNC_F32, - TRUNC_F64, - TRUNC_F80, - TRUNC_F128, - TRUNC_PPCF128, - RINT_F32, - RINT_F64, - RINT_F80, - RINT_F128, - RINT_PPCF128, - NEARBYINT_F32, - NEARBYINT_F64, - NEARBYINT_F80, - NEARBYINT_F128, - NEARBYINT_PPCF128, - ROUND_F32, - ROUND_F64, - ROUND_F80, - ROUND_F128, - ROUND_PPCF128, - FLOOR_F32, 
- FLOOR_F64, - FLOOR_F80, - FLOOR_F128, - FLOOR_PPCF128, - COPYSIGN_F32, - COPYSIGN_F64, - COPYSIGN_F80, - COPYSIGN_F128, - COPYSIGN_PPCF128, - FMIN_F32, - FMIN_F64, - FMIN_F80, - FMIN_F128, - FMIN_PPCF128, - FMAX_F32, - FMAX_F64, - FMAX_F80, - FMAX_F128, - FMAX_PPCF128, - - // CONVERSION - FPEXT_F32_PPCF128, - FPEXT_F64_PPCF128, - FPEXT_F64_F128, - FPEXT_F32_F128, - FPEXT_F32_F64, - FPEXT_F16_F32, - FPROUND_F32_F16, - FPROUND_F64_F16, - FPROUND_F80_F16, - FPROUND_F128_F16, - FPROUND_PPCF128_F16, - FPROUND_F64_F32, - FPROUND_F80_F32, - FPROUND_F128_F32, - FPROUND_PPCF128_F32, - FPROUND_F80_F64, - FPROUND_F128_F64, - FPROUND_PPCF128_F64, - FPTOSINT_F32_I32, - FPTOSINT_F32_I64, - FPTOSINT_F32_I128, - FPTOSINT_F64_I32, - FPTOSINT_F64_I64, - FPTOSINT_F64_I128, - FPTOSINT_F80_I32, - FPTOSINT_F80_I64, - FPTOSINT_F80_I128, - FPTOSINT_F128_I32, - FPTOSINT_F128_I64, - FPTOSINT_F128_I128, - FPTOSINT_PPCF128_I32, - FPTOSINT_PPCF128_I64, - FPTOSINT_PPCF128_I128, - FPTOUINT_F32_I32, - FPTOUINT_F32_I64, - FPTOUINT_F32_I128, - FPTOUINT_F64_I32, - FPTOUINT_F64_I64, - FPTOUINT_F64_I128, - FPTOUINT_F80_I32, - FPTOUINT_F80_I64, - FPTOUINT_F80_I128, - FPTOUINT_F128_I32, - FPTOUINT_F128_I64, - FPTOUINT_F128_I128, - FPTOUINT_PPCF128_I32, - FPTOUINT_PPCF128_I64, - FPTOUINT_PPCF128_I128, - SINTTOFP_I32_F32, - SINTTOFP_I32_F64, - SINTTOFP_I32_F80, - SINTTOFP_I32_F128, - SINTTOFP_I32_PPCF128, - SINTTOFP_I64_F32, - SINTTOFP_I64_F64, - SINTTOFP_I64_F80, - SINTTOFP_I64_F128, - SINTTOFP_I64_PPCF128, - SINTTOFP_I128_F32, - SINTTOFP_I128_F64, - SINTTOFP_I128_F80, - SINTTOFP_I128_F128, - SINTTOFP_I128_PPCF128, - UINTTOFP_I32_F32, - UINTTOFP_I32_F64, - UINTTOFP_I32_F80, - UINTTOFP_I32_F128, - UINTTOFP_I32_PPCF128, - UINTTOFP_I64_F32, - UINTTOFP_I64_F64, - UINTTOFP_I64_F80, - UINTTOFP_I64_F128, - UINTTOFP_I64_PPCF128, - UINTTOFP_I128_F32, - UINTTOFP_I128_F64, - UINTTOFP_I128_F80, - UINTTOFP_I128_F128, - UINTTOFP_I128_PPCF128, - - // COMPARISON - OEQ_F32, - OEQ_F64, - OEQ_F128, - OEQ_PPCF128, - UNE_F32, - UNE_F64, - UNE_F128, - UNE_PPCF128, - OGE_F32, - OGE_F64, - OGE_F128, - OGE_PPCF128, - OLT_F32, - OLT_F64, - OLT_F128, - OLT_PPCF128, - OLE_F32, - OLE_F64, - OLE_F128, - OLE_PPCF128, - OGT_F32, - OGT_F64, - OGT_F128, - OGT_PPCF128, - UO_F32, - UO_F64, - UO_F128, - UO_PPCF128, - O_F32, - O_F64, - O_F128, - O_PPCF128, - - // MEMORY - MEMCPY, - MEMSET, - MEMMOVE, - - // ELEMENT-WISE UNORDERED-ATOMIC MEMORY of different element sizes - MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, - MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, - - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, - MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, - - MEMSET_ELEMENT_UNORDERED_ATOMIC_1, - MEMSET_ELEMENT_UNORDERED_ATOMIC_2, - MEMSET_ELEMENT_UNORDERED_ATOMIC_4, - MEMSET_ELEMENT_UNORDERED_ATOMIC_8, - MEMSET_ELEMENT_UNORDERED_ATOMIC_16, - - // EXCEPTION HANDLING - UNWIND_RESUME, - - // Note: there's two sets of atomics libcalls; see - // <http://llvm.org/docs/Atomics.html> for more info on the - // difference between them. - - // Atomic '__sync_*' libcalls. 
- SYNC_VAL_COMPARE_AND_SWAP_1, - SYNC_VAL_COMPARE_AND_SWAP_2, - SYNC_VAL_COMPARE_AND_SWAP_4, - SYNC_VAL_COMPARE_AND_SWAP_8, - SYNC_VAL_COMPARE_AND_SWAP_16, - SYNC_LOCK_TEST_AND_SET_1, - SYNC_LOCK_TEST_AND_SET_2, - SYNC_LOCK_TEST_AND_SET_4, - SYNC_LOCK_TEST_AND_SET_8, - SYNC_LOCK_TEST_AND_SET_16, - SYNC_FETCH_AND_ADD_1, - SYNC_FETCH_AND_ADD_2, - SYNC_FETCH_AND_ADD_4, - SYNC_FETCH_AND_ADD_8, - SYNC_FETCH_AND_ADD_16, - SYNC_FETCH_AND_SUB_1, - SYNC_FETCH_AND_SUB_2, - SYNC_FETCH_AND_SUB_4, - SYNC_FETCH_AND_SUB_8, - SYNC_FETCH_AND_SUB_16, - SYNC_FETCH_AND_AND_1, - SYNC_FETCH_AND_AND_2, - SYNC_FETCH_AND_AND_4, - SYNC_FETCH_AND_AND_8, - SYNC_FETCH_AND_AND_16, - SYNC_FETCH_AND_OR_1, - SYNC_FETCH_AND_OR_2, - SYNC_FETCH_AND_OR_4, - SYNC_FETCH_AND_OR_8, - SYNC_FETCH_AND_OR_16, - SYNC_FETCH_AND_XOR_1, - SYNC_FETCH_AND_XOR_2, - SYNC_FETCH_AND_XOR_4, - SYNC_FETCH_AND_XOR_8, - SYNC_FETCH_AND_XOR_16, - SYNC_FETCH_AND_NAND_1, - SYNC_FETCH_AND_NAND_2, - SYNC_FETCH_AND_NAND_4, - SYNC_FETCH_AND_NAND_8, - SYNC_FETCH_AND_NAND_16, - SYNC_FETCH_AND_MAX_1, - SYNC_FETCH_AND_MAX_2, - SYNC_FETCH_AND_MAX_4, - SYNC_FETCH_AND_MAX_8, - SYNC_FETCH_AND_MAX_16, - SYNC_FETCH_AND_UMAX_1, - SYNC_FETCH_AND_UMAX_2, - SYNC_FETCH_AND_UMAX_4, - SYNC_FETCH_AND_UMAX_8, - SYNC_FETCH_AND_UMAX_16, - SYNC_FETCH_AND_MIN_1, - SYNC_FETCH_AND_MIN_2, - SYNC_FETCH_AND_MIN_4, - SYNC_FETCH_AND_MIN_8, - SYNC_FETCH_AND_MIN_16, - SYNC_FETCH_AND_UMIN_1, - SYNC_FETCH_AND_UMIN_2, - SYNC_FETCH_AND_UMIN_4, - SYNC_FETCH_AND_UMIN_8, - SYNC_FETCH_AND_UMIN_16, - - // Atomic '__atomic_*' libcalls. - ATOMIC_LOAD, - ATOMIC_LOAD_1, - ATOMIC_LOAD_2, - ATOMIC_LOAD_4, - ATOMIC_LOAD_8, - ATOMIC_LOAD_16, - - ATOMIC_STORE, - ATOMIC_STORE_1, - ATOMIC_STORE_2, - ATOMIC_STORE_4, - ATOMIC_STORE_8, - ATOMIC_STORE_16, - - ATOMIC_EXCHANGE, - ATOMIC_EXCHANGE_1, - ATOMIC_EXCHANGE_2, - ATOMIC_EXCHANGE_4, - ATOMIC_EXCHANGE_8, - ATOMIC_EXCHANGE_16, - - ATOMIC_COMPARE_EXCHANGE, - ATOMIC_COMPARE_EXCHANGE_1, - ATOMIC_COMPARE_EXCHANGE_2, - ATOMIC_COMPARE_EXCHANGE_4, - ATOMIC_COMPARE_EXCHANGE_8, - ATOMIC_COMPARE_EXCHANGE_16, - - ATOMIC_FETCH_ADD_1, - ATOMIC_FETCH_ADD_2, - ATOMIC_FETCH_ADD_4, - ATOMIC_FETCH_ADD_8, - ATOMIC_FETCH_ADD_16, - - ATOMIC_FETCH_SUB_1, - ATOMIC_FETCH_SUB_2, - ATOMIC_FETCH_SUB_4, - ATOMIC_FETCH_SUB_8, - ATOMIC_FETCH_SUB_16, - - ATOMIC_FETCH_AND_1, - ATOMIC_FETCH_AND_2, - ATOMIC_FETCH_AND_4, - ATOMIC_FETCH_AND_8, - ATOMIC_FETCH_AND_16, - - ATOMIC_FETCH_OR_1, - ATOMIC_FETCH_OR_2, - ATOMIC_FETCH_OR_4, - ATOMIC_FETCH_OR_8, - ATOMIC_FETCH_OR_16, - - ATOMIC_FETCH_XOR_1, - ATOMIC_FETCH_XOR_2, - ATOMIC_FETCH_XOR_4, - ATOMIC_FETCH_XOR_8, - ATOMIC_FETCH_XOR_16, - - ATOMIC_FETCH_NAND_1, - ATOMIC_FETCH_NAND_2, - ATOMIC_FETCH_NAND_4, - ATOMIC_FETCH_NAND_8, - ATOMIC_FETCH_NAND_16, - - // Stack Protector Fail. - STACKPROTECTOR_CHECK_FAIL, - - // Deoptimization. 
- DEOPTIMIZE, - - UNKNOWN_LIBCALL +#define HANDLE_LIBCALL(code, name) code, + #include "RuntimeLibcalls.def" +#undef HANDLE_LIBCALL }; /// getFPEXT - Return the FPEXT_*_* value for the given types, or diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 25afc5b506df6..f3f2f05b877da 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -22,8 +22,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" #include <cassert> #include <cstddef> #include <iterator> diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 218e22e402349..14882205584e2 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -24,9 +24,9 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <cstdint> #include <list> @@ -275,6 +275,11 @@ namespace llvm { /// Returns an existing SUnit for this MI, or nullptr. SUnit *getSUnit(MachineInstr *MI) const; + /// If this method returns true, handling of the scheduling regions + /// themselves (in case of a scheduling boundary in MBB) will be done + /// beginning with the topmost region of MBB. + virtual bool doMBBSchedRegionsTopDown() const { return false; } + /// Prepares to perform scheduling in the given block. virtual void startBlock(MachineBasicBlock *BB); diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index d6851f7143a51..6a5c2db34bb10 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -211,6 +211,7 @@ class SelectionDAG { const SelectionDAGTargetInfo *TSI = nullptr; const TargetLowering *TLI = nullptr; MachineFunction *MF; + Pass *SDAGISelPass = nullptr; LLVMContext *Context; CodeGenOpt::Level OptLevel; @@ -335,6 +336,14 @@ private: .getRawSubclassData(); } + template <typename SDNodeTy> + static uint16_t getSyntheticNodeSubclassData(unsigned Opc, unsigned Order, + SDVTList VTs, EVT MemoryVT, + MachineMemOperand *MMO) { + return SDNodeTy(Opc, Order, DebugLoc(), VTs, MemoryVT, MMO) + .getRawSubclassData(); + } + void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { assert(!Node->OperandList && "Node already has operands"); SDUse *Ops = OperandRecycler.allocate( @@ -366,13 +375,16 @@ public: ~SelectionDAG(); /// Prepare this SelectionDAG to process code in the given MachineFunction. - void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE); + void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, + Pass *PassPtr); /// Clear state and free memory necessary to make this /// SelectionDAG ready to process a new block. 
void clear(); MachineFunction &getMachineFunction() const { return *MF; } + const Pass *getPass() const { return SDAGISelPass; } + const DataLayout &getDataLayout() const { return MF->getDataLayout(); } const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } @@ -631,6 +643,8 @@ public: SDValue getRegister(unsigned Reg, EVT VT); SDValue getRegisterMask(const uint32_t *RegMask); SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label); + SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root, + MCSymbol *Label); SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0, bool isTarget = false, unsigned char TargetFlags = 0); @@ -782,6 +796,24 @@ public: /// \brief Create a logical NOT operation as (XOR Val, BooleanOne). SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); + /// \brief Create an add instruction with appropriate flags when used for + /// addressing some offset of an object. i.e. if a load is split into multiple + /// components, create an add nuw from the base pointer to the offset. + SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset) { + EVT VT = Op.getValueType(); + return getObjectPtrOffset(SL, Op, getConstant(Offset, SL, VT)); + } + + SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, SDValue Offset) { + EVT VT = Op.getValueType(); + + // The object itself can't wrap around the address space, so it shouldn't be + // possible for the adds of the offsets to the split parts to overflow. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + return getNode(ISD::ADD, SL, VT, Op, Offset, Flags); + } + /// Return a new CALLSEQ_START node, that starts new call frame, in which /// InSize bytes are set up inside CALLSEQ_START..CALLSEQ_END sequence and /// OutSize specifies part of the frame set up prior to the sequence. @@ -956,11 +988,14 @@ public: /// result and takes a list of operands. Opcode may be INTRINSIC_VOID, /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not /// less than FIRST_TARGET_MEMORY_OPCODE. - SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, - ArrayRef<SDValue> Ops, EVT MemVT, - MachinePointerInfo PtrInfo, unsigned Align = 0, - bool Vol = false, bool ReadMem = true, - bool WriteMem = true, unsigned Size = 0); + SDValue getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachinePointerInfo PtrInfo, + unsigned Align = 0, + MachineMemOperand::Flags Flags + = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, + unsigned Size = 0); SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, @@ -1166,19 +1201,26 @@ public: const SDNodeFlags Flags = SDNodeFlags()); /// Creates a SDDbgValue node. - SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, - bool IsIndirect, uint64_t Off, const DebugLoc &DL, + SDDbgValue *getDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, + unsigned R, bool IsIndirect, const DebugLoc &DL, unsigned O); - /// Constant - SDDbgValue *getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, - uint64_t Off, const DebugLoc &DL, unsigned O); + /// Creates a constant SDDbgValue node. 
+ SDDbgValue *getConstantDbgValue(DIVariable *Var, DIExpression *Expr, + const Value *C, const DebugLoc &DL, + unsigned O); - /// FrameIndex - SDDbgValue *getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, - uint64_t Off, const DebugLoc &DL, + /// Creates a FrameIndex SDDbgValue node. + SDDbgValue *getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr, + unsigned FI, const DebugLoc &DL, unsigned O); + /// Transfer debug values from one node to another, while optionally + /// generating fragment expressions for split-up values. If \p InvalidateDbg + /// is set, debug values are invalidated after they are transferred. + void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits = 0, + unsigned SizeInBits = 0, bool InvalidateDbg = true); + /// Remove the specified node from the system. If any of its /// operands then becomes dead, remove them as well. Inform UpdateListener /// for each node deleted. @@ -1208,7 +1250,7 @@ public: void ReplaceAllUsesWith(SDNode *From, const SDValue *To); /// Replace any uses of From with To, leaving - /// uses of other values produced by From.Val alone. + /// uses of other values produced by From.getNode() alone. void ReplaceAllUsesOfValueWith(SDValue From, SDValue To); /// Like ReplaceAllUsesOfValueWith, but for multiple values at once. @@ -1259,10 +1301,6 @@ public: return DbgInfo->getSDDbgValues(SD); } -private: - /// Transfer SDDbgValues. Called via ReplaceAllUses{OfValue}?With - void TransferDbgValues(SDValue From, SDValue To); - public: /// Return true if there are any SDDbgValue nodes associated /// with this SelectionDAG. @@ -1279,6 +1317,10 @@ public: return DbgInfo->ByvalParmDbgEnd(); } + /// To be invoked on an SDNode that is slated to be erased. This + /// function mirrors \c llvm::salvageDebugInfo. + void salvageDebugInfo(SDNode &N); + void dump() const; /// Create a stack temporary, suitable for holding the specified value type. @@ -1308,6 +1350,14 @@ public: SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl); + /// See if the specified operand can be simplified with the knowledge that only + /// the bits specified by Mask are used. If so, return the simpler operand, + /// otherwise return a null SDValue. + /// + /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can + /// simplify nodes with multiple uses more aggressively.) + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + /// Return true if the sign bit of Op is known to be zero. /// We use this predicate to simplify operations downstream. bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const; diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 2107e5a313819..18e4c7a83defb 100644 --- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -1,5 +1,4 @@ -//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.h ------- DAG Address Analysis -//---*- C++ -*-===// +//===- SelectionDAGAddressAnalysis.h - DAG Address Analysis -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,16 +6,17 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// #ifndef LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H #define LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H -#include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include <cstdint> namespace llvm { + +class SelectionDAG; + /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -34,12 +34,11 @@ class BaseIndexOffset { private: SDValue Base; SDValue Index; - int64_t Offset; - bool IsIndexSignExt; + int64_t Offset = 0; + bool IsIndexSignExt = false; public: - BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} - + BaseIndexOffset() = default; BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, bool IsIndexSignExt) : Base(Base), Index(Index), Offset(Offset), @@ -59,6 +58,7 @@ public: /// Parses tree in Ptr for base, index, offset addresses. static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG); }; -} // namespace llvm -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_SELECTIONDAGADDRESSANALYSIS_H diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 591b2f773344d..de6849a1eae12 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <memory> namespace llvm { @@ -130,6 +130,7 @@ public: OPC_CheckOpcode, OPC_SwitchOpcode, OPC_CheckType, + OPC_CheckTypeRes, OPC_SwitchType, OPC_CheckChild0Type, OPC_CheckChild1Type, OPC_CheckChild2Type, OPC_CheckChild3Type, OPC_CheckChild4Type, OPC_CheckChild5Type, @@ -275,6 +276,8 @@ public: return false; } + bool isOrEquivalentToAdd(const SDNode *N) const; + private: // Calls to these functions are generated by tblgen. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 5fb69ae232af8..7de2e766d521a 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -85,10 +85,7 @@ namespace ISD { /// If N is a BUILD_VECTOR node whose elements are all the same constant or /// undefined, return true and return the constant value in \p SplatValue. - /// This sets \p SplatValue to the smallest possible splat unless AllowShrink - /// is set to false. - bool isConstantSplatVector(const SDNode *N, APInt &SplatValue, - bool AllowShrink = true); + bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); /// Return true if the specified node is a BUILD_VECTOR where all of the /// elements are ~0 or undef. @@ -626,13 +623,14 @@ public: /// Test if this node is a strict floating point pseudo-op. 
bool isStrictFPOpcode() { switch (NodeType) { - default: + default: return false; case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: case ISD::STRICT_FDIV: case ISD::STRICT_FREM: + case ISD::STRICT_FMA: case ISD::STRICT_FSQRT: case ISD::STRICT_FPOW: case ISD::STRICT_FPOWI: @@ -1436,6 +1434,9 @@ public: const APInt &getAPIntValue() const { return Value->getValue(); } uint64_t getZExtValue() const { return Value->getZExtValue(); } int64_t getSExtValue() const { return Value->getSExtValue(); } + uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) { + return Value->getLimitedValue(Limit); + } bool isOne() const { return Value->isOne(); } bool isNullValue() const { return Value->isZero(); } @@ -1489,11 +1490,7 @@ public: /// convenient to write "2.0" and the like. Without this function we'd /// have to duplicate its logic everywhere it's called. bool isExactlyValue(double V) const { - bool ignored; - APFloat Tmp(V); - Tmp.convert(Value->getValueAPF().getSemantics(), - APFloat::rmNearestTiesToEven, &ignored); - return isExactlyValue(Tmp); + return Value->getValueAPF().isExactlyValue(V); } bool isExactlyValue(const APFloat& V) const; @@ -1850,19 +1847,20 @@ public: } }; -class EHLabelSDNode : public SDNode { +class LabelSDNode : public SDNode { friend class SelectionDAG; MCSymbol *Label; - EHLabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L) + LabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L) : SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {} public: MCSymbol *getLabel() const { return Label; } static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::EH_LABEL; + return N->getOpcode() == ISD::EH_LABEL || + N->getOpcode() == ISD::ANNOTATION_LABEL; } }; @@ -2017,6 +2015,9 @@ public: /// For integers this is the same as doing a TRUNCATE and storing the result. /// For floats, it is the same as doing an FP_ROUND and storing the result. bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } + void setTruncatingStore(bool Truncating) { + StoreSDNodeBits.IsTruncating = Truncating; + } const SDValue &getValue() const { return getOperand(1); } const SDValue &getBasePtr() const { return getOperand(2); } @@ -2113,7 +2114,7 @@ class MaskedGatherScatterSDNode : public MemSDNode { public: friend class SelectionDAG; - MaskedGatherScatterSDNode(unsigned NodeTy, unsigned Order, + MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} diff --git a/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/include/llvm/CodeGen/SelectionDAGTargetInfo.h index ac5092af8def3..45c1df48a5e6f 100644 --- a/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ b/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/SelectionDAGTargetInfo.h - SelectionDAG Info -*- C++ -*-==// +//==- llvm/CodeGen/SelectionDAGTargetInfo.h - SelectionDAG Info --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -16,21 +16,24 @@ #ifndef LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H #define LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CodeGen.h" +#include <utility> namespace llvm { +class SelectionDAG; + //===----------------------------------------------------------------------===// /// Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. 
/// class SelectionDAGTargetInfo { - SelectionDAGTargetInfo(const SelectionDAGTargetInfo &) = delete; - void operator=(const SelectionDAGTargetInfo &) = delete; - public: explicit SelectionDAGTargetInfo() = default; + SelectionDAGTargetInfo(const SelectionDAGTargetInfo &) = delete; + SelectionDAGTargetInfo &operator=(const SelectionDAGTargetInfo &) = delete; virtual ~SelectionDAGTargetInfo(); /// Emit target-specific code that performs a memcpy. @@ -144,6 +147,7 @@ public: MachinePointerInfo SrcPtrInfo) const { return std::make_pair(SDValue(), SDValue()); } + // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather // than FMUL and ADD is delegated to the machine combiner. virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const { @@ -151,6 +155,6 @@ public: } }; -} // end llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SELECTIONDAGTARGETINFO_H diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index a7b16e7a9ed22..3a91e363f9231 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -139,7 +139,7 @@ class raw_ostream; }; /// Construct an invalid index. - SlotIndex() : lie(nullptr, 0) {} + SlotIndex() = default; // Construct a new slot index from the given one, and set the slot. SlotIndex(const SlotIndex &li, Slot s) : lie(li.listEntry(), unsigned(s)) { diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index 8263946ed9280..4407114d2741b 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/CallingConv.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include <algorithm> #include <cassert> @@ -25,7 +26,6 @@ namespace llvm { class AsmPrinter; class MCExpr; class MCStreamer; -class MCSymbol; class raw_ostream; class TargetRegisterInfo; diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h index 483c0ab1eec9e..be6562c85f2ef 100644 --- a/include/llvm/CodeGen/TailDuplicator.h +++ b/include/llvm/CodeGen/TailDuplicator.h @@ -19,11 +19,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include <utility> #include <vector> @@ -61,13 +57,14 @@ class TailDuplicator { public: /// Prepare to run on a specific machine function. /// @param MF - Function that will be processed + /// @param PreRegAlloc - true if used before register allocation /// @param MBPI - Branch Probability Info. Used to propagate correct /// probabilities when modifying the CFG. /// @param LayoutMode - When true, don't use the existing layout to make /// decisions. /// @param TailDupSize - Maxmimum size of blocks to tail-duplicate. Zero /// default implies using the command line value TailDupSize. 
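A hedged sketch of a caller updated for the new initMF signature shown just below; MF and MBPI are assumed to exist in the calling pass.

TailDuplicator TailDup;
TailDup.initMF(MF, /*PreRegAlloc=*/true, MBPI,
               /*LayoutMode=*/false); // TailDupSize keeps its default of 0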
- void initMF(MachineFunction &MF, + void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, bool LayoutMode, unsigned TailDupSize = 0); diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h new file mode 100644 index 0000000000000..8646a15599cbc --- /dev/null +++ b/include/llvm/CodeGen/TargetCallingConv.h @@ -0,0 +1,204 @@ +//===-- llvm/CodeGen/TargetCallingConv.h - Calling Convention ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines types for working with calling-convention information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETCALLINGCONV_H +#define LLVM_CODEGEN_TARGETCALLINGCONV_H + +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <climits> +#include <cstdint> + +namespace llvm { +namespace ISD { + + struct ArgFlagsTy { + private: + unsigned IsZExt : 1; ///< Zero extended + unsigned IsSExt : 1; ///< Sign extended + unsigned IsInReg : 1; ///< Passed in register + unsigned IsSRet : 1; ///< Hidden struct-ret ptr + unsigned IsByVal : 1; ///< Struct passed by value + unsigned IsNest : 1; ///< Nested fn static chain + unsigned IsReturned : 1; ///< Always returned + unsigned IsSplit : 1; + unsigned IsInAlloca : 1; ///< Passed with inalloca + unsigned IsSplitEnd : 1; ///< Last part of a split + unsigned IsSwiftSelf : 1; ///< Swift self parameter + unsigned IsSwiftError : 1; ///< Swift error parameter + unsigned IsHva : 1; ///< HVA field for + unsigned IsHvaStart : 1; ///< HVA structure start + unsigned IsSecArgPass : 1; ///< Second argument + unsigned ByValAlign : 4; ///< Log 2 of byval alignment + unsigned OrigAlign : 5; ///< Log 2 of original alignment + unsigned IsInConsecutiveRegsLast : 1; + unsigned IsInConsecutiveRegs : 1; + unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate + + unsigned ByValSize; ///< Byval struct size + + public: + ArgFlagsTy() + : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0), + IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0), + IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0), + IsSecArgPass(0), ByValAlign(0), OrigAlign(0), + IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), + IsCopyElisionCandidate(0), ByValSize(0) { + static_assert(sizeof(*this) == 2 * sizeof(unsigned), "flags are too big"); + } + + bool isZExt() const { return IsZExt; } + void setZExt() { IsZExt = 1; } + + bool isSExt() const { return IsSExt; } + void setSExt() { IsSExt = 1; } + + bool isInReg() const { return IsInReg; } + void setInReg() { IsInReg = 1; } + + bool isSRet() const { return IsSRet; } + void setSRet() { IsSRet = 1; } + + bool isByVal() const { return IsByVal; } + void setByVal() { IsByVal = 1; } + + bool isInAlloca() const { return IsInAlloca; } + void setInAlloca() { IsInAlloca = 1; } + + bool isSwiftSelf() const { return IsSwiftSelf; } + void setSwiftSelf() { IsSwiftSelf = 1; } + + bool isSwiftError() const { return IsSwiftError; } + void setSwiftError() { IsSwiftError = 1; } + + bool isHva() const { return IsHva; } + void setHva() { IsHva = 1; } + + bool isHvaStart() const { return IsHvaStart; } + void setHvaStart() { IsHvaStart = 
1; } + + bool isSecArgPass() const { return IsSecArgPass; } + void setSecArgPass() { IsSecArgPass = 1; } + + bool isNest() const { return IsNest; } + void setNest() { IsNest = 1; } + + bool isReturned() const { return IsReturned; } + void setReturned() { IsReturned = 1; } + + bool isInConsecutiveRegs() const { return IsInConsecutiveRegs; } + void setInConsecutiveRegs() { IsInConsecutiveRegs = 1; } + + bool isInConsecutiveRegsLast() const { return IsInConsecutiveRegsLast; } + void setInConsecutiveRegsLast() { IsInConsecutiveRegsLast = 1; } + + bool isSplit() const { return IsSplit; } + void setSplit() { IsSplit = 1; } + + bool isSplitEnd() const { return IsSplitEnd; } + void setSplitEnd() { IsSplitEnd = 1; } + + bool isCopyElisionCandidate() const { return IsCopyElisionCandidate; } + void setCopyElisionCandidate() { IsCopyElisionCandidate = 1; } + + unsigned getByValAlign() const { return (1U << ByValAlign) / 2; } + void setByValAlign(unsigned A) { + ByValAlign = Log2_32(A) + 1; + assert(getByValAlign() == A && "bitfield overflow"); + } + + unsigned getOrigAlign() const { return (1U << OrigAlign) / 2; } + void setOrigAlign(unsigned A) { + OrigAlign = Log2_32(A) + 1; + assert(getOrigAlign() == A && "bitfield overflow"); + } + + unsigned getByValSize() const { return ByValSize; } + void setByValSize(unsigned S) { ByValSize = S; } + }; + + /// InputArg - This struct carries flags and type information about a + /// single incoming (formal) argument or incoming (from the perspective + /// of the caller) return value virtual register. + /// + struct InputArg { + ArgFlagsTy Flags; + MVT VT = MVT::Other; + EVT ArgVT; + bool Used = false; + + /// Index original Function's argument. + unsigned OrigArgIndex; + /// Sentinel value for implicit machine-level input arguments. + static const unsigned NoArgIndex = UINT_MAX; + + /// Offset in bytes of current input value relative to the beginning of + /// original argument. E.g. if argument was splitted into four 32 bit + /// registers, we got 4 InputArgs with PartOffsets 0, 4, 8 and 12. + unsigned PartOffset; + + InputArg() = default; + InputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool used, + unsigned origIdx, unsigned partOffs) + : Flags(flags), Used(used), OrigArgIndex(origIdx), PartOffset(partOffs) { + VT = vt.getSimpleVT(); + ArgVT = argvt; + } + + bool isOrigArg() const { + return OrigArgIndex != NoArgIndex; + } + + unsigned getOrigArgIndex() const { + assert(OrigArgIndex != NoArgIndex && "Implicit machine-level argument"); + return OrigArgIndex; + } + }; + + /// OutputArg - This struct carries flags and a value for a + /// single outgoing (actual) argument or outgoing (from the perspective + /// of the caller) return value virtual register. + /// + struct OutputArg { + ArgFlagsTy Flags; + MVT VT; + EVT ArgVT; + + /// IsFixed - Is this a "fixed" value, ie not passed through a vararg "...". + bool IsFixed = false; + + /// Index original Function's argument. + unsigned OrigArgIndex; + + /// Offset in bytes of current output value relative to the beginning of + /// original argument. E.g. if argument was splitted into four 32 bit + /// registers, we got 4 OutputArgs with PartOffsets 0, 4, 8 and 12. 
+ unsigned PartOffset; + + OutputArg() = default; + OutputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool isfixed, + unsigned origIdx, unsigned partOffs) + : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx), + PartOffset(partOffs) { + VT = vt.getSimpleVT(); + ArgVT = argvt; + } + }; + +} // end namespace ISD +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETCALLINGCONV_H diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h new file mode 100644 index 0000000000000..61f1cf07bcf2c --- /dev/null +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -0,0 +1,348 @@ +//===-- llvm/CodeGen/TargetFrameLowering.h ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface to describe the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETFRAMELOWERING_H +#define LLVM_CODEGEN_TARGETFRAMELOWERING_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include <utility> +#include <vector> + +namespace llvm { + class BitVector; + class CalleeSavedInfo; + class MachineFunction; + class RegScavenger; + +/// Information about stack frame layout on the target. It holds the direction +/// of stack growth, the known stack alignment on entry to each function, and +/// the offset to the locals area. +/// +/// The offset to the local area is the offset from the stack pointer on +/// function entry to the first location where function data (local variables, +/// spill locations) can be stored. +class TargetFrameLowering { +public: + enum StackDirection { + StackGrowsUp, // Adding to the stack increases the stack address + StackGrowsDown // Adding to the stack decreases the stack address + }; + + // Maps a callee saved register to a stack slot with a fixed offset. + struct SpillSlot { + unsigned Reg; + int Offset; // Offset relative to stack pointer on function entry. + }; +private: + StackDirection StackDir; + unsigned StackAlignment; + unsigned TransientStackAlignment; + int LocalAreaOffset; + bool StackRealignable; +public: + TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1, bool StackReal = true) + : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), + LocalAreaOffset(LAO), StackRealignable(StackReal) {} + + virtual ~TargetFrameLowering(); + + // These methods return information that describes the abstract stack layout + // of the target machine. + + /// getStackGrowthDirection - Return the direction the stack grows + /// + StackDirection getStackGrowthDirection() const { return StackDir; } + + /// getStackAlignment - This method returns the number of bytes to which the + /// stack pointer must be aligned on entry to a function. Typically, this + /// is the largest alignment for any data object in the target. + /// + unsigned getStackAlignment() const { return StackAlignment; } + + /// alignSPAdjust - This method aligns the stack adjustment to the correct + /// alignment. 
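A small worked example for the alignSPAdjust method defined just below, assuming a hypothetical frame lowering TFL constructed with StackAlignment = 16.

assert(TFL.alignSPAdjust(20)  == 32);   // positive adjustments round up
assert(TFL.alignSPAdjust(-20) == -32);  // negative adjustments round away from zero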
+ /// + int alignSPAdjust(int SPAdj) const { + if (SPAdj < 0) { + SPAdj = -alignTo(-SPAdj, StackAlignment); + } else { + SPAdj = alignTo(SPAdj, StackAlignment); + } + return SPAdj; + } + + /// getTransientStackAlignment - This method returns the number of bytes to + /// which the stack pointer must be aligned at all times, even between + /// calls. + /// + unsigned getTransientStackAlignment() const { + return TransientStackAlignment; + } + + /// isStackRealignable - This method returns whether the stack can be + /// realigned. + bool isStackRealignable() const { + return StackRealignable; + } + + /// Return the skew that has to be applied to stack alignment under + /// certain conditions (e.g. stack was adjusted before function \p MF + /// was called). + virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const; + + /// getOffsetOfLocalArea - This method returns the offset of the local area + /// from the stack pointer on entrance to a function. + /// + int getOffsetOfLocalArea() const { return LocalAreaOffset; } + + /// isFPCloseToIncomingSP - Return true if the frame pointer is close to + /// the incoming stack pointer, false if it is close to the post-prologue + /// stack pointer. + virtual bool isFPCloseToIncomingSP() const { return true; } + + /// assignCalleeSavedSpillSlots - Allows target to override spill slot + /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should + /// assign frame slots to all CSI entries and return true. If this method + /// returns false, spill slots will be assigned using generic implementation. + /// assignCalleeSavedSpillSlots() may add, delete or rearrange elements of + /// CSI. + virtual bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + return false; + } + + /// getCalleeSavedSpillSlots - This method returns a pointer to an array of + /// pairs, that contains an entry for each callee saved register that must be + /// spilled to a particular stack location if it is spilled. + /// + /// Each entry in this array contains a <register,offset> pair, indicating the + /// fixed offset from the incoming stack pointer that each register should be + /// spilled at. If a register is not listed here, the code generator is + /// allowed to spill it anywhere it chooses. + /// + virtual const SpillSlot * + getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = 0; + return nullptr; + } + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + virtual bool targetHandlesStackFrameRounding() const { + return false; + } + + /// Returns true if the target will correctly handle shrink wrapping. + virtual bool enableShrinkWrapping(const MachineFunction &MF) const { + return false; + } + + /// Returns true if the stack slot holes in the fixed and callee-save stack + /// area should be used when allocating other stack locations to reduce stack + /// size. + virtual bool enableStackSlotScavenging(const MachineFunction &MF) const { + return false; + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
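A minimal sketch of a target subclass, assuming a hypothetical target with a 16-byte-aligned, downward-growing stack; only the pure-virtual hooks declared just below are overridden, and MyTargetFrameLowering is an invented name.

class MyTargetFrameLowering : public TargetFrameLowering {
public:
  MyTargetFrameLowering()
      : TargetFrameLowering(StackGrowsDown, /*StackAl=*/16, /*LAO=*/0) {}
  void emitPrologue(MachineFunction &MF,
                    MachineBasicBlock &MBB) const override {
    // Prologue emission elided in this sketch.
  }
  void emitEpilogue(MachineFunction &MF,
                    MachineBasicBlock &MBB) const override {
    // Epilogue emission elided in this sketch.
  }
  bool hasFP(const MachineFunction &MF) const override { return false; }
};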
+ virtual void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const = 0; + virtual void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const = 0; + + /// Replace a StackProbe stub (if any) with the actual probe code inline + virtual void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + + /// Adjust the prologue to have the function use segmented stacks. This works + /// by adding a check even before the "normal" function prologue. + virtual void adjustForSegmentedStacks(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + + /// Adjust the prologue to add Erlang Run-Time System (ERTS) specific code in + /// the assembly prologue to explicitly handle the stack. + virtual void adjustForHiPEPrologue(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + + /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee + /// saved registers and returns true if it isn't possible / profitable to do + /// so by issuing a series of store instructions via + /// storeRegToStackSlot(). Returns false otherwise. + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + return false; + } + + /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee + /// saved registers and returns true if it isn't possible / profitable to do + /// so by issuing a series of load instructions via loadRegToStackSlot(). + /// If it returns true, and any of the registers in CSI is not restored, + /// it sets the corresponding Restored flag in CSI to false. + /// Returns false otherwise. + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + return false; + } + + /// Return true if the target needs to disable frame pointer elimination. + virtual bool noFramePointerElim(const MachineFunction &MF) const; + + /// hasFP - Return true if the specified function should have a dedicated + /// frame pointer register. For most targets this is true only if the function + /// has variable sized allocas or if frame pointer elimination is disabled. + virtual bool hasFP(const MachineFunction &MF) const = 0; + + /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is + /// not required, we reserve argument space for call sites in the function + /// immediately on entry to the current function. This eliminates the need for + /// add/sub sp brackets around call sites. Returns true if the call frame is + /// included as part of the stack frame. + virtual bool hasReservedCallFrame(const MachineFunction &MF) const { + return !hasFP(MF); + } + + /// canSimplifyCallFramePseudos - When possible, it's best to simplify the + /// call frame pseudo ops before doing frame index elimination. This is + /// possible only when frame index references between the pseudos won't + /// need adjusting for the call frame adjustments. Normally, that's true + /// if the function has a reserved call frame or a frame pointer. Some + /// targets (Thumb2, for example) may have more complicated criteria, + /// however, and can override this behavior. + virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const { + return hasReservedCallFrame(MF) || hasFP(MF); + } + + // needsFrameIndexResolution - Do we need to perform FI resolution for + // this function. 
Normally, this is required only when the function + // has any stack objects. However, targets may want to override this. + virtual bool needsFrameIndexResolution(const MachineFunction &MF) const; + + /// getFrameIndexReference - This method should return the base register + /// and offset used to reference a frame index location. The offset is + /// returned directly, and the base register is returned via FrameReg. + virtual int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const; + + /// Same as \c getFrameIndexReference, except that the stack pointer (as + /// opposed to the frame pointer) will be the preferred value for \p + /// FrameReg. This is generally used for emitting statepoint or EH tables that + /// use offsets from RSP. If \p IgnoreSPUpdates is true, the returned + /// offset is only guaranteed to be valid with respect to the value of SP at + /// the end of the prologue. + virtual int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, + unsigned &FrameReg, + bool IgnoreSPUpdates) const { + // Always safe to dispatch to getFrameIndexReference. + return getFrameIndexReference(MF, FI, FrameReg); + } + + /// This method determines which of the registers reported by + /// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved. + /// The default implementation checks populates the \p SavedRegs bitset with + /// all registers which are modified in the function, targets may override + /// this function to save additional registers. + /// This method also sets up the register scavenger ensuring there is a free + /// register or a frameindex available. + virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const; + + /// processFunctionBeforeFrameFinalized - This method is called immediately + /// before the specified function's frame layout (MF.getFrameInfo()) is + /// finalized. Once the frame is finalized, MO_FrameIndex operands are + /// replaced with direct constants. This method is optional. + /// + virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = nullptr) const { + } + + virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const { + report_fatal_error("WinEH not implemented for this target"); + } + + /// This method is called during prolog/epilog code insertion to eliminate + /// call frame setup and destroy pseudo instructions (but only if the Target + /// is using them). It is responsible for eliminating these instructions, + /// replacing them with concrete instructions. This method need only be + /// implemented if using call frame setup/destroy pseudo instructions. + /// Returns an iterator pointing to the instruction after the replaced one. + virtual MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + llvm_unreachable("Call Frame Pseudo Instructions do not exist on this " + "target!"); + } + + + /// Order the symbols in the local stack frame. + /// The list of objects that we want to order is in \p objectsToAllocate as + /// indices into the MachineFrameInfo. The array can be reordered in any way + /// upon return. The contents of the array, however, may not be modified (i.e. + /// only their order may be changed). + /// By default, just maintain the original order. 
+ virtual void + orderFrameObjects(const MachineFunction &MF, + SmallVectorImpl<int> &objectsToAllocate) const { + } + + /// Check whether or not the given \p MBB can be used as a prologue + /// for the target. + /// The prologue will be inserted first in this basic block. + /// This method is used by the shrink-wrapping pass to decide if + /// \p MBB will be correctly handled by the target. + /// As soon as the target enable shrink-wrapping without overriding + /// this method, we assume that each basic block is a valid + /// prologue. + virtual bool canUseAsPrologue(const MachineBasicBlock &MBB) const { + return true; + } + + /// Check whether or not the given \p MBB can be used as a epilogue + /// for the target. + /// The epilogue will be inserted before the first terminator of that block. + /// This method is used by the shrink-wrapping pass to decide if + /// \p MBB will be correctly handled by the target. + /// As soon as the target enable shrink-wrapping without overriding + /// this method, we assume that each basic block is a valid + /// epilogue. + virtual bool canUseAsEpilogue(const MachineBasicBlock &MBB) const { + return true; + } + + /// Check if given function is safe for not having callee saved registers. + /// This is used when interprocedural register allocation is enabled. + static bool isSafeForNoCSROpt(const Function &F) { + if (!F.hasLocalLinkage() || F.hasAddressTaken() || + !F.hasFnAttribute(Attribute::NoRecurse)) + return false; + // Function should not be optimized as tail call. + for (const User *U : F.users()) + if (auto CS = ImmutableCallSite(U)) + if (CS.isTailCall()) + return false; + return true; + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h new file mode 100644 index 0000000000000..38a1b33aecad8 --- /dev/null +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -0,0 +1,1691 @@ +//===- llvm/CodeGen/TargetInstrInfo.h - Instruction Info --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the target machine instruction set to the code generator. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETINSTRINFO_H +#define LLVM_TARGET_TARGETINSTRINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/None.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <utility> +#include <vector> + +namespace llvm { + +class DFAPacketizer; +class InstrItineraryData; +class LiveIntervals; +class LiveVariables; +class MachineMemOperand; +class MachineRegisterInfo; +class MCAsmInfo; +class MCInst; +struct MCSchedModel; +class Module; +class ScheduleDAG; +class ScheduleHazardRecognizer; +class SDNode; +class SelectionDAG; +class RegScavenger; +class TargetRegisterClass; +class TargetRegisterInfo; +class TargetSchedModel; +class TargetSubtargetInfo; + +template <class T> class SmallVectorImpl; + +//--------------------------------------------------------------------------- +/// +/// TargetInstrInfo - Interface to description of machine instruction set +/// +class TargetInstrInfo : public MCInstrInfo { +public: + TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, + unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u) + : CallFrameSetupOpcode(CFSetupOpcode), + CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode), + ReturnOpcode(ReturnOpcode) {} + TargetInstrInfo(const TargetInstrInfo &) = delete; + TargetInstrInfo &operator=(const TargetInstrInfo &) = delete; + virtual ~TargetInstrInfo(); + + static bool isGenericOpcode(unsigned Opc) { + return Opc <= TargetOpcode::GENERIC_OP_END; + } + + /// Given a machine instruction descriptor, returns the register + /// class constraint for OpNum, or NULL. + const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const; + + /// Return true if the instruction is trivially rematerializable, meaning it + /// has no side effects and requires no operands that aren't always available. + /// This means the only allowed uses are constants and unallocatable physical + /// registers so that the instructions result is independent of the place + /// in the function. + bool isTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA = nullptr) const { + return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || + (MI.getDesc().isRematerializable() && + (isReallyTriviallyReMaterializable(MI, AA) || + isReallyTriviallyReMaterializableGeneric(MI, AA))); + } + +protected: + /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is + /// set, this hook lets the target specify whether the instruction is actually + /// trivially rematerializable, taking into consideration its operands. This + /// predicate must return false if the instruction has any side effects other + /// than producing a value, or if it requres any address registers that are + /// not always available. + /// Requirements must be check as stated in isTriviallyReMaterializable() . 
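A hedged sketch of how a backend might override the hook declared just below; MyTargetInstrInfo and MYTGT::LOADCONST are hypothetical names used only for illustration.

bool MyTargetInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI, AliasAnalysis *AA) const {
  // A constant-materializing pseudo with no side effects and no register
  // inputs is safe to recompute at any use.
  return MI.getOpcode() == MYTGT::LOADCONST;
}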
+ virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA) const { + return false; + } + + /// This method commutes the operands of the given machine instruction MI. + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. + /// + /// If a target has any instructions that are commutable but require + /// converting to different instructions or making non-trivial changes + /// to commute them, this method can be overloaded to do that. + /// The default implementation simply swaps the commutable operands. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + virtual MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const; + + /// Assigns the (CommutableOpIdx1, CommutableOpIdx2) pair of commutable + /// operand indices to (ResultIdx1, ResultIdx2). + /// One or both input values of the pair: (ResultIdx1, ResultIdx2) may be + /// predefined to some indices or be undefined (designated by the special + /// value 'CommuteAnyOperandIndex'). + /// The predefined result indices cannot be re-defined. + /// The function returns true iff after the result pair redefinition + /// the fixed result pair is equal to or equivalent to the source pair of + /// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that + /// the pairs (x,y) and (y,x) are equivalent. + static bool fixCommutedOpIndices(unsigned &ResultIdx1, unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2); + +private: + /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is + /// set and the target hook isReallyTriviallyReMaterializable returns false, + /// this function does target-independent tests to determine if the + /// instruction is really trivially rematerializable. + bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, + AliasAnalysis *AA) const; + +public: + /// These methods return the opcode of the frame setup/destroy instructions + /// if they exist (-1 otherwise). Some targets use pseudo instructions in + /// order to abstract away the difference between operating with a frame + /// pointer and operating without, through the use of these two instructions. + /// + unsigned getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; } + unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; } + + /// Returns true if the argument is a frame pseudo instruction. + bool isFrameInstr(const MachineInstr &I) const { + return I.getOpcode() == getCallFrameSetupOpcode() || + I.getOpcode() == getCallFrameDestroyOpcode(); + } + + /// Returns true if the argument is a frame setup pseudo instruction. + bool isFrameSetup(const MachineInstr &I) const { + return I.getOpcode() == getCallFrameSetupOpcode(); + } + + /// Returns size of the frame associated with the given frame instruction. + /// For frame setup instruction this is frame that is set up space set up + /// after the instruction. For frame destroy instruction this is the frame + /// freed by the caller. + /// Note, in some cases a call frame (or a part of it) may be prepared prior + /// to the frame setup instruction. 
It occurs in the calls that involve + /// inalloca arguments. This function reports only the size of the frame part + /// that is set up between the frame setup and destroy pseudo instructions. + int64_t getFrameSize(const MachineInstr &I) const { + assert(isFrameInstr(I) && "Not a frame instruction"); + assert(I.getOperand(0).getImm() >= 0); + return I.getOperand(0).getImm(); + } + + /// Returns the total frame size, which is made up of the space set up inside + /// the pair of frame start-stop instructions and the space that is set up + /// prior to the pair. + int64_t getFrameTotalSize(const MachineInstr &I) const { + if (isFrameSetup(I)) { + assert(I.getOperand(1).getImm() >= 0 && + "Frame size must not be negative"); + return getFrameSize(I) + I.getOperand(1).getImm(); + } + return getFrameSize(I); + } + + unsigned getCatchReturnOpcode() const { return CatchRetOpcode; } + unsigned getReturnOpcode() const { return ReturnOpcode; } + + /// Returns the actual stack pointer adjustment made by an instruction + /// as part of a call sequence. By default, only call frame setup/destroy + /// instructions adjust the stack, but targets may want to override this + /// to enable more fine-grained adjustment, or adjust by a different value. + virtual int getSPAdjust(const MachineInstr &MI) const; + + /// Return true if the instruction is a "coalescable" extension instruction. + /// That is, it's like a copy where it's legal for the source to overlap the + /// destination. e.g. X86::MOVSX64rr32. If this returns true, then it's + /// expected the pre-extension value is available as a subreg of the result + /// register. This also returns the sub-register index in SubIdx. + virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const { + return false; + } + + /// If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// Check for post-frame ptr elimination stack locations as well. + /// This uses a heuristic so it isn't reliable for correctness. + virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// If the specified machine instruction has a load from a stack slot, + /// return true along with the FrameIndex of the loaded stack slot and the + /// machine mem operand containing the reference. + /// If not, return false. Unlike isLoadFromStackSlot, this returns true for + /// any instructions that loads from the stack. This is just a hint, as some + /// cases may be missed. + virtual bool hasLoadFromStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + + /// If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
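A worked example for the frame-size helpers defined above (getFrameSize / getFrameTotalSize), using hypothetical operand values for a frame-setup pseudo.

// Hypothetical frame-setup pseudo: ADJCALLSTACKDOWN 32, 8
// getFrameSize(MI)      == 32  (space set up between the setup/destroy pair)
// getFrameTotalSize(MI) == 40  (adds the 8 bytes prepared before the pseudo)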
+ virtual unsigned isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// Check for post-frame ptr elimination stack locations as well. + /// This uses a heuristic, so it isn't reliable for correctness. + virtual unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, + int &FrameIndex) const { + return 0; + } + + /// If the specified machine instruction has a store to a stack slot, + /// return true along with the FrameIndex of the loaded stack slot and the + /// machine mem operand containing the reference. + /// If not, return false. Unlike isStoreToStackSlot, + /// this returns true for any instructions that stores to the + /// stack. This is just a hint, as some cases may be missed. + virtual bool hasStoreToStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + + /// Return true if the specified machine instruction + /// is a copy of one stack slot to another and has no other effect. + /// Provide the identity of the two frame indices. + virtual bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex, + int &SrcFrameIndex) const { + return false; + } + + /// Compute the size in bytes and offset within a stack slot of a spilled + /// register or subregister. + /// + /// \param [out] Size in bytes of the spilled value. + /// \param [out] Offset in bytes within the stack slot. + /// \returns true if both Size and Offset are successfully computed. + /// + /// Not all subregisters have computable spill slots. For example, + /// subregisters registers may not be byte-sized, and a pair of discontiguous + /// subregisters has no single offset. + /// + /// Targets with nontrivial bigendian implementations may need to override + /// this, particularly to support spilled vector registers. + virtual bool getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, + unsigned &Size, unsigned &Offset, + const MachineFunction &MF) const; + + /// Returns the size in bytes of the specified MachineInstr, or ~0U + /// when this function is not implemented by a target. + virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const { + return ~0U; + } + + /// Return true if the instruction is as cheap as a move instruction. + /// + /// Targets for different archs need to override this, and different + /// micro-architectures can also be finely tuned inside. + virtual bool isAsCheapAsAMove(const MachineInstr &MI) const { + return MI.isAsCheapAsAMove(); + } + + /// Return true if the instruction should be sunk by MachineSink. + /// + /// MachineSink determines on its own whether the instruction is safe to sink; + /// this gives the target a hook to override the default behavior with regards + /// to which instructions should be sunk. + virtual bool shouldSink(const MachineInstr &MI) const { return true; } + + /// Re-issue the specified 'original' instruction at the + /// specific location targeting a new destination register. + /// The register in Orig->getOperand(0).getReg() will be substituted by + /// DestReg:SubIdx. Any existing subreg index is preserved or composed with + /// SubIdx. + virtual void reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, unsigned DestReg, + unsigned SubIdx, const MachineInstr &Orig, + const TargetRegisterInfo &TRI) const; + + /// \brief Clones instruction or the whole instruction bundle \p Orig and + /// insert into \p MBB before \p InsertBefore. The target may update operands + /// that are required to be unique. 
+ /// + /// \p Orig must not return true for MachineInstr::isNotDuplicable(). + virtual MachineInstr &duplicate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) const; + + /// This method must be implemented by targets that + /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target + /// may be able to convert a two-address instruction into one or more true + /// three-address instructions on demand. This allows the X86 target (for + /// example) to convert ADD and SHL instructions into LEA instructions if they + /// would require register copies due to two-addressness. + /// + /// This method returns a null pointer if the transformation cannot be + /// performed, otherwise it returns the last new instruction. + /// + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineInstr &MI, + LiveVariables *LV) const { + return nullptr; + } + + // This constant can be used as an input value of operand index passed to + // the method findCommutedOpIndices() to tell the method that the + // corresponding operand index is not pre-defined and that the method + // can pick any commutable operand. + static const unsigned CommuteAnyOperandIndex = ~0U; + + /// This method commutes the operands of the given machine instruction MI. + /// + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. OpIdx1 and OpIdx2 arguments may be set to a special value + /// 'CommuteAnyOperandIndex', which means that the method is free to choose + /// any arbitrarily chosen commutable operand. If both arguments are set to + /// 'CommuteAnyOperandIndex' then the method looks for 2 different commutable + /// operands; then commutes them if such operands could be found. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction or + /// for non-commuable operands. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr * + commuteInstruction(MachineInstr &MI, bool NewMI = false, + unsigned OpIdx1 = CommuteAnyOperandIndex, + unsigned OpIdx2 = CommuteAnyOperandIndex) const; + + /// Returns true iff the routine could find two commutable operands in the + /// given machine instruction. + /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. + /// If any of the INPUT values is set to the special value + /// 'CommuteAnyOperandIndex' then the method arbitrarily picks a commutable + /// operand, then returns its index in the corresponding argument. + /// If both of INPUT values are set to 'CommuteAnyOperandIndex' then method + /// looks for 2 commutable operands. + /// If INPUT values refer to some operands of MI, then the method simply + /// returns true if the corresponding operands are commutable and returns + /// false otherwise. + /// + /// For example, calling this method this way: + /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex; + /// findCommutedOpIndices(MI, Op1, Op2); + /// can be interpreted as a query asking to find an operand that would be + /// commutable with the operand#1. + virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const; + + /// A pair composed of a register and a sub-register index. + /// Used to give some type checking when modeling Reg:SubReg. 
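A hedged usage sketch for the commutation hooks above; TII and MI are assumed to exist in the caller.

unsigned Idx1 = TargetInstrInfo::CommuteAnyOperandIndex;
unsigned Idx2 = TargetInstrInfo::CommuteAnyOperandIndex;
// Let the target pick any pair of commutable operands, then commute in place.
if (TII->findCommutedOpIndices(MI, Idx1, Idx2))
  TII->commuteInstruction(MI, /*NewMI=*/false, Idx1, Idx2);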
+ struct RegSubRegPair { + unsigned Reg; + unsigned SubReg; + + RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0) + : Reg(Reg), SubReg(SubReg) {} + }; + + /// A pair composed of a pair of a register and a sub-register index, + /// and another sub-register index. + /// Used to give some type checking when modeling Reg:SubReg1, SubReg2. + struct RegSubRegPairAndIdx : RegSubRegPair { + unsigned SubIdx; + + RegSubRegPairAndIdx(unsigned Reg = 0, unsigned SubReg = 0, + unsigned SubIdx = 0) + : RegSubRegPair(Reg, SubReg), SubIdx(SubIdx) {} + }; + + /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI + /// and \p DefIdx. + /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of + /// the list is modeled as <Reg:SubReg, SubIdx>. + /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce + /// two elements: + /// - %1:sub1, sub0 + /// - %2<:0>, sub1 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequence() or MI.isRegSequenceLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isRegSequenceLike(). In other words, one has to override + /// getRegSequenceLikeInputs for target specific instructions. + bool + getRegSequenceInputs(const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const; + + /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. + /// E.g., EXTRACT_SUBREG %1:sub1, sub0, sub1 would produce: + /// - %1:sub1, sub0 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isExtractSubregLike(). In other words, one has to override + /// getExtractSubregLikeInputs for target specific instructions. + bool getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const; + + /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] BaseReg and \p [out] InsertedReg contain + /// the equivalent inputs of INSERT_SUBREG. + /// E.g., INSERT_SUBREG %0:sub0, %1:sub1, sub3 would produce: + /// - BaseReg: %0:sub0 + /// - InsertedReg: %1:sub1, sub3 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). + /// + /// \note The generic implementation does not provide any support for + /// MI.isInsertSubregLike(). In other words, one has to override + /// getInsertSubregLikeInputs for target specific instructions. + bool getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const; + + /// Return true if two machine instructions would produce identical values. + /// By default, this is only true when the two instructions + /// are deemed identical except for defs. If this function is called when the + /// IR is still in SSA form, the caller can pass the MachineRegisterInfo for + /// aggressive checks. 
+ virtual bool produceSameValue(const MachineInstr &MI0, + const MachineInstr &MI1, + const MachineRegisterInfo *MRI = nullptr) const; + + /// \returns true if a branch from an instruction with opcode \p BranchOpc + /// bytes is capable of jumping to a position \p BrOffset bytes away. + virtual bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const { + llvm_unreachable("target did not implement"); + } + + /// \returns The block that branch instruction \p MI jumps to. + virtual MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const { + llvm_unreachable("target did not implement"); + } + + /// Insert an unconditional indirect branch at the end of \p MBB to \p + /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to + /// the offset of the position to insert the new branch. + /// + /// \returns The number of bytes added to the block. + virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + const DebugLoc &DL, + int64_t BrOffset = 0, + RegScavenger *RS = nullptr) const { + llvm_unreachable("target did not implement"); + } + + /// Analyze the branching code at the end of MBB, returning + /// true if it cannot be understood (e.g. it's a switch dispatch or isn't + /// implemented for a target). Upon success, this returns false and returns + /// with the following information in various cases: + /// + /// 1. If this block ends with no branches (it just falls through to its succ) + /// just return false, leaving TBB/FBB null. + /// 2. If this block ends with only an unconditional branch, it sets TBB to be + /// the destination block. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. These operands can be + /// passed to other TargetInstrInfo methods to create new branches. + /// 4. If this block ends with a conditional branch followed by an + /// unconditional branch, it returns the 'true' destination in TBB, the + /// 'false' destination in FBB, and a list of operands that evaluate the + /// condition. These operands can be passed to other TargetInstrInfo + /// methods to create new branches. + /// + /// Note that removeBranch and insertBranch must be implemented to support + /// cases where this method returns success. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// + /// The CFG information in MBB.Predecessors and MBB.Successors must be valid + /// before calling this function. + virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const { + return true; + } + + /// Represents a predicate at the MachineFunction level. 
The control flow a + /// MachineBranchPredicate represents is: + /// + /// Reg = LHS `Predicate` RHS == ConditionDef + /// if Reg then goto TrueDest else goto FalseDest + /// + struct MachineBranchPredicate { + enum ComparePredicate { + PRED_EQ, // True if two values are equal + PRED_NE, // True if two values are not equal + PRED_INVALID // Sentinel value + }; + + ComparePredicate Predicate = PRED_INVALID; + MachineOperand LHS = MachineOperand::CreateImm(0); + MachineOperand RHS = MachineOperand::CreateImm(0); + MachineBasicBlock *TrueDest = nullptr; + MachineBasicBlock *FalseDest = nullptr; + MachineInstr *ConditionDef = nullptr; + + /// SingleUseCondition is true if ConditionDef is dead except for the + /// branch(es) at the end of the basic block. + /// + bool SingleUseCondition = false; + + explicit MachineBranchPredicate() = default; + }; + + /// Analyze the branching code at the end of MBB and parse it into the + /// MachineBranchPredicate structure if possible. Returns false on success + /// and true on failure. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// + virtual bool analyzeBranchPredicate(MachineBasicBlock &MBB, + MachineBranchPredicate &MBP, + bool AllowModify = false) const { + return true; + } + + /// Remove the branching code at the end of the specific MBB. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions that were removed. + /// If \p BytesRemoved is non-null, report the change in code size from the + /// removed instructions. + virtual unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!"); + } + + /// Insert branch code into the end of the specified MachineBasicBlock. The + /// operands to this method are the same as those returned by AnalyzeBranch. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions inserted. If \p BytesAdded is non-null, + /// report the change in code size from the added instructions. + /// + /// It is also invoked by tail merging to add unconditional branches in + /// cases where AnalyzeBranch doesn't apply because there was no original + /// branch to analyze. At least this much must be implemented, else tail + /// merging needs to be disabled. + /// + /// The CFG information in MBB.Predecessors and MBB.Successors must be valid + /// before calling this function. + virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef<MachineOperand> Cond, + const DebugLoc &DL, + int *BytesAdded = nullptr) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertBranch!"); + } + + unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, + MachineBasicBlock *DestBB, + const DebugLoc &DL, + int *BytesAdded = nullptr) const { + return insertBranch(MBB, DestBB, nullptr, ArrayRef<MachineOperand>(), DL, + BytesAdded); + } + + /// Analyze the loop code, return true if it cannot be understoo. Upon + /// success, this function returns false and returns information about the + /// induction variable and compare instruction used at the end. 
+  virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                           MachineInstr *&CmpInst) const {
+    return true;
+  }
+
+  /// Generate code to reduce the loop iteration by one and check if the loop is
+  /// finished. Return the value/register of the new loop count. We need
+  /// this function when peeling off one or more iterations of a loop. This
+  /// function assumes the nth iteration is peeled first.
+  virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
+                                   MachineInstr &Cmp,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   SmallVectorImpl<MachineInstr *> &PrevInsts,
+                                   unsigned Iter, unsigned MaxIter) const {
+    llvm_unreachable("Target didn't implement ReduceLoopCount");
+  }
+
+  /// Delete the instruction OldInst and everything after it, replacing it with
+  /// an unconditional branch to NewDest. This is used by the tail merging pass.
+  virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                       MachineBasicBlock *NewDest) const;
+
+  /// Return true if it's legal to split the given basic
+  /// block at the specified instruction (i.e. instruction would be the start
+  /// of a new basic block).
+  virtual bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI) const {
+    return true;
+  }
+
+  /// Return true if it's profitable to predicate
+  /// instructions with accumulated instruction latency of "NumCycles"
+  /// of the specified basic block, where the probability of the instructions
+  /// being executed is given by Probability, and Confidence is a measure
+  /// of our confidence that it will be properly predicted.
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                                   unsigned ExtraPredCycles,
+                                   BranchProbability Probability) const {
+    return false;
+  }
+
+  /// Second variant of isProfitableToIfCvt. This one
+  /// checks for the case where two basic blocks from true and false path
+  /// of an if-then-else (diamond) are predicated on mutually exclusive
+  /// predicates, where the probability of the true path being taken is given
+  /// by Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles,
+                                   unsigned ExtraTCycles,
+                                   MachineBasicBlock &FMBB, unsigned NumFCycles,
+                                   unsigned ExtraFCycles,
+                                   BranchProbability Probability) const {
+    return false;
+  }
+
+  /// Return true if it's profitable for if-converter to duplicate instructions
+  /// of specified accumulated instruction latencies in the specified MBB to
+  /// enable if-conversion.
+  /// The probability of the instructions being executed is given by
+  /// Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
+  virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+                                         unsigned NumCycles,
+                                         BranchProbability Probability) const {
+    return false;
+  }
+
+  /// Return true if it's profitable to unpredicate
+  /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
+  /// exclusive predicates.
+  /// e.g.
+  ///   subeq  r0, r1, #1
+  ///   addne  r0, r1, #1
+  /// =>
+  ///   sub    r0, r1, #1
+  ///   addne  r0, r1, #1
+  ///
+  /// This may be profitable if conditional instructions are always executed.
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; + } + + /// Return true if it is possible to insert a select + /// instruction that chooses between TrueReg and FalseReg based on the + /// condition code in Cond. + /// + /// When successful, also return the latency in cycles from TrueReg, + /// FalseReg, and Cond to the destination register. In most cases, a select + /// instruction will be 1 cycle, so CondCycles = TrueCycles = FalseCycles = 1 + /// + /// Some x86 implementations have 2-cycle cmov instructions. + /// + /// @param MBB Block where select instruction would be inserted. + /// @param Cond Condition returned by AnalyzeBranch. + /// @param TrueReg Virtual register to select when Cond is true. + /// @param FalseReg Virtual register to select when Cond is false. + /// @param CondCycles Latency from Cond+Branch to select output. + /// @param TrueCycles Latency from TrueReg to select output. + /// @param FalseCycles Latency from FalseReg to select output. + virtual bool canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef<MachineOperand> Cond, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { + return false; + } + + /// Insert a select instruction into MBB before I that will copy TrueReg to + /// DstReg when Cond is true, and FalseReg to DstReg when Cond is false. + /// + /// This function can only be called after canInsertSelect() returned true. + /// The condition in Cond comes from AnalyzeBranch, and it can be assumed + /// that the same flags or registers required by Cond are available at the + /// insertion point. + /// + /// @param MBB Block where select instruction should be inserted. + /// @param I Insertion point. + /// @param DL Source location for debugging. + /// @param DstReg Virtual register to be defined by select instruction. + /// @param Cond Condition as computed by AnalyzeBranch. + /// @param TrueReg Virtual register to copy when Cond is true. + /// @param FalseReg Virtual register to copy when Cons is false. + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef<MachineOperand> Cond, + unsigned TrueReg, unsigned FalseReg) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!"); + } + + /// Analyze the given select instruction, returning true if + /// it cannot be understood. It is assumed that MI->isSelect() is true. + /// + /// When successful, return the controlling condition and the operands that + /// determine the true and false result values. + /// + /// Result = SELECT Cond, TrueOp, FalseOp + /// + /// Some targets can optimize select instructions, for example by predicating + /// the instruction defining one of the operands. Such targets should set + /// Optimizable. + /// + /// @param MI Select instruction to analyze. + /// @param Cond Condition controlling the select. + /// @param TrueOp Operand number of the value selected when Cond is true. + /// @param FalseOp Operand number of the value selected when Cond is false. + /// @param Optimizable Returned as true if MI is optimizable. + /// @returns False on success. 
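+  ///
+  /// For a target select laid out as, say (the operand layout here is purely
+  /// illustrative):
+  /// \code
+  ///   %dst = CSELrr %tval, %fval, %cc
+  /// \endcode
+  /// an implementation would typically set TrueOp = 1 and FalseOp = 2, copy
+  /// the condition operand(s) into Cond, and set Optimizable when the target
+  /// can fold the instruction defining %tval or %fval into the select.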
+ virtual bool analyzeSelect(const MachineInstr &MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const { + assert(MI.getDesc().isSelect() && "MI must be a select instruction"); + return true; + } + + /// Given a select instruction that was understood by + /// analyzeSelect and returned Optimizable = true, attempt to optimize MI by + /// merging it with one of its operands. Returns NULL on failure. + /// + /// When successful, returns the new select instruction. The client is + /// responsible for deleting MI. + /// + /// If both sides of the select can be optimized, PreferFalse is used to pick + /// a side. + /// + /// @param MI Optimizable select instruction. + /// @param NewMIs Set that record all MIs in the basic block up to \p + /// MI. Has to be updated with any newly created MI or deleted ones. + /// @param PreferFalse Try to optimize FalseOp instead of TrueOp. + /// @returns Optimized instruction or NULL. + virtual MachineInstr *optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &NewMIs, + bool PreferFalse = false) const { + // This function must be implemented if Optimizable is ever set. + llvm_unreachable("Target must implement TargetInstrInfo::optimizeSelect!"); + } + + /// Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. See for example the ARM target. + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, const DebugLoc &DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!"); + } + + /// Store the specified register of the given register class to the specified + /// stack frame index. The store instruction is to be added to the given + /// machine basic block before the specified machine instruction. If isKill + /// is true, the register operand is the last use and must be marked kill. + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::storeRegToStackSlot!"); + } + + /// Load the specified register of the given register class from the specified + /// stack frame index. The load instruction is to be added to the given + /// machine basic block before the specified machine instruction. + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::loadRegFromStackSlot!"); + } + + /// This function is called for all pseudo instructions + /// that remain after register allocation. Many pseudo instructions are + /// created to help register allocation. This is the place to convert them + /// into real instructions. The target can edit MI in place, or it can insert + /// new instructions and erase MI. The function should return true if + /// anything was changed. 
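+  ///
+  /// As a sketch only, a target with a hypothetical RETPOP pseudo (restore the
+  /// stack, then return) might expand it like this; the Foo opcodes below are
+  /// assumptions, not real instructions:
+  /// \code
+  ///   bool FooInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  ///     if (MI.getOpcode() != Foo::RETPOP)
+  ///       return false;
+  ///     MachineBasicBlock &MBB = *MI.getParent();
+  ///     const DebugLoc &DL = MI.getDebugLoc();
+  ///     BuildMI(MBB, MI, DL, get(Foo::ADDSPri)).addImm(MI.getOperand(0).getImm());
+  ///     BuildMI(MBB, MI, DL, get(Foo::RET));
+  ///     MI.eraseFromParent();
+  ///     return true;
+  ///   }
+  /// \endcode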
+ virtual bool expandPostRAPseudo(MachineInstr &MI) const { return false; } + + /// Check whether the target can fold a load that feeds a subreg operand + /// (or a subreg operand that feeds a store). + /// For example, X86 may want to return true if it can fold + /// movl (%esp), %eax + /// subb, %al, ... + /// Into: + /// subb (%esp), ... + /// + /// Ideally, we'd like the target implementation of foldMemoryOperand() to + /// reject subregs - but since this behavior used to be enforced in the + /// target-independent code, moving this responsibility to the targets + /// has the potential of causing nasty silent breakage in out-of-tree targets. + virtual bool isSubregFoldable() const { return false; } + + /// Attempt to fold a load or store of the specified stack + /// slot into the specified machine instruction for the specified operand(s). + /// If this is possible, a new instruction is returned with the specified + /// operand folded, otherwise NULL is returned. + /// The new instruction is inserted before MI, and the client is responsible + /// for removing the old instruction. + MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, + int FrameIndex, + LiveIntervals *LIS = nullptr) const; + + /// Same as the previous version except it allows folding of any load and + /// store from / to any address, not just from a specific stack slot. + MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineInstr &LoadMI, + LiveIntervals *LIS = nullptr) const; + + /// Return true when there is potentially a faster code sequence + /// for an instruction chain ending in \p Root. All potential patterns are + /// returned in the \p Pattern vector. Pattern should be sorted in priority + /// order since the pattern evaluator stops checking as soon as it finds a + /// faster sequence. + /// \param Root - Instruction that could be combined with one of its operands + /// \param Patterns - Vector of possible combination patterns + virtual bool getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const; + + /// Return true when a code sequence can improve throughput. It + /// should be called only for instructions in loops. + /// \param Pattern - combiner pattern + virtual bool isThroughputPattern(MachineCombinerPattern Pattern) const; + + /// Return true if the input \P Inst is part of a chain of dependent ops + /// that are suitable for reassociation, otherwise return false. + /// If the instruction's operands must be commuted to have a previous + /// instruction of the same type define the first source operand, \P Commuted + /// will be set to true. + bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const; + + /// Return true when \P Inst is both associative and commutative. + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { + return false; + } + + /// Return true when \P Inst has reassociable operands in the same \P MBB. + virtual bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const; + + /// Return true when \P Inst has reassociable sibling. + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const; + + /// When getMachineCombinerPatterns() finds patterns, this function generates + /// the instructions that could replace the original code sequence. The client + /// has to decide whether the actual replacement is beneficial or not. 
+ /// \param Root - Instruction that could be combined with one of its operands + /// \param Pattern - Combination pattern for Root + /// \param InsInstrs - Vector of new instructions that implement P + /// \param DelInstrs - Old instructions, including Root, that could be + /// replaced by InsInstr + /// \param InstrIdxForVirtReg - map of virtual register to instruction in + /// InsInstr that defines it + virtual void genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; + + /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to + /// reduce critical path length. + void reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; + + /// This is an architecture-specific helper function of reassociateOps. + /// Set special operand attributes for new instructions after reassociation. + virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const {} + + /// Return true when a target supports MachineCombiner. + virtual bool useMachineCombiner() const { return false; } + +protected: + /// Target-dependent implementation for foldMemoryOperand. + /// Target-independent code in foldMemoryOperand will + /// take care of adding a MachineMemOperand to the newly created instruction. + /// The instruction and any auxiliary instructions necessary will be inserted + /// at InsertPt. + virtual MachineInstr * + foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, + LiveIntervals *LIS = nullptr) const { + return nullptr; + } + + /// Target-dependent implementation for foldMemoryOperand. + /// Target-independent code in foldMemoryOperand will + /// take care of adding a MachineMemOperand to the newly created instruction. + /// The instruction and any auxiliary instructions necessary will be inserted + /// at InsertPt. + virtual MachineInstr *foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, + LiveIntervals *LIS = nullptr) const { + return nullptr; + } + + /// \brief Target-dependent implementation of getRegSequenceInputs. + /// + /// \returns true if it is possible to build the equivalent + /// REG_SEQUENCE inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequenceLike(). + /// + /// \see TargetInstrInfo::getRegSequenceInputs. + virtual bool getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { + return false; + } + + /// \brief Target-dependent implementation of getExtractSubregInputs. + /// + /// \returns true if it is possible to build the equivalent + /// EXTRACT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubregLike(). + /// + /// \see TargetInstrInfo::getExtractSubregInputs. + virtual bool getExtractSubregLikeInputs(const MachineInstr &MI, + unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + return false; + } + + /// \brief Target-dependent implementation of getInsertSubregInputs. 
+ /// + /// \returns true if it is possible to build the equivalent + /// INSERT_SUBREG inputs with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubregLike(). + /// + /// \see TargetInstrInfo::getInsertSubregInputs. + virtual bool + getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const { + return false; + } + +public: + /// getAddressSpaceForPseudoSourceKind - Given the kind of memory + /// (e.g. stack) the target returns the corresponding address space. + virtual unsigned + getAddressSpaceForPseudoSourceKind(PseudoSourceValue::PSVKind Kind) const { + return 0; + } + + /// unfoldMemoryOperand - Separate a single instruction which folded a load or + /// a store or a load and a store into two or more instruction. If this is + /// possible, returns true as well as the new instructions by reference. + virtual bool + unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, + bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl<MachineInstr *> &NewMIs) const { + return false; + } + + virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl<SDNode *> &NewNodes) const { + return false; + } + + /// Returns the opcode of the would be new + /// instruction after load / store are unfolded from an instruction of the + /// specified opcode. It returns zero if the specified unfolding is not + /// possible. If LoadRegIndex is non-null, it is filled in with the operand + /// index of the operand which will hold the register holding the loaded + /// value. + virtual unsigned + getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = nullptr) const { + return 0; + } + + /// This is used by the pre-regalloc scheduler to determine if two loads are + /// loading from the same base address. It should only return true if the base + /// pointers are the same and the only differences between the two addresses + /// are the offset. It also returns the offsets by reference. + virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, + int64_t &Offset2) const { + return false; + } + + /// This is a used by the pre-regalloc scheduler to determine (in conjunction + /// with areLoadsFromSameBasePtr) if two loads should be scheduled together. + /// On some targets if two loads are loading from + /// addresses in the same cache line, it's better if they are scheduled + /// together. This function takes two integers that represent the load offsets + /// from the common base address. It returns true if it decides it's desirable + /// to schedule the two loads together. "NumLoads" is the number of loads that + /// have already been scheduled after Load1. + virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + return false; + } + + /// Get the base register and byte offset of an instruction that reads/writes + /// memory. + virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, + int64_t &Offset, + const TargetRegisterInfo *TRI) const { + return false; + } + + /// Return true if the instruction contains a base register and offset. If + /// true, the function also sets the operand position in the instruction + /// for the base register and offset. 
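+  ///
+  /// For instance, if a target's base+immediate load is laid out as (purely
+  /// illustrative):
+  /// \code
+  ///   %dst = LDriw %base, <imm>
+  /// \endcode
+  /// the hook would set BasePos = 1, OffsetPos = 2 and return true.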
+ virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, + unsigned &BasePos, + unsigned &OffsetPos) const { + return false; + } + + /// If the instruction is an increment of a constant value, return the amount. + virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const { + return false; + } + + /// Returns true if the two given memory operations should be scheduled + /// adjacent. Note that you have to add: + /// DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + /// or + /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + /// to TargetPassConfig::createMachineScheduler() to have an effect. + virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1, + MachineInstr &SecondLdSt, unsigned BaseReg2, + unsigned NumLoads) const { + llvm_unreachable("target did not implement shouldClusterMemOps()"); + } + + /// Reverses the branch condition of the specified condition list, + /// returning false on success and true if it cannot be reversed. + virtual bool + reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + return true; + } + + /// Insert a noop into the instruction stream at the specified point. + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + /// Return the noop instruction to use for a noop. + virtual void getNoop(MCInst &NopInst) const; + + /// Return true for post-incremented instructions. + virtual bool isPostIncrement(const MachineInstr &MI) const { return false; } + + /// Returns true if the instruction is already predicated. + virtual bool isPredicated(const MachineInstr &MI) const { return false; } + + /// Returns true if the instruction is a + /// terminator instruction that has not been predicated. + virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const; + + /// Returns true if MI is an unconditional tail call. + virtual bool isUnconditionalTailCall(const MachineInstr &MI) const { + return false; + } + + /// Returns true if the tail call can be made conditional on BranchCond. + virtual bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond, + const MachineInstr &TailCall) const { + return false; + } + + /// Replace the conditional branch in MBB with a conditional tail call. + virtual void replaceBranchWithTailCall(MachineBasicBlock &MBB, + SmallVectorImpl<MachineOperand> &Cond, + const MachineInstr &TailCall) const { + llvm_unreachable("Target didn't implement replaceBranchWithTailCall!"); + } + + /// Convert the instruction into a predicated instruction. + /// It returns true if the operation was successful. + virtual bool PredicateInstruction(MachineInstr &MI, + ArrayRef<MachineOperand> Pred) const; + + /// Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. + virtual bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1, + ArrayRef<MachineOperand> Pred2) const { + return false; + } + + /// If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. + virtual bool DefinesPredicate(MachineInstr &MI, + std::vector<MachineOperand> &Pred) const { + return false; + } + + /// Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. 
+ virtual bool isPredicable(const MachineInstr &MI) const { + return MI.getDesc().isPredicable(); + } + + /// Return true if it's safe to move a machine + /// instruction that defines the specified register class. + virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + return true; + } + + /// Test if the given instruction should be considered a scheduling boundary. + /// This primarily includes labels and terminators. + virtual bool isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; + + /// Measure the specified inline asm to determine an approximation of its + /// length. + virtual unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions before register allocation. + virtual ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions before register allocation. + virtual ScheduleHazardRecognizer * + CreateTargetMIHazardRecognizer(const InstrItineraryData *, + const ScheduleDAG *DAG) const; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions after register allocation. + virtual ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *, + const ScheduleDAG *DAG) const; + + /// Allocate and return a hazard recognizer to use for by non-scheduling + /// passes. + virtual ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { + return nullptr; + } + + /// Provide a global flag for disabling the PreRA hazard recognizer that + /// targets may choose to honor. + bool usePreRAHazardRecognizer() const; + + /// For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, + unsigned &SrcReg2, int &Mask, int &Value) const { + return false; + } + + /// See if the comparison instruction can be converted + /// into something more efficient. E.g., on ARM most instructions can set the + /// flags register, obviating the need for a separate CMP. + virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int Mask, int Value, + const MachineRegisterInfo *MRI) const { + return false; + } + virtual bool optimizeCondBranch(MachineInstr &MI) const { return false; } + + /// Try to remove the load by folding it to a register operand at the use. + /// We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. 
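+  ///
+  /// On x86, for example, this is the hook that lets a sequence such as
+  /// \code
+  ///   %tmp = MOV32rm <mem>
+  ///   ...  = ADD32rr %other, %tmp
+  /// \endcode
+  /// be rewritten into the memory-operand form ADD32rm when the load has no
+  /// other users (the opcode names here are only indicative).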
+ virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + return nullptr; + } + + /// 'Reg' is known to be defined by a move immediate instruction, + /// try to fold the immediate into the use instruction. + /// If MRI->hasOneNonDBGUse(Reg) is true, and this function returns true, + /// then the caller may assume that DefMI has been erased from its parent + /// block. The caller may assume that it will not be erased by this + /// function otherwise. + virtual bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const { + return false; + } + + /// Return the number of u-operations the given machine + /// instruction will be decoded to on the target cpu. The itinerary's + /// IssueWidth is the number of microops that can be dispatched each + /// cycle. An instruction with zero microops takes no dispatch resources. + virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr &MI) const; + + /// Return true for pseudo instructions that don't consume any + /// machine resources in their current form. These are common cases that the + /// scheduler should consider free, rather than conservatively handling them + /// as instructions with no itinerary. + bool isZeroCost(unsigned Opcode) const { + return Opcode <= TargetOpcode::COPY; + } + + virtual int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const; + + /// Compute and return the use operand latency of a given pair of def and use. + /// In most cases, the static scheduling itinerary was enough to determine the + /// operand latency. But it may not be possible for instructions with variable + /// number of defs / uses. + /// + /// This is a raw interface to the itinerary that may be directly overridden + /// by a target. Use computeOperandLatency to get the best estimate of + /// latency. + virtual int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const; + + /// Compute the instruction latency of a given instruction. + /// If the instruction has higher cost when predicated, it's returned via + /// PredCost. + virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr &MI, + unsigned *PredCost = nullptr) const; + + virtual unsigned getPredicationCost(const MachineInstr &MI) const; + + virtual int getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const; + + /// Return the default expected latency for a def based on its opcode. + unsigned defaultDefLatency(const MCSchedModel &SchedModel, + const MachineInstr &DefMI) const; + + int computeDefOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI) const; + + /// Return true if this opcode has high latency to its result. + virtual bool isHighLatencyDef(int opc) const { return false; } + + /// Compute operand latency between a def of 'Reg' + /// and a use in the current loop. Return true if the target considered + /// it 'high'. This is used by optimization passes such as machine LICM to + /// determine whether it makes sense to hoist an instruction out even in a + /// high register pressure situation. 
+ virtual bool hasHighOperandLatency(const TargetSchedModel &SchedModel, + const MachineRegisterInfo *MRI, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const { + return false; + } + + /// Compute operand latency of a def of 'Reg'. Return true + /// if the target considered it 'low'. + virtual bool hasLowDefLatency(const TargetSchedModel &SchedModel, + const MachineInstr &DefMI, + unsigned DefIdx) const; + + /// Perform target-specific instruction verification. + virtual bool verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + return true; + } + + /// Return the current execution domain and bit mask of + /// possible domains for instruction. + /// + /// Some micro-architectures have multiple execution domains, and multiple + /// opcodes that perform the same operation in different domains. For + /// example, the x86 architecture provides the por, orps, and orpd + /// instructions that all do the same thing. There is a latency penalty if a + /// register is written in one domain and read in another. + /// + /// This function returns a pair (domain, mask) containing the execution + /// domain of MI, and a bit mask of possible domains. The setExecutionDomain + /// function can be used to change the opcode to one of the domains in the + /// bit mask. Instructions whose execution domain can't be changed should + /// return a 0 mask. + /// + /// The execution domain numbers don't have any special meaning except domain + /// 0 is used for instructions that are not associated with any interesting + /// execution domain. + /// + virtual std::pair<uint16_t, uint16_t> + getExecutionDomain(const MachineInstr &MI) const { + return std::make_pair(0, 0); + } + + /// Change the opcode of MI to execute in Domain. + /// + /// The bit (1 << Domain) must be set in the mask returned from + /// getExecutionDomain(MI). + virtual void setExecutionDomain(MachineInstr &MI, unsigned Domain) const {} + + /// Returns the preferred minimum clearance + /// before an instruction with an unwanted partial register update. + /// + /// Some instructions only write part of a register, and implicitly need to + /// read the other parts of the register. This may cause unwanted stalls + /// preventing otherwise unrelated instructions from executing in parallel in + /// an out-of-order CPU. + /// + /// For example, the x86 instruction cvtsi2ss writes its result to bits + /// [31:0] of the destination xmm register. Bits [127:32] are unaffected, so + /// the instruction needs to wait for the old value of the register to become + /// available: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// cvtsi2ss %rbx, %xmm0 + /// + /// In the code above, the cvtsi2ss instruction needs to wait for the addps + /// instruction before it can issue, even though the high bits of %xmm0 + /// probably aren't needed. + /// + /// This hook returns the preferred clearance before MI, measured in + /// instructions. Other defs of MI's operand OpNum are avoided in the last N + /// instructions before MI. It should only return a positive value for + /// unwanted dependencies. If the old bits of the defined register have + /// useful values, or if MI is determined to otherwise read the dependency, + /// the hook should return 0. + /// + /// The unwanted dependency may be handled by: + /// + /// 1. Allocating the same register for an MI def and use. That makes the + /// unwanted dependency identical to a required dependency. + /// + /// 2. 
Allocating a register for the def that has no defs in the previous N + /// instructions. + /// + /// 3. Calling breakPartialRegDependency() with the same arguments. This + /// allows the target to insert a dependency breaking instruction. + /// + virtual unsigned + getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // The default implementation returns 0 for no partial register dependency. + return 0; + } + + /// \brief Return the minimum clearance before an instruction that reads an + /// unused register. + /// + /// For example, AVX instructions may copy part of a register operand into + /// the unused high bits of the destination register. + /// + /// vcvtsi2sdq %rax, undef %xmm0, %xmm14 + /// + /// In the code above, vcvtsi2sdq copies %xmm0[127:64] into %xmm14 creating a + /// false dependence on any previous write to %xmm0. + /// + /// This hook works similarly to getPartialRegUpdateClearance, except that it + /// does not take an operand index. Instead sets \p OpNum to the index of the + /// unused register. + virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, + const TargetRegisterInfo *TRI) const { + // The default implementation returns 0 for no undef register dependency. + return 0; + } + + /// Insert a dependency-breaking instruction + /// before MI to eliminate an unwanted dependency on OpNum. + /// + /// If it wasn't possible to avoid a def in the last N instructions before MI + /// (see getPartialRegUpdateClearance), this hook will be called to break the + /// unwanted dependency. + /// + /// On x86, an xorps instruction can be used as a dependency breaker: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// xorps %xmm0, %xmm0 + /// cvtsi2ss %rbx, %xmm0 + /// + /// An <imp-kill> operand should be added to MI if an instruction was + /// inserted. This ties the instructions together in the post-ra scheduler. + /// + virtual void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const {} + + /// Create machine specific model for scheduling. + virtual DFAPacketizer * + CreateTargetScheduleState(const TargetSubtargetInfo &) const { + return nullptr; + } + + /// Sometimes, it is possible for the target + /// to tell, even without aliasing information, that two MIs access different + /// memory addresses. This function returns true if two MIs access different + /// memory addresses and false otherwise. + /// + /// Assumes any physical registers used to compute addresses have the same + /// value for both instructions. (This is the most useful assumption for + /// post-RA scheduling.) + /// + /// See also MachineInstr::mayAlias, which is implemented on top of this + /// function. + virtual bool + areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, + AliasAnalysis *AA = nullptr) const { + assert((MIa.mayLoad() || MIa.mayStore()) && + "MIa must load from or modify a memory location"); + assert((MIb.mayLoad() || MIb.mayStore()) && + "MIb must load from or modify a memory location"); + return false; + } + + /// \brief Return the value to use for the MachineCSE's LookAheadLimit, + /// which is a heuristic used for CSE'ing phys reg defs. + virtual unsigned getMachineCSELookAheadLimit() const { + // The default lookahead is small to prevent unprofitable quadratic + // behavior. + return 5; + } + + /// Return an array that contains the ids of the target indices (used for the + /// TargetIndex machine operand) and their names. 
+ /// + /// MIR Serialization is able to serialize only the target indices that are + /// defined by this method. + virtual ArrayRef<std::pair<int, const char *>> + getSerializableTargetIndices() const { + return None; + } + + /// Decompose the machine operand's target flags into two values - the direct + /// target flag value and any of bit flags that are applied. + virtual std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned /*TF*/) const { + return std::make_pair(0u, 0u); + } + + /// Return an array that contains the direct target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + virtual ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const { + return None; + } + + /// Return an array that contains the bitmask target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + virtual ArrayRef<std::pair<unsigned, const char *>> + getSerializableBitmaskMachineOperandTargetFlags() const { + return None; + } + + /// Return an array that contains the MMO target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the MMO target flags that are + /// defined by this method. + virtual ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> + getSerializableMachineMemOperandTargetFlags() const { + return None; + } + + /// Determines whether \p Inst is a tail call instruction. Override this + /// method on targets that do not properly set MCID::Return and MCID::Call on + /// tail call instructions." + virtual bool isTailCall(const MachineInstr &Inst) const { + return Inst.isReturn() && Inst.isCall(); + } + + /// True if the instruction is bound to the top of its basic block and no + /// other instructions shall be inserted before it. This can be implemented + /// to prevent register allocator to insert spills before such instructions. + virtual bool isBasicBlockPrologue(const MachineInstr &MI) const { + return false; + } + + /// \brief Describes the number of instructions that it will take to call and + /// construct a frame for a given outlining candidate. + struct MachineOutlinerInfo { + /// Number of instructions to call an outlined function for this candidate. + unsigned CallOverhead; + + /// \brief Number of instructions to construct an outlined function frame + /// for this candidate. + unsigned FrameOverhead; + + /// \brief Represents the specific instructions that must be emitted to + /// construct a call to this candidate. + unsigned CallConstructionID; + + /// \brief Represents the specific instructions that must be emitted to + /// construct a frame for this candidate's outlined function. + unsigned FrameConstructionID; + + MachineOutlinerInfo() {} + MachineOutlinerInfo(unsigned CallOverhead, unsigned FrameOverhead, + unsigned CallConstructionID, + unsigned FrameConstructionID) + : CallOverhead(CallOverhead), FrameOverhead(FrameOverhead), + CallConstructionID(CallConstructionID), + FrameConstructionID(FrameConstructionID) {} + }; + + /// \brief Returns a \p MachineOutlinerInfo struct containing target-specific + /// information for a set of outlining candidates. 
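+  ///
+  /// A target whose outlined calls are a single call instruction and whose
+  /// outlined frames need only a return might, for instance, report (sketch;
+  /// the Foo*ID constants are assumptions):
+  /// \code
+  ///   return MachineOutlinerInfo(/*CallOverhead=*/1, /*FrameOverhead=*/1,
+  ///                              /*CallConstructionID=*/FooCallID,
+  ///                              /*FrameConstructionID=*/FooFrameID);
+  /// \endcode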
+ virtual MachineOutlinerInfo getOutlininingCandidateInfo( + std::vector< + std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> + &RepeatedSequenceLocs) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::getOutliningOverhead!"); + } + + /// Represents how an instruction should be mapped by the outliner. + /// \p Legal instructions are those which are safe to outline. + /// \p Illegal instructions are those which cannot be outlined. + /// \p Invisible instructions are instructions which can be outlined, but + /// shouldn't actually impact the outlining result. + enum MachineOutlinerInstrType { Legal, Illegal, Invisible }; + + /// Returns how or if \p MI should be outlined. + virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::getOutliningType!"); + } + + /// Insert a custom epilogue for outlined functions. + /// This may be empty, in which case no epilogue or return statement will be + /// emitted. + virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB, + MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!"); + } + + /// Insert a call to an outlined function into the program. + /// Returns an iterator to the spot where we inserted the call. This must be + /// implemented by the target. + virtual MachineBasicBlock::iterator + insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); + } + + /// Insert a custom prologue for outlined functions. + /// This may be empty, in which case no prologue will be emitted. + virtual void insertOutlinerPrologue(MachineBasicBlock &MBB, + MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const { + llvm_unreachable( + "Target didn't implement TargetInstrInfo::insertOutlinerPrologue!"); + } + + /// Return true if the function can safely be outlined from. + /// A function \p MF is considered safe for outlining if an outlined function + /// produced from instructions in F will produce a program which produces the + /// same output for any set of given inputs. + virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const { + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::isFunctionSafeToOutlineFrom!"); + } + +private: + unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; + unsigned CatchRetOpcode; + unsigned ReturnOpcode; +}; + +/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. +template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> { + using RegInfo = DenseMapInfo<unsigned>; + + static inline TargetInstrInfo::RegSubRegPair getEmptyKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(), + RegInfo::getEmptyKey()); + } + + static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(), + RegInfo::getTombstoneKey()); + } + + /// \brief Reuse getHashValue implementation from + /// std::pair<unsigned, unsigned>. 
+ static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { + std::pair<unsigned, unsigned> PairVal = std::make_pair(Val.Reg, Val.SubReg); + return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal); + } + + static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS, + const TargetInstrInfo::RegSubRegPair &RHS) { + return RegInfo::isEqual(LHS.Reg, RHS.Reg) && + RegInfo::isEqual(LHS.SubReg, RHS.SubReg); + } +}; + +} // end namespace llvm + +#endif // LLVM_TARGET_TARGETINSTRINFO_H diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h new file mode 100644 index 0000000000000..0fa19d09e776a --- /dev/null +++ b/include/llvm/CodeGen/TargetLowering.h @@ -0,0 +1,3539 @@ +//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file describes how to lower LLVM code to machine code. This has two +/// main components: +/// +/// 1. Which ValueTypes are natively supported by the target. +/// 2. Which operations are supported for supported ValueTypes. +/// 3. Cost thresholds for alternative implementations of certain operations. +/// +/// In addition it has a few other components, like information about FP +/// immediates. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETLOWERING_H +#define LLVM_CODEGEN_TARGETLOWERING_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetCallingConv.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstdint> +#include <iterator> +#include <map> +#include <string> +#include <utility> +#include <vector> + +namespace llvm { + +class BranchProbability; +class CCState; +class CCValAssign; +class Constant; +class FastISel; +class FunctionLoweringInfo; +class GlobalValue; +class IntrinsicInst; +struct KnownBits; +class LLVMContext; +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineJumpTableInfo; +class MachineLoop; +class MachineRegisterInfo; +class MCContext; +class MCExpr; +class Module; +class TargetRegisterClass; +class TargetLibraryInfo; +class TargetRegisterInfo; +class Value; + +namespace Sched { + + enum Preference { + None, // No preference + Source, // Follow source order. 
+ RegPressure, // Scheduling for lowest register pressure. + Hybrid, // Scheduling for both latency and register pressure. + ILP, // Scheduling for ILP in low register pressure mode. + VLIW // Scheduling for VLIW targets. + }; + +} // end namespace Sched + +/// This base class for TargetLowering contains the SelectionDAG-independent +/// parts that can be used from the rest of CodeGen. +class TargetLoweringBase { +public: + /// This enum indicates whether operations are valid for a target, and if not, + /// what action should be used to make them valid. + enum LegalizeAction : uint8_t { + Legal, // The target natively supports this operation. + Promote, // This operation should be executed in a larger type. + Expand, // Try to expand this to other ops, otherwise use a libcall. + LibCall, // Don't try to expand this to other ops, always use a libcall. + Custom // Use the LowerOperation hook to implement custom lowering. + }; + + /// This enum indicates whether a types are legal for a target, and if not, + /// what action should be used to make them valid. + enum LegalizeTypeAction : uint8_t { + TypeLegal, // The target natively supports this type. + TypePromoteInteger, // Replace this integer with a larger one. + TypeExpandInteger, // Split this integer into two of half the size. + TypeSoftenFloat, // Convert this float to a same size integer type, + // if an operation is not supported in target HW. + TypeExpandFloat, // Split this float into two of half the size. + TypeScalarizeVector, // Replace this one-element vector with its element. + TypeSplitVector, // Split this vector into two of half the size. + TypeWidenVector, // This vector should be widened into a larger vector. + TypePromoteFloat // Replace this float with a larger one. + }; + + /// LegalizeKind holds the legalization kind that needs to happen to EVT + /// in order to type-legalize it. + using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; + + /// Enum that describes how the target represents true/false values. + enum BooleanContent { + UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. + ZeroOrOneBooleanContent, // All bits zero except for bit 0. + ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. + }; + + /// Enum that describes what type of support for selects the target has. + enum SelectSupportKind { + ScalarValSelect, // The target supports scalar selects (ex: cmov). + ScalarCondVectorVal, // The target supports selects with a scalar condition + // and vector values (ex: cmov). + VectorMaskSelect // The target supports vector selects with a vector + // mask (ex: x86 blends). + }; + + /// Enum that specifies what an atomic load/AtomicRMWInst is expanded + /// to, if at all. Exists because different targets have different levels of + /// support for these atomic instructions, and also have different options + /// w.r.t. what they should expand to. + enum class AtomicExpansionKind { + None, // Don't expand the instruction. + LLSC, // Expand the instruction into loadlinked/storeconditional; used + // by ARM/AArch64. + LLOnly, // Expand the (load) instruction into just a load-linked, which has + // greater atomic guarantees than a normal load. + CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. + }; + + /// Enum that specifies when a multiplication should be expanded. + enum class MulExpansionKind { + Always, // Always expand the instruction. + OnlyLegalOrCustom, // Only expand when the resulting instructions are legal + // or custom. 
+ }; + + class ArgListEntry { + public: + Value *Val = nullptr; + SDValue Node = SDValue(); + Type *Ty = nullptr; + bool IsSExt : 1; + bool IsZExt : 1; + bool IsInReg : 1; + bool IsSRet : 1; + bool IsNest : 1; + bool IsByVal : 1; + bool IsInAlloca : 1; + bool IsReturned : 1; + bool IsSwiftSelf : 1; + bool IsSwiftError : 1; + uint16_t Alignment = 0; + + ArgListEntry() + : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), + IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), + IsSwiftSelf(false), IsSwiftError(false) {} + + void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx); + }; + using ArgListTy = std::vector<ArgListEntry>; + + virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, + ArgListTy &Args) const {}; + + static ISD::NodeType getExtendForContent(BooleanContent Content) { + switch (Content) { + case UndefinedBooleanContent: + // Extend by adding rubbish bits. + return ISD::ANY_EXTEND; + case ZeroOrOneBooleanContent: + // Extend by adding zero bits. + return ISD::ZERO_EXTEND; + case ZeroOrNegativeOneBooleanContent: + // Extend by copying the sign bit. + return ISD::SIGN_EXTEND; + } + llvm_unreachable("Invalid content kind"); + } + + /// NOTE: The TargetMachine owns TLOF. + explicit TargetLoweringBase(const TargetMachine &TM); + TargetLoweringBase(const TargetLoweringBase &) = delete; + TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; + virtual ~TargetLoweringBase() = default; + +protected: + /// \brief Initialize all of the actions to default values. + void initActions(); + +public: + const TargetMachine &getTargetMachine() const { return TM; } + + virtual bool useSoftFloat() const { return false; } + + /// Return the pointer type for the given address space, defaults to + /// the pointer type from the data layout. + /// FIXME: The default needs to be removed once all the code is updated. + MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { + return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); + } + + /// Return the type for frame index, which is determined by + /// the alloca address space specified through the data layout. + MVT getFrameIndexTy(const DataLayout &DL) const { + return getPointerTy(DL, DL.getAllocaAddrSpace()); + } + + /// Return the type for operands of fence. + /// TODO: Let fence operands be of i32 type and remove this. + virtual MVT getFenceOperandTy(const DataLayout &DL) const { + return getPointerTy(DL); + } + + /// EVT is not used in-tree, but is used by out-of-tree target. + /// A documentation for this function would be nice... + virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; + + EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const; + + /// Returns the type to be used for the index operand of: + /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, + /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR + virtual MVT getVectorIdxTy(const DataLayout &DL) const { + return getPointerTy(DL); + } + + virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { + return true; + } + + /// Return true if multiple condition registers are available. + bool hasMultipleConditionRegisters() const { + return HasMultipleConditionRegisters; + } + + /// Return true if the target has BitExtract instructions. + bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } + + /// Return the preferred vector type legalization action. 
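+  ///
+  /// A target that would rather widen short vectors than promote their
+  /// elements could override this roughly as follows (sketch; "Foo" and the
+  /// 128-bit register width are assumptions):
+  /// \code
+  ///   TargetLoweringBase::LegalizeTypeAction
+  ///   FooTargetLowering::getPreferredVectorAction(EVT VT) const {
+  ///     if (VT.getVectorNumElements() == 1)
+  ///       return TypeScalarizeVector;   // Keep the default for 1-element vectors.
+  ///     if (VT.getSizeInBits() < 128)
+  ///       return TypeWidenVector;       // Widen up to a full Foo vector register.
+  ///     return TargetLoweringBase::getPreferredVectorAction(VT);
+  ///   }
+  /// \endcode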
+ virtual TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const { + // The default action for one element vectors is to scalarize + if (VT.getVectorNumElements() == 1) + return TypeScalarizeVector; + // The default action for other vectors is to promote + return TypePromoteInteger; + } + + // There are two general methods for expanding a BUILD_VECTOR node: + // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle + // them together. + // 2. Build the vector on the stack and then load it. + // If this function returns true, then method (1) will be used, subject to + // the constraint that all of the necessary shuffles are legal (as determined + // by isShuffleMaskLegal). If this function returns false, then method (2) is + // always used. The vector type, and the number of defined values, are + // provided. + virtual bool + shouldExpandBuildVectorWithShuffles(EVT /* VT */, + unsigned DefinedValues) const { + return DefinedValues < 3; + } + + /// Return true if integer divide is usually cheaper than a sequence of + /// several shifts, adds, and multiplies for this target. + /// The definition of "cheaper" may depend on whether we're optimizing + /// for speed or for size. + virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } + + /// Return true if the target can handle a standalone remainder operation. + virtual bool hasStandaloneRem(EVT VT) const { + return true; + } + + /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). + virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { + // Default behavior is to replace SQRT(X) with X*RSQRT(X). + return false; + } + + /// Reciprocal estimate status values used by the functions below. + enum ReciprocalEstimate : int { + Unspecified = -1, + Disabled = 0, + Enabled = 1 + }; + + /// Return a ReciprocalEstimate enum value for a square root of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; + + /// Return a ReciprocalEstimate enum value for a division of the given type + /// based on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a square root of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; + + /// Return the refinement step count for a division of the given type based + /// on the function's attributes. If the operation is not overridden by + /// the function's attributes, "Unspecified" is returned and target defaults + /// are expected to be used for instruction selection. + int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; + + /// Returns true if target has indicated at least one type should be bypassed. 
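+  ///
+  /// Targets populate the bypass width map from their TargetLowering
+  /// constructor, e.g. (assuming the protected addBypassSlowDiv() helper
+  /// declared elsewhere in this class):
+  /// \code
+  ///   // Prefer a 32-bit divide when the 64-bit divide is known to be slow.
+  ///   addBypassSlowDiv(64, 32);
+  /// \endcode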
+ bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } + + /// Returns map of slow types for division or remainder with corresponding + /// fast types + const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { + return BypassSlowDivWidths; + } + + /// Return true if Flow Control is an expensive operation that should be + /// avoided. + bool isJumpExpensive() const { return JumpIsExpensive; } + + /// Return true if selects are only cheaper than branches if the branch is + /// unlikely to be predicted right. + bool isPredictableSelectExpensive() const { + return PredictableSelectIsExpensive; + } + + /// If a branch or a select condition is skewed in one direction by more than + /// this factor, it is very likely to be predicted correctly. + virtual BranchProbability getPredictableBranchThreshold() const; + + /// Return true if the following transform is beneficial: + /// fold (conv (load x)) -> (load (conv*)x) + /// On architectures that don't natively support some vector loads + /// efficiently, casting the load to a smaller vector of larger types and + /// loading is more efficient, however, this can be undone by optimizations in + /// dag combiner. + virtual bool isLoadBitCastBeneficial(EVT LoadVT, + EVT BitcastVT) const { + // Don't do if we could do an indexed load on the original type, but not on + // the new one. + if (!LoadVT.isSimple() || !BitcastVT.isSimple()) + return true; + + MVT LoadMVT = LoadVT.getSimpleVT(); + + // Don't bother doing this if it's just going to be promoted again later, as + // doing so might interfere with other combines. + if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && + getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) + return false; + + return true; + } + + /// Return true if the following transform is beneficial: + /// (store (y (conv x)), y*)) -> (store x, (x*)) + virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const { + // Default to the same logic as loads. + return isLoadBitCastBeneficial(StoreVT, BitcastVT); + } + + /// Return true if it is expected to be cheaper to do a store of a non-zero + /// vector constant with the given size and type for the address space than to + /// store the individual scalar element constants. + virtual bool storeOfVectorConstantIsCheap(EVT MemVT, + unsigned NumElem, + unsigned AddrSpace) const { + return false; + } + + /// Allow store merging after legalization in addition to before legalization. + /// This may catch stores that do not exist earlier (eg, stores created from + /// intrinsics). + virtual bool mergeStoresAfterLegalization() const { return true; } + + /// Returns if it's reasonable to merge stores to MemVT size. + virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { + return true; + } + + /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. + virtual bool isCheapToSpeculateCttz() const { + return false; + } + + /// \brief Return true if it is cheap to speculate a call to intrinsic ctlz. + virtual bool isCheapToSpeculateCtlz() const { + return false; + } + + /// \brief Return true if ctlz instruction is fast. + virtual bool isCtlzFast() const { + return false; + } + + /// Return true if it is safe to transform an integer-domain bitwise operation + /// into the equivalent floating-point operation. This should be set to true + /// if the target has IEEE-754-compliant fabs/fneg operations for the input + /// type. 
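+  /// For example, returning true here permits folds such as rewriting
+  /// (and (bitcast f32 X to i32), 0x7fffffff) as (bitcast (fabs X) to i32),
+  /// assuming fabs on this target only clears the sign bit.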
+ virtual bool hasBitPreservingFPLogic(EVT VT) const { + return false; + } + + /// \brief Return true if it is cheaper to split the store of a merged int val + /// from a pair of smaller values into multiple stores. + virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { + return false; + } + + /// \brief Return if the target supports combining a + /// chain like: + /// \code + /// %andResult = and %val1, #mask + /// %icmpResult = icmp %andResult, 0 + /// \endcode + /// into a single machine instruction of a form like: + /// \code + /// cc = test %register, #mask + /// \endcode + virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { + return false; + } + + /// Use bitwise logic to make pairs of compares more efficient. For example: + /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 + /// This should be true when it takes more than one instruction to lower + /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on + /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. + virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { + return false; + } + + /// Return the preferred operand type if the target has a quick way to compare + /// integer values of the given size. Assume that any legal integer type can + /// be compared efficiently. Targets may override this to allow illegal wide + /// types to return a vector type if there is support to compare that type. + virtual MVT hasFastEqualityCompare(unsigned NumBits) const { + MVT VT = MVT::getIntegerVT(NumBits); + return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; + } + + /// Return true if the target should transform: + /// (X & Y) == Y ---> (~X & Y) == 0 + /// (X & Y) != Y ---> (~X & Y) != 0 + /// + /// This may be profitable if the target has a bitwise and-not operation that + /// sets comparison flags. A target may want to limit the transformation based + /// on the type of Y or if Y is a constant. + /// + /// Note that the transform will not occur if Y is known to be a power-of-2 + /// because a mask and compare of a single bit can be handled by inverting the + /// predicate, for example: + /// (X & 8) == 8 ---> (X & 8) != 0 + virtual bool hasAndNotCompare(SDValue Y) const { + return false; + } + + /// Return true if the target has a bitwise and-not operation: + /// X = ~A & B + /// This can be used to simplify select or other instructions. + virtual bool hasAndNot(SDValue X) const { + // If the target has the more complex version of this operation, assume that + // it has this operation too. + return hasAndNotCompare(X); + } + + /// \brief Return true if the target wants to use the optimization that + /// turns ext(promotableInst1(...(promotableInstN(load)))) into + /// promotedInst1(...(promotedInstN(ext(load)))). + bool enableExtLdPromotion() const { return EnableExtLdPromotion; } + + /// Return true if the target can combine store(extractelement VectorTy, + /// Idx). + /// \p Cost[out] gives the cost of that transformation when this is true. + virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const { + return false; + } + + /// Return true if target supports floating point exceptions. + bool hasFloatingPointExceptions() const { + return HasFloatingPointExceptions; + } + + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. 
+  virtual bool enableAggressiveFMAFusion(EVT VT) const {
+    return false;
+  }
+
+  /// Return the ValueType of the result of SETCC operations.
+  virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+                                 EVT VT) const;
+
+  /// Return the ValueType for comparison libcalls. Comparison libcalls include
+  /// floating point comparison calls, and Ordered/Unordered check calls on
+  /// floating point numbers.
+  virtual
+  MVT::SimpleValueType getCmpLibcallReturnType() const;
+
+  /// For targets without i1 registers, this gives the nature of the high-bits
+  /// of boolean values held in types wider than i1.
+  ///
+  /// "Boolean values" are special true/false values produced by nodes like
+  /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
+  /// Not to be confused with general values promoted from i1. Some cpus
+  /// distinguish between vectors of boolean and scalars; the isVec parameter
+  /// selects between the two kinds. For example, on X86 a scalar boolean should
+  /// be zero extended from i1, while the elements of a vector of booleans
+  /// should be sign extended from i1.
+  ///
+  /// Some cpus also treat floating point types the same way as they treat
+  /// vectors instead of the way they treat scalars.
+  BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
+    if (isVec)
+      return BooleanVectorContents;
+    return isFloat ? BooleanFloatContents : BooleanContents;
+  }
+
+  BooleanContent getBooleanContents(EVT Type) const {
+    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
+  }
+
+  /// Return target scheduling preference.
+  Sched::Preference getSchedulingPreference() const {
+    return SchedPreferenceInfo;
+  }
+
+  /// Some schedulers, e.g. hybrid, can switch to different scheduling
+  /// heuristics for different nodes. This function returns the preference (or
+  /// none) for the given node.
+  virtual Sched::Preference getSchedulingPreference(SDNode *) const {
+    return Sched::None;
+  }
+
+  /// Return the register class that should be used for the specified value
+  /// type.
+  virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
+    const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+    assert(RC && "This value type is not natively supported!");
+    return RC;
+  }
+
+  /// Return the 'representative' register class for the specified value
+  /// type.
+  ///
+  /// The 'representative' register class is the largest legal super-reg
+  /// register class for the register class of the value type. For example, on
+  /// i386 the rep register class for i8, i16, and i32 is GR32; while the rep
+  /// register class is GR64 on x86_64.
+  virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+    const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
+    return RC;
+  }
+
+  /// Return the cost of the 'representative' register class for the specified
+  /// value type.
+  virtual uint8_t getRepRegClassCostFor(MVT VT) const {
+    return RepRegClassCostForVT[VT.SimpleTy];
+  }
+
+  /// Return true if the target has native support for the specified value type.
+  /// This means that it has a register that directly holds it without
+  /// promotions or expansions.
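+  /// For example, on a typical 64-bit target i64 is type-legal, while i128 is
+  /// not and must instead be expanded into a pair of i64 values.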
+ bool isTypeLegal(EVT VT) const { + assert(!VT.isSimple() || + (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); + return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; + } + + class ValueTypeActionImpl { + /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum + /// that indicates how instruction selection should deal with the type. + LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; + + public: + ValueTypeActionImpl() { + std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), + TypeLegal); + } + + LegalizeTypeAction getTypeAction(MVT VT) const { + return ValueTypeActions[VT.SimpleTy]; + } + + void setTypeAction(MVT VT, LegalizeTypeAction Action) { + ValueTypeActions[VT.SimpleTy] = Action; + } + }; + + const ValueTypeActionImpl &getValueTypeActions() const { + return ValueTypeActions; + } + + /// Return how we should legalize values of this type, either it is already + /// legal (return 'Legal') or we need to promote it to a larger type (return + /// 'Promote'), or we need to expand it into multiple registers of smaller + /// integer type (return 'Expand'). 'Custom' is not an option. + LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { + return getTypeConversion(Context, VT).first; + } + LegalizeTypeAction getTypeAction(MVT VT) const { + return ValueTypeActions.getTypeAction(VT); + } + + /// For types supported by the target, this is an identity function. For + /// types that must be promoted to larger types, this returns the larger type + /// to promote to. For integer types that are larger than the largest integer + /// register, this contains one step in the expansion to get to the smaller + /// register. For illegal floating point types, this returns the integer type + /// to transform to. + EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { + return getTypeConversion(Context, VT).second; + } + + /// For types supported by the target, this is an identity function. For + /// types that must be expanded (i.e. integer types that are larger than the + /// largest integer register or illegal floating point types), this returns + /// the largest legal type it will be expanded to. + EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { + assert(!VT.isVector()); + while (true) { + switch (getTypeAction(Context, VT)) { + case TypeLegal: + return VT; + case TypeExpandInteger: + VT = getTypeToTransformTo(Context, VT); + break; + default: + llvm_unreachable("Type is not legal nor is it to be expanded!"); + } + } + } + + /// Vector types are broken down into some number of legal first class types. + /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 + /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 + /// turns into 4 EVT::i32 values with both PPC and X86. + /// + /// This method returns the number of registers needed, and the VT for each + /// register. It also returns the VT and quantity of the intermediate values + /// before they are promoted/expanded. + unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const; + + /// Certain targets such as MIPS require that some types such as vectors are + /// always broken down into scalars in some contexts. This occurs even if the + /// vector type is legal. 
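+  /// For illustration, under such a convention a legal v4i32 argument might be
+  /// broken down into four i32 values (IntermediateVT = i32,
+  /// NumIntermediates = 4, RegisterVT = i32) even though v4i32 itself is
+  /// type-legal.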
+  virtual unsigned getVectorTypeBreakdownForCallingConv(
+      LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+      unsigned &NumIntermediates, MVT &RegisterVT) const {
+    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
+                                  RegisterVT);
+  }
+
+  struct IntrinsicInfo {
+    unsigned opc = 0;     // target opcode
+    EVT memVT;            // memory VT
+
+    // value representing memory location
+    PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
+
+    int offset = 0;       // offset off of ptrVal
+    unsigned size = 0;    // the size of the memory location
+                          // (taken from memVT if zero)
+    unsigned align = 1;   // alignment
+
+    MachineMemOperand::Flags flags = MachineMemOperand::MONone;
+    IntrinsicInfo() = default;
+  };
+
+  /// Given an intrinsic, checks whether on this target the intrinsic will need
+  /// to map to a MemIntrinsicNode (touches memory). If this is the case, it
+  /// returns true and stores the intrinsic information into the IntrinsicInfo
+  /// that was passed to the function.
+  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
+                                  MachineFunction &,
+                                  unsigned /*Intrinsic*/) const {
+    return false;
+  }
+
+  /// Returns true if the target can instruction select the specified FP
+  /// immediate natively. If false, the legalizer will materialize the FP
+  /// immediate as a load from a constant pool.
+  virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/) const {
+    return false;
+  }
+
+  /// Targets can use this to indicate that they only support *some*
+  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
+  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
+  /// legal.
+  virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
+    return true;
+  }
+
+  /// Returns true if the operation can trap for the value type.
+  ///
+  /// VT must be a legal type. By default, we optimistically assume most
+  /// operations don't trap except for integer divide and remainder.
+  virtual bool canOpTrap(unsigned Op, EVT VT) const;
+
+  /// Similar to isShuffleMaskLegal. Targets can use this to indicate whether
+  /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with
+  /// a constant pool entry.
+  virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+                                      EVT /*VT*/) const {
+    return false;
+  }
+
+  /// Return how this operation should be treated: either it is legal, needs to
+  /// be promoted to a larger size, needs to be expanded to some other code
+  /// sequence, or the target has a custom expander for it.
+  LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
+    if (VT.isExtended()) return Expand;
+    // If a target-specific SDNode requires legalization, require the target
+    // to provide custom legalization for it.
+    if (Op >= array_lengthof(OpActions[0])) return Custom;
+    return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
+  }
+
+  /// Return true if the specified operation is legal on this target or can be
+  /// made legal with custom lowering. This is used to help guide high-level
+  /// lowering decisions.
+  bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+           (getOperationAction(Op, VT) == Legal ||
+            getOperationAction(Op, VT) == Custom);
+  }
+
+  /// Return true if the specified operation is legal on this target or can be
+  /// made legal using promotion. This is used to help guide high-level lowering
+  /// decisions.
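+  /// For example, a DAG combine that wants to form a multiply could guard the
+  /// transform with a check along these lines (TLI being the current
+  /// TargetLowering instance):
+  /// \code
+  ///   if (!TLI.isOperationLegalOrPromote(ISD::MUL, VT))
+  ///     return SDValue(); // MUL on VT is neither legal nor promotable
+  /// \endcode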
+  bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+           (getOperationAction(Op, VT) == Legal ||
+            getOperationAction(Op, VT) == Promote);
+  }
+
+  /// Return true if the specified operation is legal on this target or can be
+  /// made legal with custom lowering or using promotion. This is used to help
+  /// guide high-level lowering decisions.
+  bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+           (getOperationAction(Op, VT) == Legal ||
+            getOperationAction(Op, VT) == Custom ||
+            getOperationAction(Op, VT) == Promote);
+  }
+
+  /// Return true if the operation uses custom lowering, regardless of whether
+  /// the type is legal or not.
+  bool isOperationCustom(unsigned Op, EVT VT) const {
+    return getOperationAction(Op, VT) == Custom;
+  }
+
+  /// Return true if lowering to a jump table is allowed.
+  bool areJTsAllowed(const Function *Fn) const {
+    if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
+      return false;
+
+    return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
+  }
+
+  /// Check whether the range [Low,High] fits in a machine word.
+  bool rangeFitsInWord(const APInt &Low, const APInt &High,
+                       const DataLayout &DL) const {
+    // FIXME: Using the pointer type doesn't seem ideal.
+    uint64_t BW = DL.getPointerSizeInBits();
+    uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
+    return Range <= BW;
+  }
+
+  /// Return true if lowering to a jump table is suitable for a set of case
+  /// clusters which may contain \p NumCases cases and span a range of
+  /// \p Range values.
+  /// FIXME: This function checks the maximum table size and density, but the
+  /// minimum size is not checked. It would be nice if the minimum size check
+  /// were also folded into this function. Currently, the minimum size check is
+  /// performed in findJumpTable() in SelectionDAGBuilder and
+  /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+  bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
+                              uint64_t Range) const {
+    const bool OptForSize = SI->getParent()->getParent()->optForSize();
+    const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
+    const unsigned MaxJumpTableSize =
+        OptForSize || getMaximumJumpTableSize() == 0
+            ? UINT_MAX
+            : getMaximumJumpTableSize();
+    // Check whether a range of clusters is dense enough for a jump table.
+    if (Range <= MaxJumpTableSize &&
+        (NumCases * 100 >= Range * MinDensity)) {
+      return true;
+    }
+    return false;
+  }
+
+  /// Return true if lowering to a bit test is suitable for a set of case
+  /// clusters which contains \p NumDests unique destinations, \p Low and
+  /// \p High as its lowest and highest case values, and expects \p NumCmps
+  /// case value comparisons. Check if the number of destinations, comparison
+  /// metric, and range are all suitable.
+  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
+                             const APInt &Low, const APInt &High,
+                             const DataLayout &DL) const {
+    // FIXME: I don't think NumCmps is the correct metric: a single case and a
+    // range of cases both require only one branch to lower. Just looking at the
+    // number of clusters and destinations should be enough to decide whether to
+    // build bit tests.
+
+    // To lower a range with bit tests, the range must fit the bitwidth of a
+    // machine word.
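+    // For example, with 64-bit pointers a cluster whose case values span
+    // [10, 70] (61 values) fits in one word, while one spanning [0, 100]
+    // (101 values) does not and is rejected below.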
+ if (!rangeFitsInWord(Low, High, DL)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might + // be cheaper, and for many destinations, splitting the range might be + // better. + return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); + } + + /// Return true if the specified operation is illegal on this target or + /// unlikely to be made legal with custom lowering. This is used to help guide + /// high-level lowering decisions. + bool isOperationExpand(unsigned Op, EVT VT) const { + return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); + } + + /// Return true if the specified operation is legal on this target. + bool isOperationLegal(unsigned Op, EVT VT) const { + return (VT == MVT::Other || isTypeLegal(VT)) && + getOperationAction(Op, VT) == Legal; + } + + /// Return how this load with extension should be treated: either it is legal, + /// needs to be promoted to a larger size, needs to be expanded to some other + /// code sequence, or the target has a custom expander for it. + LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, + EVT MemVT) const { + if (ValVT.isExtended() || MemVT.isExtended()) return Expand; + unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; + unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; + assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && + MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); + unsigned Shift = 4 * ExtType; + return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); + } + + /// Return true if the specified load with extension is legal on this target. + bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { + return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; + } + + /// Return true if the specified load with extension is legal or custom + /// on this target. + bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { + return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || + getLoadExtAction(ExtType, ValVT, MemVT) == Custom; + } + + /// Return how this store with truncation should be treated: either it is + /// legal, needs to be promoted to a larger size, needs to be expanded to some + /// other code sequence, or the target has a custom expander for it. + LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { + if (ValVT.isExtended() || MemVT.isExtended()) return Expand; + unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; + unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; + assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && + "Table isn't big enough!"); + return TruncStoreActions[ValI][MemI]; + } + + /// Return true if the specified store with truncation is legal on this + /// target. + bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { + return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; + } + + /// Return true if the specified store with truncation has solution on this + /// target. 
+ bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { + return isTypeLegal(ValVT) && + (getTruncStoreAction(ValVT, MemVT) == Legal || + getTruncStoreAction(ValVT, MemVT) == Custom); + } + + /// Return how the indexed load should be treated: either it is legal, needs + /// to be promoted to a larger size, needs to be expanded to some other code + /// sequence, or the target has a custom expander for it. + LegalizeAction + getIndexedLoadAction(unsigned IdxMode, MVT VT) const { + assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && + "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); + } + + /// Return true if the specified indexed load is legal on this target. + bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); + } + + /// Return how the indexed store should be treated: either it is legal, needs + /// to be promoted to a larger size, needs to be expanded to some other code + /// sequence, or the target has a custom expander for it. + LegalizeAction + getIndexedStoreAction(unsigned IdxMode, MVT VT) const { + assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && + "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); + } + + /// Return true if the specified indexed load is legal on this target. + bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); + } + + /// Return how the condition code should be treated: either it is legal, needs + /// to be expanded to some other code sequence, or the target has a custom + /// expander for it. + LegalizeAction + getCondCodeAction(ISD::CondCode CC, MVT VT) const { + assert((unsigned)CC < array_lengthof(CondCodeActions) && + ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && + "Table isn't big enough!"); + // See setCondCodeAction for how this is encoded. + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; + LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); + assert(Action != Promote && "Can't promote condition code!"); + return Action; + } + + /// Return true if the specified condition code is legal on this target. + bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { + return + getCondCodeAction(CC, VT) == Legal || + getCondCodeAction(CC, VT) == Custom; + } + + /// If the action for this operation is to promote, this method returns the + /// ValueType to promote to. + MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { + assert(getOperationAction(Op, VT) == Promote && + "This operation isn't promoted!"); + + // See if this has an explicit type specified. 
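+    // (i.e. an entry that was registered earlier via AddPromotedToType).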
+ std::map<std::pair<unsigned, MVT::SimpleValueType>, + MVT::SimpleValueType>::const_iterator PTTI = + PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); + if (PTTI != PromoteToType.end()) return PTTI->second; + + assert((VT.isInteger() || VT.isFloatingPoint()) && + "Cannot autopromote this type, add it with AddPromotedToType."); + + MVT NVT = VT; + do { + NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); + assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && + "Didn't find type to promote to!"); + } while (!isTypeLegal(NVT) || + getOperationAction(Op, NVT) == Promote); + return NVT; + } + + /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM + /// operations except for the pointer size. If AllowUnknown is true, this + /// will return MVT::Other for types with no EVT counterpart (e.g. structs), + /// otherwise it will assert. + EVT getValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + // Lower scalar pointers to native pointer types. + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) + return getPointerTy(DL, PTy->getAddressSpace()); + + if (Ty->isVectorTy()) { + VectorType *VTy = cast<VectorType>(Ty); + Type *Elm = VTy->getElementType(); + // Lower vectors of pointers to native pointer types. + if (PointerType *PT = dyn_cast<PointerType>(Elm)) { + EVT PointerTy(getPointerTy(DL, PT->getAddressSpace())); + Elm = PointerTy.getTypeForEVT(Ty->getContext()); + } + + return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), + VTy->getNumElements()); + } + return EVT::getEVT(Ty, AllowUnknown); + } + + /// Return the MVT corresponding to this LLVM type. See getValueType. + MVT getSimpleValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); + } + + /// Return the desired alignment for ByVal or InAlloca aggregate function + /// arguments in the caller parameter area. This is the actual alignment, not + /// its logarithm. + virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; + + /// Return the type of registers that this ValueType will eventually require. + MVT getRegisterType(MVT VT) const { + assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); + return RegisterTypeForVT[VT.SimpleTy]; + } + + /// Return the type of registers that this ValueType will eventually require. + MVT getRegisterType(LLVMContext &Context, EVT VT) const { + if (VT.isSimple()) { + assert((unsigned)VT.getSimpleVT().SimpleTy < + array_lengthof(RegisterTypeForVT)); + return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; + } + if (VT.isVector()) { + EVT VT1; + MVT RegisterVT; + unsigned NumIntermediates; + (void)getVectorTypeBreakdown(Context, VT, VT1, + NumIntermediates, RegisterVT); + return RegisterVT; + } + if (VT.isInteger()) { + return getRegisterType(Context, getTypeToTransformTo(Context, VT)); + } + llvm_unreachable("Unsupported extended type!"); + } + + /// Return the number of registers that this ValueType will eventually + /// require. + /// + /// This is one for any types promoted to live in larger registers, but may be + /// more than one for types (like i64) that are split into pieces. For types + /// like i140, which are first promoted then expanded, it is the number of + /// registers needed to hold all the bits of the original type. For an i140 + /// on a 32 bit machine this means 5 registers. 
+ unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { + if (VT.isSimple()) { + assert((unsigned)VT.getSimpleVT().SimpleTy < + array_lengthof(NumRegistersForVT)); + return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; + } + if (VT.isVector()) { + EVT VT1; + MVT VT2; + unsigned NumIntermediates; + return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); + } + if (VT.isInteger()) { + unsigned BitWidth = VT.getSizeInBits(); + unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); + return (BitWidth + RegWidth - 1) / RegWidth; + } + llvm_unreachable("Unsupported extended type!"); + } + + /// Certain combinations of ABIs, Targets and features require that types + /// are legal for some operations and not for other operations. + /// For MIPS all vector types must be passed through the integer register set. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const { + return getRegisterType(VT); + } + + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + return getRegisterType(Context, VT); + } + + /// Certain targets require unusual breakdowns of certain types. For MIPS, + /// this occurs when a vector type is used, as vector are passed through the + /// integer register set. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + return getNumRegisters(Context, VT); + } + + /// Certain targets have context senstive alignment requirements, where one + /// type has the alignment requirement of another type. + virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return DL.getABITypeAlignment(ArgTy); + } + + /// If true, then instruction selection should seek to shrink the FP constant + /// of the specified type to a smaller type in order to save space and / or + /// reduce runtime. + virtual bool ShouldShrinkFPConstant(EVT) const { return true; } + + // Return true if it is profitable to reduce the given load node to a smaller + // type. + // + // e.g. (i16 (trunc (i32 (load x))) -> i16 load x should be performed + virtual bool shouldReduceLoadWidth(SDNode *Load, + ISD::LoadExtType ExtTy, + EVT NewVT) const { + return true; + } + + /// When splitting a value of the specified type into parts, does the Lo + /// or Hi part come first? This usually follows the endianness, except + /// for ppcf128, where the Hi part always comes first. + bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { + return DL.isBigEndian() || VT == MVT::ppcf128; + } + + /// If true, the target has custom DAG combine transformations that it can + /// perform for the specified node. + bool hasTargetDAGCombine(ISD::NodeType NT) const { + assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); + return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); + } + + unsigned getGatherAllAliasesMaxDepth() const { + return GatherAllAliasesMaxDepth; + } + + /// Returns the size of the platform's va_list object. + virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { + return getPointerTy(DL).getSizeInBits(); + } + + /// \brief Get maximum # of store operations permitted for llvm.memset + /// + /// This function returns the maximum number of store operations permitted + /// to replace a call to llvm.memset. The value is set by the target at the + /// performance threshold for such a replacement. If OptSize is true, + /// return the limit for functions that have OptSize attribute. + unsigned getMaxStoresPerMemset(bool OptSize) const { + return OptSize ? 
MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
+  }
+
+  /// \brief Get maximum # of store operations permitted for llvm.memcpy
+  ///
+  /// This function returns the maximum number of store operations permitted
+  /// to replace a call to llvm.memcpy. The value is set by the target at the
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  unsigned getMaxStoresPerMemcpy(bool OptSize) const {
+    return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
+  }
+
+  /// Get maximum # of load operations permitted for memcmp
+  ///
+  /// This function returns the maximum number of load operations permitted
+  /// to replace a call to memcmp. The value is set by the target at the
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
+    return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
+  }
+
+  /// \brief Get maximum # of store operations permitted for llvm.memmove
+  ///
+  /// This function returns the maximum number of store operations permitted
+  /// to replace a call to llvm.memmove. The value is set by the target at the
+  /// performance threshold for such a replacement. If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  unsigned getMaxStoresPerMemmove(bool OptSize) const {
+    return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
+  }
+
+  /// \brief Determine if the target supports unaligned memory accesses.
+  ///
+  /// This function returns true if the target allows unaligned memory accesses
+  /// of the specified type in the given address space. If true, it also returns
+  /// whether the unaligned memory access is "fast" in the last argument by
+  /// reference. This is used, for example, in situations where an array
+  /// copy/move/set is converted to a sequence of store operations. Its use
+  /// helps to ensure that such replacements don't generate code that causes an
+  /// alignment error (trap) on the target machine.
+  virtual bool allowsMisalignedMemoryAccesses(EVT,
+                                              unsigned AddrSpace = 0,
+                                              unsigned Align = 1,
+                                              bool * /*Fast*/ = nullptr) const {
+    return false;
+  }
+
+  /// Return true if the target supports a memory access of this type for the
+  /// given address space and alignment. If the access is allowed, the optional
+  /// final parameter returns whether the access is also fast (as defined by the
+  /// target).
+  bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+                          unsigned AddrSpace = 0, unsigned Alignment = 1,
+                          bool *Fast = nullptr) const;
+
+  /// Returns the target-specific optimal type for load and store operations as
+  /// a result of memset, memcpy, and memmove lowering.
+  ///
+  /// If DstAlign is zero, it is safe to assume that the destination alignment
+  /// can satisfy any constraint. Similarly, if SrcAlign is zero, there is no
+  /// need to check it against an alignment requirement, probably because the
+  /// source does not need to be loaded. If 'IsMemset' is true, that means it's
+  /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
+  /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
+  /// does not need to be loaded. It returns EVT::Other if the type should be
+  /// determined using generic target-independent logic.
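+  /// For illustration only, a hypothetical target with fast 128-bit vector
+  /// stores might override this roughly as follows:
+  /// \code
+  ///   EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+  ///                           unsigned SrcAlign, bool IsMemset,
+  ///                           bool ZeroMemset, bool MemcpyStrSrc,
+  ///                           MachineFunction &MF) const override {
+  ///     if (Size >= 16 && (DstAlign == 0 || DstAlign >= 16))
+  ///       return MVT::v2i64;
+  ///     return MVT::Other;
+  ///   }
+  /// \endcode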
+ virtual EVT getOptimalMemOpType(uint64_t /*Size*/, + unsigned /*DstAlign*/, unsigned /*SrcAlign*/, + bool /*IsMemset*/, + bool /*ZeroMemset*/, + bool /*MemcpyStrSrc*/, + MachineFunction &/*MF*/) const { + return MVT::Other; + } + + /// Returns true if it's safe to use load / store of the specified type to + /// expand memcpy / memset inline. + /// + /// This is mostly true for all types except for some special cases. For + /// example, on X86 targets without SSE2 f64 load / store are done with fldl / + /// fstpl which also does type conversion. Note the specified type doesn't + /// have to be legal as the hook is used before type legalization. + virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } + + /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp. + bool usesUnderscoreSetJmp() const { + return UseUnderscoreSetJmp; + } + + /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp. + bool usesUnderscoreLongJmp() const { + return UseUnderscoreLongJmp; + } + + /// Return lower limit for number of blocks in a jump table. + unsigned getMinimumJumpTableEntries() const; + + /// Return lower limit of the density in a jump table. + unsigned getMinimumJumpTableDensity(bool OptForSize) const; + + /// Return upper limit for number of entries in a jump table. + /// Zero if no limit. + unsigned getMaximumJumpTableSize() const; + + virtual bool isJumpTableRelative() const { + return TM.isPositionIndependent(); + } + + /// If a physical register, this specifies the register that + /// llvm.savestack/llvm.restorestack should save and restore. + unsigned getStackPointerRegisterToSaveRestore() const { + return StackPointerRegisterToSaveRestore; + } + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + virtual unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + virtual unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; + } + + virtual bool needsFixedCatchObjects() const { + report_fatal_error("Funclet EH is not implemented for this target"); + } + + /// Returns the target's jmp_buf size in bytes (if never set, the default is + /// 200) + unsigned getJumpBufSize() const { + return JumpBufSize; + } + + /// Returns the target's jmp_buf alignment in bytes (if never set, the default + /// is 0) + unsigned getJumpBufAlignment() const { + return JumpBufAlignment; + } + + /// Return the minimum stack alignment of an argument. + unsigned getMinStackArgumentAlignment() const { + return MinStackArgumentAlignment; + } + + /// Return the minimum function alignment. + unsigned getMinFunctionAlignment() const { + return MinFunctionAlignment; + } + + /// Return the preferred function alignment. + unsigned getPrefFunctionAlignment() const { + return PrefFunctionAlignment; + } + + /// Return the preferred loop alignment. + virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { + return PrefLoopAlignment; + } + + /// If the target has a standard location for the stack protector guard, + /// returns the address of that location. Otherwise, returns nullptr. 
+ /// DEPRECATED: please override useLoadStackGuardNode and customize + /// LOAD_STACK_GUARD, or customize @llvm.stackguard(). + virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; + + /// Inserts necessary declarations for SSP (stack protection) purpose. + /// Should be used only when getIRStackGuard returns nullptr. + virtual void insertSSPDeclarations(Module &M) const; + + /// Return the variable that's previously inserted by insertSSPDeclarations, + /// if any, otherwise return nullptr. Should be used only when + /// getIRStackGuard returns nullptr. + virtual Value *getSDagStackGuard(const Module &M) const; + + /// If this function returns true, stack protection checks should XOR the + /// frame pointer (or whichever pointer is used to address locals) into the + /// stack guard value before checking it. getIRStackGuard must return nullptr + /// if this returns true. + virtual bool useStackGuardXorFP() const { return false; } + + /// If the target has a standard stack protection check function that + /// performs validation and error handling, returns the function. Otherwise, + /// returns nullptr. Must be previously inserted by insertSSPDeclarations. + /// Should be used only when getIRStackGuard returns nullptr. + virtual Value *getSSPStackGuardCheck(const Module &M) const; + +protected: + Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, + bool UseTLS) const; + +public: + /// Returns the target-specific address of the unsafe stack pointer. + virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; + + /// Returns the name of the symbol used to emit stack probes or the empty + /// string if not applicable. + virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { + return ""; + } + + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + return false; + } + + /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we + /// are happy to sink it into basic blocks. + virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + return isNoopAddrSpaceCast(SrcAS, DestAS); + } + + /// Return true if the pointer arguments to CI should be aligned by aligning + /// the object whose address is being passed. If so then MinSize is set to the + /// minimum size the object must be to be aligned and PrefAlign is set to the + /// preferred alignment. + virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, + unsigned & /*PrefAlign*/) const { + return false; + } + + //===--------------------------------------------------------------------===// + /// \name Helpers for TargetTransformInfo implementations + /// @{ + + /// Get the ISD node that corresponds to the Instruction class opcode. + int InstructionOpcodeToISD(unsigned Opcode) const; + + /// Estimate the cost of type-legalization and the legalized type. + std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const; + + /// @} + + //===--------------------------------------------------------------------===// + /// \name Helpers for atomic expansion. + /// @{ + + /// Returns the maximum atomic operation size (in bits) supported by + /// the backend. Atomic operations greater than this size (as well + /// as ones that are not naturally aligned), will be expanded by + /// AtomicExpandPass into an __atomic_* library call. 
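+  /// For example, a 32-bit target without native 64-bit atomic instructions
+  /// would call \code setMaxAtomicSizeInBitsSupported(32); \endcode in its
+  /// constructor so that 64-bit atomic operations become __atomic_* calls.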
+ unsigned getMaxAtomicSizeInBitsSupported() const { + return MaxAtomicSizeInBitsSupported; + } + + /// Returns the size of the smallest cmpxchg or ll/sc instruction + /// the backend supports. Any smaller operations are widened in + /// AtomicExpandPass. + /// + /// Note that *unlike* operations above the maximum size, atomic ops + /// are still natively supported below the minimum; they just + /// require a more complex expansion. + unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } + + /// Whether the target supports unaligned atomic operations. + bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } + + /// Whether AtomicExpandPass should automatically insert fences and reduce + /// ordering for this atomic. This should be true for most architectures with + /// weak memory ordering. Defaults to false. + virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { + return false; + } + + /// Perform a load-linked operation on Addr, returning a "Value *" with the + /// corresponding pointee type. This may entail some non-trivial operations to + /// truncate or reconstruct types that will be illegal in the backend. See + /// ARMISelLowering for an example implementation. + virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + llvm_unreachable("Load linked unimplemented on this target"); + } + + /// Perform a store-conditional operation to Addr. Return the status of the + /// store. This should be 0 if the store succeeded, non-zero otherwise. + virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const { + llvm_unreachable("Store conditional unimplemented on this target"); + } + + /// Inserts in the IR a target-specific intrinsic specifying a fence. + /// It is called by AtomicExpandPass before expanding an + /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad + /// if shouldInsertFencesForAtomic returns true. + /// + /// Inst is the original atomic instruction, prior to other expansions that + /// may be performed. + /// + /// This function should either return a nullptr, or a pointer to an IR-level + /// Instruction*. Even complex fence sequences can be represented by a + /// single Instruction* through an intrinsic to be lowered later. + /// Backends should override this method to produce target-specific intrinsic + /// for their fences. + /// FIXME: Please note that the default implementation here in terms of + /// IR-level fences exists for historical/compatibility reasons and is + /// *unsound* ! Fences cannot, in general, be used to restore sequential + /// consistency. For example, consider the following example: + /// atomic<int> x = y = 0; + /// int r1, r2, r3, r4; + /// Thread 0: + /// x.store(1); + /// Thread 1: + /// y.store(1); + /// Thread 2: + /// r1 = x.load(); + /// r2 = y.load(); + /// Thread 3: + /// r3 = y.load(); + /// r4 = x.load(); + /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all + /// seq_cst. But if they are lowered to monotonic accesses, no amount of + /// IR-level fences can prevent it. 
+ /// @{ + virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const { + if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) + return Builder.CreateFence(Ord); + else + return nullptr; + } + + virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isAcquireOrStronger(Ord)) + return Builder.CreateFence(Ord); + else + return nullptr; + } + /// @} + + // Emits code that executes when the comparison result in the ll/sc + // expansion of a cmpxchg instruction is such that the store-conditional will + // not execute. This makes it possible to balance out the load-linked with + // a dedicated instruction, if desired. + // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would + // be unnecessarily held, except if clrex, inserted by this hook, is executed. + virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} + + /// Returns true if the given (atomic) store should be expanded by the + /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. + virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { + return false; + } + + /// Returns true if arguments should be sign-extended in lib calls. + virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + return IsSigned; + } + + /// Returns how the given (atomic) load should be expanded by the + /// IR-level AtomicExpand pass. + virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { + return AtomicExpansionKind::None; + } + + /// Returns true if the given atomic cmpxchg should be expanded by the + /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence + /// (through emitLoadLinked() and emitStoreConditional()). + virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { + return false; + } + + /// Returns how the IR-level AtomicExpand pass should expand the given + /// AtomicRMW, if at all. Default is to never expand. + virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { + return AtomicExpansionKind::None; + } + + /// On some platforms, an AtomicRMW that never actually modifies the value + /// (such as fetch_add of 0) can be turned into a fence followed by an + /// atomic load. This may sound useless, but it makes it possible for the + /// processor to keep the cacheline shared, dramatically improving + /// performance. And such idempotent RMWs are useful for implementing some + /// kinds of locks, see for example (justification + benchmarks): + /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf + /// This method tries doing that transformation, returning the atomic load if + /// it succeeds, and nullptr otherwise. + /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo + /// another round of expansion. + virtual LoadInst * + lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { + return nullptr; + } + + /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, + /// SIGN_EXTEND, or ANY_EXTEND). + virtual ISD::NodeType getExtendForAtomicOps() const { + return ISD::ZERO_EXTEND; + } + + /// @} + + /// Returns true if we should normalize + /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and + /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely + /// that it saves us from materializing N0 and N1 in an integer register. 
+ /// Targets that are able to perform and/or on flags should return false here. + virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, + EVT VT) const { + // If a target has multiple condition registers, then it likely has logical + // operations on those registers. + if (hasMultipleConditionRegisters()) + return false; + // Only do the transform if the value won't be split into multiple + // registers. + LegalizeTypeAction Action = getTypeAction(Context, VT); + return Action != TypeExpandInteger && Action != TypeExpandFloat && + Action != TypeSplitVector; + } + + /// Return true if a select of constants (select Cond, C1, C2) should be + /// transformed into simple math ops with the condition value. For example: + /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 + virtual bool convertSelectOfConstantsToMath(EVT VT) const { + return false; + } + + //===--------------------------------------------------------------------===// + // TargetLowering Configuration Methods - These methods should be invoked by + // the derived class constructor to configure this object for the target. + // +protected: + /// Specify how the target extends the result of integer and floating point + /// boolean values from i1 to a wider type. See getBooleanContents. + void setBooleanContents(BooleanContent Ty) { + BooleanContents = Ty; + BooleanFloatContents = Ty; + } + + /// Specify how the target extends the result of integer and floating point + /// boolean values from i1 to a wider type. See getBooleanContents. + void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { + BooleanContents = IntTy; + BooleanFloatContents = FloatTy; + } + + /// Specify how the target extends the result of a vector boolean value from a + /// vector of i1 to a wider type. See getBooleanContents. + void setBooleanVectorContents(BooleanContent Ty) { + BooleanVectorContents = Ty; + } + + /// Specify the target scheduling preference. + void setSchedulingPreference(Sched::Preference Pref) { + SchedPreferenceInfo = Pref; + } + + /// Indicate whether this target prefers to use _setjmp to implement + /// llvm.setjmp or the version without _. Defaults to false. + void setUseUnderscoreSetJmp(bool Val) { + UseUnderscoreSetJmp = Val; + } + + /// Indicate whether this target prefers to use _longjmp to implement + /// llvm.longjmp or the version without _. Defaults to false. + void setUseUnderscoreLongJmp(bool Val) { + UseUnderscoreLongJmp = Val; + } + + /// Indicate the minimum number of blocks to generate jump tables. + void setMinimumJumpTableEntries(unsigned Val); + + /// Indicate the maximum number of entries in jump tables. + /// Set to zero to generate unlimited jump tables. + void setMaximumJumpTableSize(unsigned); + + /// If set to a physical register, this specifies the register that + /// llvm.savestack/llvm.restorestack should save and restore. + void setStackPointerRegisterToSaveRestore(unsigned R) { + StackPointerRegisterToSaveRestore = R; + } + + /// Tells the code generator that the target has multiple (allocatable) + /// condition registers that can be used to store the results of comparisons + /// for use by selects and conditional branches. With multiple condition + /// registers, the code generator will not aggressively sink comparisons into + /// the blocks of their users. + void setHasMultipleConditionRegisters(bool hasManyRegs = true) { + HasMultipleConditionRegisters = hasManyRegs; + } + + /// Tells the code generator that the target has BitExtract instructions. 
+  /// The code generator will aggressively sink "shift"s into the blocks of
+  /// their users if the users will generate "and" instructions which can be
+  /// combined with "shift" to BitExtract instructions.
+  void setHasExtractBitsInsn(bool hasExtractInsn = true) {
+    HasExtractBitsInsn = hasExtractInsn;
+  }
+
+  /// Tells the code generator not to expand logic operations on comparison
+  /// predicates into separate sequences that increase the amount of flow
+  /// control.
+  void setJumpIsExpensive(bool isExpensive = true);
+
+  /// Tells the code generator that this target supports floating point
+  /// exceptions and cares about preserving floating point exception behavior.
+  void setHasFloatingPointExceptions(bool FPExceptions = true) {
+    HasFloatingPointExceptions = FPExceptions;
+  }
+
+  /// Tells the code generator which bitwidths to bypass.
+  void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
+    BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
+  }
+
+  /// Add the specified register class as an available regclass for the
+  /// specified value type. This indicates the selector can handle values of
+  /// that class natively.
+  void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
+    assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
+    RegClassForVT[VT.SimpleTy] = RC;
+  }
+
+  /// Return the largest legal super-reg register class of the register class
+  /// for the specified type and its associated "cost".
+  virtual std::pair<const TargetRegisterClass *, uint8_t>
+  findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
+
+  /// Once all of the register classes are added, this allows us to compute
+  /// derived properties we expose.
+  void computeRegisterProperties(const TargetRegisterInfo *TRI);
+
+  /// Indicate that the specified operation does not work with the specified
+  /// type and indicate what to do about it. Note that VT may refer to either
+  /// the type of a result or that of an operand of Op.
+  void setOperationAction(unsigned Op, MVT VT,
+                          LegalizeAction Action) {
+    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
+    OpActions[(unsigned)VT.SimpleTy][Op] = Action;
+  }
+
+  /// Indicate that the specified load with extension does not work with the
+  /// specified type and indicate what to do about it.
+  void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
+                        LegalizeAction Action) {
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
+           MemVT.isValid() && "Table isn't big enough!");
+    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
+    unsigned Shift = 4 * ExtType;
+    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
+    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
+  }
+
+  /// Indicate that the specified truncating store does not work with the
+  /// specified type and indicate what to do about it.
+  void setTruncStoreAction(MVT ValVT, MVT MemVT,
+                           LegalizeAction Action) {
+    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
+    TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
+  }
+
+  /// Indicate that the specified indexed load does or does not work with the
+  /// specified type and indicate what to do about it.
+ /// + /// NOTE: All indexed mode loads are initialized to Expand in + /// TargetLowering.cpp + void setIndexedLoadAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); + // Load action are kept in the upper half. + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; + } + + /// Indicate that the specified indexed store does or does not work with the + /// specified type and indicate what to do about it. + /// + /// NOTE: All indexed mode stores are initialized to Expand in + /// TargetLowering.cpp + void setIndexedStoreAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); + // Store action are kept in the lower half. + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); + } + + /// Indicate that the specified condition code is or isn't supported on the + /// target and indicate what to do about it. + void setCondCodeAction(ISD::CondCode CC, MVT VT, + LegalizeAction Action) { + assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && + "Table isn't big enough!"); + assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); + /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit + /// value and the upper 29 bits index into the second dimension of the array + /// to select what 32-bit value to use. + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); + CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; + } + + /// If Opc/OrigVT is specified as being promoted, the promotion code defaults + /// to trying a larger integer/fp until it can find one that works. If that + /// default is insufficient, this method can be used by the target to override + /// the default. + void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { + PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; + } + + /// Convenience method to set an operation to Promote and specify the type + /// in a single call. + void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { + setOperationAction(Opc, OrigVT, Promote); + AddPromotedToType(Opc, OrigVT, DestVT); + } + + /// Targets should invoke this method for each target independent node that + /// they want to provide a custom DAG combiner for by implementing the + /// PerformDAGCombine virtual method. + void setTargetDAGCombine(ISD::NodeType NT) { + assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); + TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); + } + + /// Set the target's required jmp_buf buffer size (in bytes); default is 200 + void setJumpBufSize(unsigned Size) { + JumpBufSize = Size; + } + + /// Set the target's required jmp_buf buffer alignment (in bytes); default is + /// 0 + void setJumpBufAlignment(unsigned Align) { + JumpBufAlignment = Align; + } + + /// Set the target's minimum function alignment (in log2(bytes)) + void setMinFunctionAlignment(unsigned Align) { + MinFunctionAlignment = Align; + } + + /// Set the target's preferred function alignment. 
This should be set if + /// there is a performance benefit to higher-than-minimum alignment (in + /// log2(bytes)) + void setPrefFunctionAlignment(unsigned Align) { + PrefFunctionAlignment = Align; + } + + /// Set the target's preferred loop alignment. Default alignment is zero, it + /// means the target does not care about loop alignment. The alignment is + /// specified in log2(bytes). The target may also override + /// getPrefLoopAlignment to provide per-loop values. + void setPrefLoopAlignment(unsigned Align) { + PrefLoopAlignment = Align; + } + + /// Set the minimum stack alignment of an argument (in log2(bytes)). + void setMinStackArgumentAlignment(unsigned Align) { + MinStackArgumentAlignment = Align; + } + + /// Set the maximum atomic operation size supported by the + /// backend. Atomic operations greater than this size (as well as + /// ones that are not naturally aligned), will be expanded by + /// AtomicExpandPass into an __atomic_* library call. + void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { + MaxAtomicSizeInBitsSupported = SizeInBits; + } + + /// Sets the minimum cmpxchg or ll/sc size supported by the backend. + void setMinCmpXchgSizeInBits(unsigned SizeInBits) { + MinCmpXchgSizeInBits = SizeInBits; + } + + /// Sets whether unaligned atomic operations are supported. + void setSupportsUnalignedAtomics(bool UnalignedSupported) { + SupportsUnalignedAtomics = UnalignedSupported; + } + +public: + //===--------------------------------------------------------------------===// + // Addressing mode description hooks (used by LSR etc). + // + + /// CodeGenPrepare sinks address calculations into the same BB as Load/Store + /// instructions reading the address. This allows as much computation as + /// possible to be done in the address mode for that operand. This hook lets + /// targets also pass back when this should be done on intrinsics which + /// load/store. + virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, + SmallVectorImpl<Value*> &/*Ops*/, + Type *&/*AccessTy*/) const { + return false; + } + + /// This represents an addressing mode of: + /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + /// If BaseGV is null, there is no BaseGV. + /// If BaseOffs is zero, there is no base offset. + /// If HasBaseReg is false, there is no base register. + /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with + /// no scale. + struct AddrMode { + GlobalValue *BaseGV = nullptr; + int64_t BaseOffs = 0; + bool HasBaseReg = false; + int64_t Scale = 0; + AddrMode() = default; + }; + + /// Return true if the addressing mode represented by AM is legal for this + /// target, for a load/store of the specified type. + /// + /// The type may be VoidTy, in which case only return true if the addressing + /// mode is legal for a load/store of any legal type. TODO: Handle + /// pre/postinc as well. + /// + /// If the address space cannot be determined, it will be -1. + /// + /// TODO: Remove default argument + virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AddrSpace, + Instruction *I = nullptr) const; + + /// \brief Return the cost of the scaling factor used in the addressing mode + /// represented by AM for this target, for a load/store of the specified type. + /// + /// If the AM is supported, the return value must be >= 0. + /// If the AM is not supported, it returns a negative value. + /// TODO: Handle pre/postinc as well. 
+ /// TODO: Remove default argument
+ virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS = 0) const {
+ // Default: assume that any scaling factor used in a legal AM is free.
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
+ return 0;
+ return -1;
+ }
+
+ /// Return true if the specified immediate is a legal icmp immediate, that is
+ /// the target has icmp instructions which can compare a register against the
+ /// immediate without having to materialize the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t) const {
+ return true;
+ }
+
+ /// Return true if the specified immediate is a legal add immediate, that is
+ /// the target has add instructions which can add a register with the
+ /// immediate without having to materialize the immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t) const {
+ return true;
+ }
+
+ /// Return true if it's significantly cheaper to shift a vector by a uniform
+ /// scalar than by an amount which will vary across each lane. On x86, for
+ /// example, there is a "psllw" instruction for the former case, but no simple
+ /// instruction for a general "a << b" operation on vectors.
+ virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
+ return false;
+ }
+
+ /// Returns true if the opcode is a commutative binary operation.
+ virtual bool isCommutativeBinOp(unsigned Opcode) const {
+ // FIXME: This should get its info from the td file.
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::MUL:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ return true;
+ default: return false;
+ }
+ }
+
+ /// Return true if it's free to truncate a value of type FromTy to type
+ /// ToTy. e.g. on x86 it's free to truncate an i32 value in register EAX to
+ /// i16 by referencing its sub-register AX.
+ /// Targets must return false when FromTy <= ToTy.
+ virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
+ return false;
+ }
+
+ /// Return true if a truncation from FromTy to ToTy is permitted when deciding
+ /// whether a call is in tail position. Typically this means that both results
+ /// would be assigned to the same register or stack slot, but it could mean
+ /// the target performs adequate checks of its own before proceeding with the
+ /// tail call. Targets must return false when FromTy <= ToTy.
+ virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
+ return false;
+ }
+
+ virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
+ return false;
+ }
+
+ virtual bool isProfitableToHoist(Instruction *I) const { return true; }
+
+ /// Return true if the extension represented by \p I is free.
+ /// Unlike the is[Z|FP]ExtFree family, which is based on types,
+ /// this method can use the context provided by \p I to decide
+ /// whether or not \p I is free.
+ /// This method extends the behavior of the is[Z|FP]ExtFree family.
+ /// In other words, if is[Z|FP]ExtFree returns true, then this method
+ /// returns true as well. The converse is not true.
+ /// The target can perform the adequate checks by overriding isExtFreeImpl.
+ /// \pre \p I must be a sign, zero, or fp extension.
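+ ///
+ /// For illustration, given a hypothetical instruction
+ /// \code
+ /// %e = zext i8 %x to i32
+ /// \endcode
+ /// this returns true whenever isZExtFree(i8, i32) does, and otherwise defers
+ /// to isExtFreeImpl for a context-sensitive answer.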
+ bool isExtFree(const Instruction *I) const { + switch (I->getOpcode()) { + case Instruction::FPExt: + if (isFPExtFree(EVT::getEVT(I->getType()), + EVT::getEVT(I->getOperand(0)->getType()))) + return true; + break; + case Instruction::ZExt: + if (isZExtFree(I->getOperand(0)->getType(), I->getType())) + return true; + break; + case Instruction::SExt: + break; + default: + llvm_unreachable("Instruction is not an extension"); + } + return isExtFreeImpl(I); + } + + /// Return true if \p Load and \p Ext can form an ExtLoad. + /// For example, in AArch64 + /// %L = load i8, i8* %ptr + /// %E = zext i8 %L to i32 + /// can be lowered into one load instruction + /// ldrb w0, [x0] + bool isExtLoad(const LoadInst *Load, const Instruction *Ext, + const DataLayout &DL) const { + EVT VT = getValueType(DL, Ext->getType()); + EVT LoadVT = getValueType(DL, Load->getType()); + + // If the load has other users and the truncate is not free, the ext + // probably isn't free. + if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && + !isTruncateFree(Ext->getType(), Load->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(Ext)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + + return isLoadExtLegal(LType, VT, LoadVT); + } + + /// Return true if any actual instruction that defines a value of type FromTy + /// implicitly zero-extends the value to ToTy in the result register. + /// + /// The function should return true when it is likely that the truncate can + /// be freely folded with an instruction defining a value of FromTy. If + /// the defining instruction is unknown (because you're looking at a + /// function argument, PHI, etc.) then the target may require an + /// explicit truncate, which is not necessarily free, but this function + /// does not deal with those cases. + /// Targets must return false when FromTy >= ToTy. + virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { + return false; + } + + virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { + return false; + } + + /// Return true if the target supplies and combines to a paired load + /// two loaded values of type LoadedType next to each other in memory. + /// RequiredAlignment gives the minimal alignment constraints that must be met + /// to be able to select this paired load. + /// + /// This information is *not* used to generate actual paired loads, but it is + /// used to generate a sequence of loads that is easier to combine into a + /// paired load. + /// For instance, something like this: + /// a = load i64* addr + /// b = trunc i64 a to i32 + /// c = lshr i64 a, 32 + /// d = trunc i64 c to i32 + /// will be optimized into: + /// b = load i32* addr1 + /// d = load i32* addr2 + /// Where addr1 = addr2 +/- sizeof(i32). + /// + /// In other words, unless the target performs a post-isel load combining, + /// this information should not be provided because it will generate more + /// loads. + virtual bool hasPairedLoad(EVT /*LoadedType*/, + unsigned & /*RequiredAlignment*/) const { + return false; + } + + /// \brief Get the maximum supported factor for interleaved memory accesses. + /// Default to be the minimum interleave factor: 2. + virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } + + /// \brief Lower an interleaved load to target specific intrinsics. Return + /// true on success. + /// + /// \p LI is the vector load instruction. 
+ /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. + /// \p Indices is the corresponding indices for each shufflevector. + /// \p Factor is the interleave factor. + virtual bool lowerInterleavedLoad(LoadInst *LI, + ArrayRef<ShuffleVectorInst *> Shuffles, + ArrayRef<unsigned> Indices, + unsigned Factor) const { + return false; + } + + /// \brief Lower an interleaved store to target specific intrinsics. Return + /// true on success. + /// + /// \p SI is the vector store instruction. + /// \p SVI is the shufflevector to RE-interleave the stored vector. + /// \p Factor is the interleave factor. + virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, + unsigned Factor) const { + return false; + } + + /// Return true if zero-extending the specific node Val to type VT2 is free + /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or + /// because it's folded such as X86 zero-extending loads). + virtual bool isZExtFree(SDValue Val, EVT VT2) const { + return isZExtFree(Val.getValueType(), VT2); + } + + /// Return true if an fpext operation is free (for instance, because + /// single-precision floating-point numbers are implicitly extended to + /// double-precision). + virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { + assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && + "invalid fpext types"); + return false; + } + + /// Return true if an fpext operation input to an \p Opcode operation is free + /// (for instance, because half-precision floating-point numbers are + /// implicitly extended to float-precision) for an FMA instruction. + virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const { + assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && + "invalid fpext types"); + return isFPExtFree(DestVT, SrcVT); + } + + /// Return true if folding a vector load into ExtVal (a sign, zero, or any + /// extend node) is profitable. + virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } + + /// Return true if an fneg operation is free to the point where it is never + /// worthwhile to replace it with a bitwise operation. + virtual bool isFNegFree(EVT VT) const { + assert(VT.isFloatingPoint()); + return false; + } + + /// Return true if an fabs operation is free to the point where it is never + /// worthwhile to replace it with a bitwise operation. + virtual bool isFAbsFree(EVT VT) const { + assert(VT.isFloatingPoint()); + return false; + } + + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true, otherwise fmuladd is expanded to fmul + fadd. + /// + /// NOTE: This may be called before legalization on types for which FMAs are + /// not legal, but should return true if those types will eventually legalize + /// to types that support FMAs. After legalization, it will only be called on + /// types that support FMAs (via Legal or Custom actions) + virtual bool isFMAFasterThanFMulAndFAdd(EVT) const { + return false; + } + + /// Return true if it's profitable to narrow operations of type VT1 to + /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from + /// i32 to i16. + virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const { + return false; + } + + /// \brief Return true if it is beneficial to convert a load of a constant to + /// just the constant itself. 
+ /// On some targets it might be more efficient to use a combination of + /// arithmetic instructions to materialize the constant instead of loading it + /// from a constant pool. + virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + return false; + } + + /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type + /// from this source type with this index. This is needed because + /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of + /// the first element, and only the target knows which lowering is cheap. + virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const { + return false; + } + + // Return true if it is profitable to use a scalar input to a BUILD_VECTOR + // even if the vector itself has multiple uses. + virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { + return false; + } + + //===--------------------------------------------------------------------===// + // Runtime Library hooks + // + + /// Rename the default libcall routine name for the specified libcall. + void setLibcallName(RTLIB::Libcall Call, const char *Name) { + LibcallRoutineNames[Call] = Name; + } + + /// Get the libcall routine name for the specified libcall. + const char *getLibcallName(RTLIB::Libcall Call) const { + return LibcallRoutineNames[Call]; + } + + /// Override the default CondCode to be used to test the result of the + /// comparison libcall against zero. + void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { + CmpLibcallCCs[Call] = CC; + } + + /// Get the CondCode that's to be used to test the result of the comparison + /// libcall against zero. + ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { + return CmpLibcallCCs[Call]; + } + + /// Set the CallingConv that should be used for the specified libcall. + void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { + LibcallCallingConvs[Call] = CC; + } + + /// Get the CallingConv that should be used for the specified libcall. + CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { + return LibcallCallingConvs[Call]; + } + + /// Execute target specific actions to finalize target lowering. + /// This is used to set extra flags in MachineFrameInformation and freezing + /// the set of reserved registers. + /// The default implementation just freezes the set of reserved registers. + virtual void finalizeLowering(MachineFunction &MF) const; + +private: + const TargetMachine &TM; + + /// Tells the code generator that the target has multiple (allocatable) + /// condition registers that can be used to store the results of comparisons + /// for use by selects and conditional branches. With multiple condition + /// registers, the code generator will not aggressively sink comparisons into + /// the blocks of their users. + bool HasMultipleConditionRegisters; + + /// Tells the code generator that the target has BitExtract instructions. + /// The code generator will aggressively sink "shift"s into the blocks of + /// their users if the users will generate "and" instructions which can be + /// combined with "shift" to BitExtract instructions. + bool HasExtractBitsInsn; + + /// Tells the code generator to bypass slow divide or remainder + /// instructions. For example, BypassSlowDivWidths[32,8] tells the code + /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer + /// div/rem when the operands are positive and less than 256. 
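+ ///
+ /// For illustration, a hypothetical target with a slow 64-bit divider could
+ /// populate this from its constructor via
+ /// \code
+ /// addBypassSlowDiv(64, 32); // try a 32-bit unsigned div/rem first
+ /// \endcode
+ /// The widths shown are placeholders; real values are target-specific.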
+ DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; + + /// Tells the code generator that it shouldn't generate extra flow control + /// instructions and should attempt to combine flow control instructions via + /// predication. + bool JumpIsExpensive; + + /// Whether the target supports or cares about preserving floating point + /// exception behavior. + bool HasFloatingPointExceptions; + + /// This target prefers to use _setjmp to implement llvm.setjmp. + /// + /// Defaults to false. + bool UseUnderscoreSetJmp; + + /// This target prefers to use _longjmp to implement llvm.longjmp. + /// + /// Defaults to false. + bool UseUnderscoreLongJmp; + + /// Information about the contents of the high-bits in boolean values held in + /// a type wider than i1. See getBooleanContents. + BooleanContent BooleanContents; + + /// Information about the contents of the high-bits in boolean values held in + /// a type wider than i1. See getBooleanContents. + BooleanContent BooleanFloatContents; + + /// Information about the contents of the high-bits in boolean vector values + /// when the element type is wider than i1. See getBooleanContents. + BooleanContent BooleanVectorContents; + + /// The target scheduling preference: shortest possible total cycles or lowest + /// register usage. + Sched::Preference SchedPreferenceInfo; + + /// The size, in bytes, of the target's jmp_buf buffers + unsigned JumpBufSize; + + /// The alignment, in bytes, of the target's jmp_buf buffers + unsigned JumpBufAlignment; + + /// The minimum alignment that any argument on the stack needs to have. + unsigned MinStackArgumentAlignment; + + /// The minimum function alignment (used when optimizing for size, and to + /// prevent explicitly provided alignment from leading to incorrect code). + unsigned MinFunctionAlignment; + + /// The preferred function alignment (used when alignment unspecified and + /// optimizing for speed). + unsigned PrefFunctionAlignment; + + /// The preferred loop alignment. + unsigned PrefLoopAlignment; + + /// Size in bits of the maximum atomics size the backend supports. + /// Accesses larger than this will be expanded by AtomicExpandPass. + unsigned MaxAtomicSizeInBitsSupported; + + /// Size in bits of the minimum cmpxchg or ll/sc operation the + /// backend supports. + unsigned MinCmpXchgSizeInBits; + + /// This indicates if the target supports unaligned atomic operations. + bool SupportsUnalignedAtomics; + + /// If set to a physical register, this specifies the register that + /// llvm.savestack/llvm.restorestack should save and restore. + unsigned StackPointerRegisterToSaveRestore; + + /// This indicates the default register class to use for each ValueType the + /// target supports natively. + const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; + unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; + MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; + + /// This indicates the "representative" register class to use for each + /// ValueType the target supports natively. This information is used by the + /// scheduler to track register pressure. By default, the representative + /// register class is the largest legal super-reg register class of the + /// register class of the specified type. e.g. On x86, i8, i16, and i32's + /// representative class would be GR32. + const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; + + /// This indicates the "cost" of the "representative" register class for each + /// ValueType. 
The cost is used by the scheduler to approximate register
+ /// pressure.
+ uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
+
+ /// For any value types we are promoting or expanding, this contains the value
+ /// type that we are changing to. For Expanded types, this contains one step
+ /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
+ /// (e.g. i64 -> i16). For types natively supported by the system, this holds
+ /// the same type (e.g. i32 -> i32).
+ MVT TransformToType[MVT::LAST_VALUETYPE];
+
+ /// For each operation and each value type, keep a LegalizeAction that
+ /// indicates how instruction selection should deal with the operation. Most
+ /// operations are Legal (aka, supported natively by the target), but
+ /// operations that are not should be described. Note that operations on
+ /// non-legal value types are not described here.
+ LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
+
+ /// For each load extension type and each value type, keep a LegalizeAction
+ /// that indicates how instruction selection should deal with a load of a
+ /// specific value type and extension type. Uses 4 bits to store the action
+ /// for each of the 4 load ext types.
+ uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
+
+ /// For each value type pair keep a LegalizeAction that indicates whether a
+ /// truncating store of a specific value type and truncating type is legal.
+ LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
+
+ /// For each indexed mode and each value type, keep a pair of LegalizeAction
+ /// that indicates how instruction selection should deal with the load /
+ /// store.
+ ///
+ /// The first dimension is the value_type for the reference. The second
+ /// dimension represents the various modes for load store.
+ uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
+
+ /// For each condition code (ISD::CondCode) keep a LegalizeAction that
+ /// indicates how instruction selection should deal with the condition code.
+ ///
+ /// Because each CC action takes up 4 bits, we need to have the array size be
+ /// large enough to fit all of the value types. This can be done by rounding
+ /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
+ uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
+
+protected:
+ ValueTypeActionImpl ValueTypeActions;
+
+private:
+ LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
+
+ /// Targets can specify ISD nodes that they would like PerformDAGCombine
+ /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
+ /// array.
+ unsigned char
+ TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
+
+ /// For operations that must be promoted to a specific type, this holds the
+ /// destination type. This map should be sparse, so don't hold it as an
+ /// array.
+ ///
+ /// Targets add entries to this map with AddPromotedToType(..), clients access
+ /// this with getTypeToPromoteTo(..).
+ std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
+ PromoteToType;
+
+ /// Stores the name of each libcall.
+ const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL];
+
+ /// The ISD::CondCode that should be used to test the result of each of the
+ /// comparison libcalls against zero.
+ ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
+
+ /// Stores the CallingConv that should be used for each libcall.
+ CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
+
+protected:
+ /// Return true if the extension represented by \p I is free.
+ /// \pre \p I is a sign, zero, or fp extension and
+ /// is[Z|FP]ExtFree of the related types is not true.
+ virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
+
+ /// Depth that GatherAllAliases should continue looking for chain
+ /// dependencies when trying to find a more preferable chain. As an
+ /// approximation, this should be more than the number of consecutive stores
+ /// expected to be merged.
+ unsigned GatherAllAliasesMaxDepth;
+
+ /// \brief Specify maximum number of store instructions per memset call.
+ ///
+ /// When lowering \@llvm.memset this field specifies the maximum number of
+ /// store operations that may be substituted for the call to memset. Targets
+ /// must set this value based on the cost threshold for that target. Targets
+ /// should assume that the memset will be done using as many of the largest
+ /// store operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
+ /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
+ /// store. This only applies to setting a constant array of a constant size.
+ unsigned MaxStoresPerMemset;
+
+ /// Maximum number of store operations that may be substituted for the call
+ /// to memset, used for functions with OptSize attribute.
+ unsigned MaxStoresPerMemsetOptSize;
+
+ /// \brief Specify maximum number of store instructions per memcpy call.
+ ///
+ /// When lowering \@llvm.memcpy this field specifies the maximum number of
+ /// store operations that may be substituted for a call to memcpy. Targets
+ /// must set this value based on the cost threshold for that target. Targets
+ /// should assume that the memcpy will be done using as many of the largest
+ /// store operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
+ /// with 32-bit alignment would result in one 4-byte store, one 2-byte store,
+ /// and one 1-byte store. This only applies to copying a constant array of
+ /// constant size.
+ unsigned MaxStoresPerMemcpy;
+
+ /// Maximum number of store operations that may be substituted for a call to
+ /// memcpy, used for functions with OptSize attribute.
+ unsigned MaxStoresPerMemcpyOptSize;
+ unsigned MaxLoadsPerMemcmp;
+ unsigned MaxLoadsPerMemcmpOptSize;
+
+ /// \brief Specify maximum number of store instructions per memmove call.
+ ///
+ /// When lowering \@llvm.memmove this field specifies the maximum number of
+ /// store instructions that may be substituted for a call to memmove. Targets
+ /// must set this value based on the cost threshold for that target. Targets
+ /// should assume that the memmove will be done using as many of the largest
+ /// store operations first, followed by smaller ones, if necessary, per
+ /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
+ /// with 8-bit alignment would result in nine 1-byte stores. This only
+ /// applies to copying a constant array of constant size.
+ unsigned MaxStoresPerMemmove;
+
+ /// Maximum number of store instructions that may be substituted for a call to
+ /// memmove, used for functions with OptSize attribute.
+ unsigned MaxStoresPerMemmoveOptSize;
+
+ /// Tells the code generator that select is more expensive than a branch if
+ /// the branch is usually predicted right.
+ bool PredictableSelectIsExpensive; + + /// \see enableExtLdPromotion. + bool EnableExtLdPromotion; + + /// Return true if the value types that can be represented by the specified + /// register class are all legal. + bool isLegalRC(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) const; + + /// Replace/modify any TargetFrameIndex operands with a targte-dependent + /// sequence of memory operands that is recognized by PrologEpilogInserter. + MachineBasicBlock *emitPatchPoint(MachineInstr &MI, + MachineBasicBlock *MBB) const; +}; + +/// This class defines information used to lower LLVM code to legal SelectionDAG +/// operators that the target instruction selector can accept natively. +/// +/// This class also defines callbacks that targets must implement to lower +/// target-specific constructs to SelectionDAG operators. +class TargetLowering : public TargetLoweringBase { +public: + struct DAGCombinerInfo; + + TargetLowering(const TargetLowering &) = delete; + TargetLowering &operator=(const TargetLowering &) = delete; + + /// NOTE: The TargetMachine owns TLOF. + explicit TargetLowering(const TargetMachine &TM); + + bool isPositionIndependent() const; + + /// Returns true by value, base pointer and offset pointer and addressing mode + /// by reference if the node's address can be legally represented as + /// pre-indexed load / store address. + virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, + SDValue &/*Offset*/, + ISD::MemIndexedMode &/*AM*/, + SelectionDAG &/*DAG*/) const { + return false; + } + + /// Returns true by value, base pointer and offset pointer and addressing mode + /// by reference if this node can be combined with a load / store to form a + /// post-indexed load / store. + virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, + SDValue &/*Base*/, + SDValue &/*Offset*/, + ISD::MemIndexedMode &/*AM*/, + SelectionDAG &/*DAG*/) const { + return false; + } + + /// Return the entry encoding for a jump table in the current function. The + /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. + virtual unsigned getJumpTableEncoding() const; + + virtual const MCExpr * + LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, + const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, + MCContext &/*Ctx*/) const { + llvm_unreachable("Need to implement this hook if target has custom JTIs"); + } + + /// Returns relocation base for the given PIC jumptable. + virtual SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const; + + /// This returns the relocation base for the given PIC jumptable, the same as + /// getPICJumpTableRelocBase, but as an MCExpr. + virtual const MCExpr * + getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI, MCContext &Ctx) const; + + /// Return true if folding a constant offset with the given GlobalAddress is + /// legal. It is frequently not legal in PIC relocation models. + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const; + + void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, + SDValue &NewRHS, ISD::CondCode &CCCode, + const SDLoc &DL) const; + + /// Returns a pair of (return value, chain). + /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 
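+ ///
+ /// For illustration, a hypothetical caller lowering an f64 remainder might
+ /// use something like
+ /// \code
+ /// SDValue Ops[] = { LHS, RHS };
+ /// std::pair<SDValue, SDValue> CallInfo =
+ /// makeLibCall(DAG, RTLIB::REM_F64, MVT::f64, Ops, /*isSigned=*/false, dl);
+ /// // CallInfo.first is the return value, CallInfo.second the output chain.
+ /// \endcode
+ /// where LHS, RHS, and dl are placeholders supplied by the caller.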
+ std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, + EVT RetVT, ArrayRef<SDValue> Ops, + bool isSigned, const SDLoc &dl, + bool doesNotReturn = false, + bool isReturnValueUsed = true) const; + + /// Check whether parameters to a call that are passed in callee saved + /// registers are the same as from the calling function. This needs to be + /// checked for tail call eligibility. + bool parametersInCSRMatch(const MachineRegisterInfo &MRI, + const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &ArgLocs, + const SmallVectorImpl<SDValue> &OutVals) const; + + //===--------------------------------------------------------------------===// + // TargetLowering Optimization Methods + // + + /// A convenience struct that encapsulates a DAG, and two SDValues for + /// returning information from TargetLowering to its clients that want to + /// combine. + struct TargetLoweringOpt { + SelectionDAG &DAG; + bool LegalTys; + bool LegalOps; + SDValue Old; + SDValue New; + + explicit TargetLoweringOpt(SelectionDAG &InDAG, + bool LT, bool LO) : + DAG(InDAG), LegalTys(LT), LegalOps(LO) {} + + bool LegalTypes() const { return LegalTys; } + bool LegalOperations() const { return LegalOps; } + + bool CombineTo(SDValue O, SDValue N) { + Old = O; + New = N; + return true; + } + }; + + /// Check to see if the specified operand of the specified instruction is a + /// constant integer. If so, check to see if there are any bits set in the + /// constant that are not demanded. If so, shrink the constant and return + /// true. + bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + // Target hook to do target-specific const optimization, which is called by + // ShrinkDemandedConstant. This function should return true if the target + // doesn't want ShrinkDemandedConstant to further optimize the constant. + virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + return false; + } + + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This + /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be + /// generalized for targets with other types of implicit widening casts. + bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + /// Helper for SimplifyDemandedBits that can simplify an operation with + /// multiple uses. This function simplifies operand \p OpIdx of \p User and + /// then updates \p User with the simplified version. No other uses of + /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this + /// function behaves exactly like function SimplifyDemandedBits declared + /// below except that it also updates the DAG by calling + /// DCI.CommitTargetLoweringOpt. + bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, + DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; + + /// Look at Op. At this point, we know that only the DemandedMask bits of the + /// result of Op are ever used downstream. If we can use this information to + /// simplify Op, create a new simplified DAG node and return true, returning + /// the original and new nodes in Old and New. Otherwise, analyze the + /// expression and return a mask of KnownOne and KnownZero bits for the + /// expression (used to simplify the caller). The KnownZero/One bits may only + /// be accurate for those bits in the DemandedMask. 
+ /// \p AssumeSingleUse When this parameter is true, this function will
+ /// attempt to simplify \p Op even if there are multiple uses.
+ /// Callers are responsible for correctly updating the DAG based on the
+ /// results of this function, because simply replacing TLO.Old
+ /// with TLO.New will be incorrect when this parameter is true and TLO.Old
+ /// has multiple uses.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+ KnownBits &Known,
+ TargetLoweringOpt &TLO,
+ unsigned Depth = 0,
+ bool AssumeSingleUse = false) const;
+
+ /// Helper wrapper around SimplifyDemandedBits.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+ DAGCombinerInfo &DCI) const;
+
+ /// Determine which of the bits specified in Mask are known to be either zero
+ /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
+ /// argument allows us to only collect the known bits that are shared by the
+ /// requested vector elements.
+ virtual void computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
+ /// Default implementation computes low bits based on alignment
+ /// information. This should preserve known bits passed into it.
+ virtual void computeKnownBitsForFrameIndex(const SDValue FIOp,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ /// This method can be implemented by targets that want to expose additional
+ /// information about sign bits to the DAG Combiner. The DemandedElts
+ /// argument allows us to only collect the minimum sign bits that are shared
+ /// by the requested vector elements.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ struct DAGCombinerInfo {
+ void *DC; // The DAG Combiner object.
+ CombineLevel Level;
+ bool CalledByLegalizer;
+
+ public:
+ SelectionDAG &DAG;
+
+ DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
+ : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
+
+ bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
+ bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
+ bool isAfterLegalizeVectorOps() const {
+ return Level == AfterLegalizeDAG;
+ }
+ CombineLevel getDAGCombineLevel() { return Level; }
+ bool isCalledByLegalizer() const { return CalledByLegalizer; }
+
+ void AddToWorklist(SDNode *N);
+ SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
+
+ void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
+ };
+
+ /// Return true if \p N is a constant or constant vector equal to the true
+ /// value from getBooleanContents().
+ bool isConstTrueVal(const SDNode *N) const;
+
+ /// Return true if \p N is a constant or constant vector equal to the false
+ /// value from getBooleanContents().
+ bool isConstFalseVal(const SDNode *N) const;
+
+ /// Return a constant of type VT that contains a true value that respects
+ /// getBooleanContents().
+ SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const;
+
+ /// Return if \p N is a True value when extended to \p VT.
+ bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const;
+
+ /// Try to simplify a setcc built with the specified operands and cc. If it is
+ /// unable to simplify it, return a null SDValue.
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ bool foldBooleans, DAGCombinerInfo &DCI,
+ const SDLoc &dl) const;
+
+ // For targets which wrap addresses, unwrap for analysis.
+ virtual SDValue unwrapAddress(SDValue N) const { return N; }
+
+ /// Returns true (and the GlobalValue and the offset) if the node is a
+ /// GlobalAddress + offset.
+ virtual bool
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+ /// This method will be invoked for all target nodes and for any
+ /// target-independent nodes that the target has registered (via
+ /// setTargetDAGCombine) to be invoked for.
+ ///
+ /// The semantics are as follows:
+ /// Return Value:
+ /// SDValue.Val == 0 - No change was made
+ /// SDValue.Val == N - N was replaced, is dead, and is already handled.
+ /// otherwise - N should be replaced by the returned Operand.
+ ///
+ /// In addition, methods provided by DAGCombinerInfo may be used to perform
+ /// more complex transformations.
+ ///
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// Return true if it is profitable to move a following shift through this
+ /// node, adjusting any immediate operands as necessary to preserve semantics.
+ /// This transformation may not be desirable if it disrupts a particularly
+ /// auspicious target-specific tree (e.g. bitfield extraction in AArch64).
+ /// By default, it returns true.
+ virtual bool isDesirableToCommuteWithShift(const SDNode *N) const {
+ return true;
+ }
+
+ // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
+ // to a shuffle and a truncate.
+ // Example of such a combine:
+ // v4i32 build_vector((extract_elt V, 1),
+ // (extract_elt V, 3),
+ // (extract_elt V, 5),
+ // (extract_elt V, 7))
+ // -->
+ // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
+ virtual bool isDesirableToCombineBuildVectorToShuffleTruncate(
+ ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
+ return false;
+ }
+
+ /// Return true if the target has native support for the specified value type
+ /// and it is 'desirable' to use the type for the given node type. e.g. On x86
+ /// i16 is legal, but undesirable since i16 instruction encodings are longer
+ /// and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
+ // By default, assume all legal types are desirable.
+ return isTypeLegal(VT);
+ }
+
+ /// Return true if it is profitable for dag combiner to transform a floating
+ /// point op of specified opcode to an equivalent op of an integer
+ /// type. e.g. f32 load -> i32 load can be profitable on ARM.
+ virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
+ EVT /*VT*/) const {
+ return false;
+ }
+
+ /// This method queries the target whether it is beneficial for dag combiner
+ /// to promote the specified node. If true, it should return the desired
+ /// promotion type by reference.
+ virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
+ return false;
+ }
+
+ /// Return true if the target supports the swifterror attribute. It optimizes
+ /// loads and stores to reading and writing a specific register.
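+ ///
+ /// For illustration, the attribute appears on a pointer-to-pointer argument
+ /// such as
+ /// \code
+ /// declare swiftcc void @f(i8** swifterror)
+ /// \endcode
+ /// and targets returning true here keep such values in a specific register.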
+ virtual bool supportSwiftError() const { + return false; + } + + /// Return true if the target supports that a subset of CSRs for the given + /// machine function is handled explicitly via copies. + virtual bool supportSplitCSR(MachineFunction *MF) const { + return false; + } + + /// Perform necessary initialization to handle a subset of CSRs explicitly + /// via copies. This function is called at the beginning of instruction + /// selection. + virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { + llvm_unreachable("Not Implemented"); + } + + /// Insert explicit copies in entry and exit blocks. We copy a subset of + /// CSRs to virtual registers in the entry block, and copy them back to + /// physical registers in the exit blocks. This function is called at the end + /// of instruction selection. + virtual void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + llvm_unreachable("Not Implemented"); + } + + //===--------------------------------------------------------------------===// + // Lowering methods - These methods must be implemented by targets so that + // the SelectionDAGBuilder code knows how to lower these. + // + + /// This hook must be implemented to lower the incoming (formal) arguments, + /// described by the Ins array, into the specified DAG. The implementation + /// should fill in the InVals array with legal-type argument values, and + /// return the resulting token chain value. + virtual SDValue LowerFormalArguments( + SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, + const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, + SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { + llvm_unreachable("Not Implemented"); + } + + /// This structure contains all information that is necessary for lowering + /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder + /// needs to lower a call, and targets will see this struct in their LowerCall + /// implementation. + struct CallLoweringInfo { + SDValue Chain; + Type *RetTy = nullptr; + bool RetSExt : 1; + bool RetZExt : 1; + bool IsVarArg : 1; + bool IsInReg : 1; + bool DoesNotReturn : 1; + bool IsReturnValueUsed : 1; + bool IsConvergent : 1; + bool IsPatchPoint : 1; + + // IsTailCall should be modified by implementations of + // TargetLowering::LowerCall that perform tail call conversions. + bool IsTailCall = false; + + // Is Call lowering done post SelectionDAG type legalization. 
+ bool IsPostTypeLegalization = false; + + unsigned NumFixedArgs = -1; + CallingConv::ID CallConv = CallingConv::C; + SDValue Callee; + ArgListTy Args; + SelectionDAG &DAG; + SDLoc DL; + ImmutableCallSite CS; + SmallVector<ISD::OutputArg, 32> Outs; + SmallVector<SDValue, 32> OutVals; + SmallVector<ISD::InputArg, 32> Ins; + SmallVector<SDValue, 4> InVals; + + CallLoweringInfo(SelectionDAG &DAG) + : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), + DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), + IsPatchPoint(false), DAG(DAG) {} + + CallLoweringInfo &setDebugLoc(const SDLoc &dl) { + DL = dl; + return *this; + } + + CallLoweringInfo &setChain(SDValue InChain) { + Chain = InChain; + return *this; + } + + // setCallee with target/module-specific attributes + CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, + SDValue Target, ArgListTy &&ArgsList) { + RetTy = ResultType; + Callee = Target; + CallConv = CC; + NumFixedArgs = ArgsList.size(); + Args = std::move(ArgsList); + + DAG.getTargetLoweringInfo().markLibCallAttributes( + &(DAG.getMachineFunction()), CC, Args); + return *this; + } + + CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, + SDValue Target, ArgListTy &&ArgsList) { + RetTy = ResultType; + Callee = Target; + CallConv = CC; + NumFixedArgs = ArgsList.size(); + Args = std::move(ArgsList); + return *this; + } + + CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, + SDValue Target, ArgListTy &&ArgsList, + ImmutableCallSite Call) { + RetTy = ResultType; + + IsInReg = Call.hasRetAttr(Attribute::InReg); + DoesNotReturn = + Call.doesNotReturn() || + (!Call.isInvoke() && + isa<UnreachableInst>(Call.getInstruction()->getNextNode())); + IsVarArg = FTy->isVarArg(); + IsReturnValueUsed = !Call.getInstruction()->use_empty(); + RetSExt = Call.hasRetAttr(Attribute::SExt); + RetZExt = Call.hasRetAttr(Attribute::ZExt); + + Callee = Target; + + CallConv = Call.getCallingConv(); + NumFixedArgs = FTy->getNumParams(); + Args = std::move(ArgsList); + + CS = Call; + + return *this; + } + + CallLoweringInfo &setInRegister(bool Value = true) { + IsInReg = Value; + return *this; + } + + CallLoweringInfo &setNoReturn(bool Value = true) { + DoesNotReturn = Value; + return *this; + } + + CallLoweringInfo &setVarArg(bool Value = true) { + IsVarArg = Value; + return *this; + } + + CallLoweringInfo &setTailCall(bool Value = true) { + IsTailCall = Value; + return *this; + } + + CallLoweringInfo &setDiscardResult(bool Value = true) { + IsReturnValueUsed = !Value; + return *this; + } + + CallLoweringInfo &setConvergent(bool Value = true) { + IsConvergent = Value; + return *this; + } + + CallLoweringInfo &setSExtResult(bool Value = true) { + RetSExt = Value; + return *this; + } + + CallLoweringInfo &setZExtResult(bool Value = true) { + RetZExt = Value; + return *this; + } + + CallLoweringInfo &setIsPatchPoint(bool Value = true) { + IsPatchPoint = Value; + return *this; + } + + CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { + IsPostTypeLegalization = Value; + return *this; + } + + ArgListTy &getArgs() { + return Args; + } + }; + + /// This function lowers an abstract call to a function into an actual call. + /// This returns a pair of operands. The first element is the return value + /// for the function (if RetTy is not VoidTy). The second element is the + /// outgoing token chain. It calls LowerCall to do the actual lowering. 
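+ ///
+ /// For illustration, a hypothetical backend emitting a runtime call might
+ /// drive it roughly as follows (Chain, Callee, RetTy, Args, and dl are
+ /// placeholders provided by the caller):
+ /// \code
+ /// TargetLowering::CallLoweringInfo CLI(DAG);
+ /// CLI.setDebugLoc(dl)
+ /// .setChain(Chain)
+ /// .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
+ /// std::pair<SDValue, SDValue> Result = LowerCallTo(CLI);
+ /// // Result.first is the call's return value, Result.second the chain.
+ /// \endcode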
+ std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
+
+ /// This hook must be implemented to lower calls into the specified
+ /// DAG. The outgoing arguments to the call are described by the Outs array,
+ /// and the values to be returned by the call are described by the Ins
+ /// array. The implementation should fill in the InVals array with legal-type
+ /// return values from the call, and return the resulting token chain value.
+ virtual SDValue
+ LowerCall(CallLoweringInfo &/*CLI*/,
+ SmallVectorImpl<SDValue> &/*InVals*/) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ /// Target-specific cleanup for formal ByVal parameters.
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
+
+ /// This hook should be implemented to check whether the return values
+ /// described by the Outs array can fit into the return registers. If false
+ /// is returned, an sret-demotion is performed.
+ virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
+ MachineFunction &/*MF*/, bool /*isVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
+ LLVMContext &/*Context*/) const
+ {
+ // Return true by default to get preexisting behavior.
+ return true;
+ }
+
+ /// This hook must be implemented to lower outgoing return values, described
+ /// by the Outs array, into the specified DAG. The implementation should
+ /// return the resulting token chain value.
+ virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
+ bool /*isVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
+ const SmallVectorImpl<SDValue> & /*OutVals*/,
+ const SDLoc & /*dl*/,
+ SelectionDAG & /*DAG*/) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ /// Return true if the result of the specified node is used by a return node
+ /// only. It also computes and returns the input chain for the tail call.
+ ///
+ /// This is used to determine whether it is possible to codegen a libcall as
+ /// a tail call at legalization time.
+ virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
+ return false;
+ }
+
+ /// Return true if the target may be able to emit the call instruction as a
+ /// tail call. This is used by optimization passes to determine if it's
+ /// profitable to duplicate return instructions to enable tailcall
+ /// optimization.
+ virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
+ return false;
+ }
+
+ /// Return the builtin name for the __builtin___clear_cache intrinsic.
+ /// Default is to invoke the clear cache library call.
+ virtual const char * getClearCacheBuiltinName() const {
+ return "__clear_cache";
+ }
+
+ /// Return the register ID of the name passed in. Used by the named register
+ /// global variables extension. There is no target-independent behaviour
+ /// so the default action is to bail.
+ virtual unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ report_fatal_error("Named registers not implemented for this target");
+ }
+
+ /// Return the type that should be used to zero or sign extend a
+ /// zeroext/signext integer return value. FIXME: Some C calling conventions
+ /// require the return type to be promoted, but this is not true all the time,
+ /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
+ /// conventions. The frontend should handle this and include all of the
+ /// necessary information.
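+ ///
+ /// For illustration, with the default implementation below, a zeroext i8
+ /// return value on a typical 32-bit target is widened to i32, since i8 is
+ /// smaller than the register type chosen for MVT::i32.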
+ virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, + ISD::NodeType /*ExtendKind*/) const { + EVT MinVT = getRegisterType(Context, MVT::i32); + return VT.bitsLT(MinVT) ? MinVT : VT; + } + + /// For some targets, an LLVM struct type must be broken down into multiple + /// simple types, but the calling convention specifies that the entire struct + /// must be passed in a block of consecutive registers. + virtual bool + functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, + bool isVarArg) const { + return false; + } + + /// Returns a 0 terminated array of registers that can be safely used as + /// scratch registers. + virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { + return nullptr; + } + + /// This callback is used to prepare for a volatile or atomic load. + /// It takes a chain node as input and returns the chain for the load itself. + /// + /// Having a callback like this is necessary for targets like SystemZ, + /// which allows a CPU to reuse the result of a previous load indefinitely, + /// even if a cache-coherent store is performed by another CPU. The default + /// implementation does nothing. + virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, + SelectionDAG &DAG) const { + return Chain; + } + + /// This callback is used to inspect load/store instructions and add + /// target-specific MachineMemOperand flags to them. The default + /// implementation does nothing. + virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const { + return MachineMemOperand::MONone; + } + + /// This callback is invoked by the type legalizer to legalize nodes with an + /// illegal operand type but legal result types. It replaces the + /// LowerOperation callback in the type Legalizer. The reason we can not do + /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to + /// use this callback. + /// + /// TODO: Consider merging with ReplaceNodeResults. + /// + /// The target places new result values for the node in Results (their number + /// and types must exactly match those of the original return values of + /// the node), or leaves Results empty, which indicates that the node is not + /// to be custom lowered after all. + /// The default implementation calls LowerOperation. + virtual void LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const; + + /// This callback is invoked for operations that are unsupported by the + /// target, which are registered to use 'custom' lowering, and whose defined + /// values are all legal. If the target has no operations that require custom + /// lowering, it need not implement this. The default implementation of this + /// aborts. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + /// This callback is invoked when a node result type is illegal for the + /// target, and the operation was registered to use 'custom' lowering for that + /// result type. The target places new result values for the node in Results + /// (their number and types must exactly match those of the original return + /// values of the node), or leaves Results empty, which indicates that the + /// node is not to be custom lowered after all. + /// + /// If the target has no operations that require custom lowering, it need not + /// implement this. The default implementation aborts. 
+ virtual void ReplaceNodeResults(SDNode * /*N*/, + SmallVectorImpl<SDValue> &/*Results*/, + SelectionDAG &/*DAG*/) const { + llvm_unreachable("ReplaceNodeResults not implemented for this target!"); + } + + /// This method returns the name of a target specific DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// This method returns a target specific FastISel object, or null if the + /// target does not support "fast" ISel. + virtual FastISel *createFastISel(FunctionLoweringInfo &, + const TargetLibraryInfo *) const { + return nullptr; + } + + bool verifyReturnAddressArgumentIsConstant(SDValue Op, + SelectionDAG &DAG) const; + + //===--------------------------------------------------------------------===// + // Inline Asm Support hooks + // + + /// This hook allows the target to expand an inline asm call to be explicit + /// llvm code if it wants to. This is useful for turning simple inline asms + /// into LLVM intrinsics, which gives the compiler more information about the + /// behavior of the code. + virtual bool ExpandInlineAsm(CallInst *) const { + return false; + } + + enum ConstraintType { + C_Register, // Constraint represents specific register(s). + C_RegisterClass, // Constraint represents any of register(s) in class. + C_Memory, // Memory constraint. + C_Other, // Something else. + C_Unknown // Unsupported constraint. + }; + + enum ConstraintWeight { + // Generic weights. + CW_Invalid = -1, // No match. + CW_Okay = 0, // Acceptable. + CW_Good = 1, // Good weight. + CW_Better = 2, // Better weight. + CW_Best = 3, // Best weight. + + // Well-known weights. + CW_SpecificReg = CW_Okay, // Specific register operands. + CW_Register = CW_Good, // Register operands. + CW_Memory = CW_Better, // Memory operands. + CW_Constant = CW_Best, // Constant operand. + CW_Default = CW_Okay // Default or don't know type. + }; + + /// This contains information for each constraint that we are lowering. + struct AsmOperandInfo : public InlineAsm::ConstraintInfo { + /// This contains the actual string for the code, like "m". TargetLowering + /// picks the 'best' code from ConstraintInfo::Codes that most closely + /// matches the operand. + std::string ConstraintCode; + + /// Information about the constraint code, e.g. Register, RegisterClass, + /// Memory, Other, Unknown. + TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; + + /// If this is the result output operand or a clobber, this is null, + /// otherwise it is the incoming operand to the CallInst. This gets + /// modified as the asm is processed. + Value *CallOperandVal = nullptr; + + /// The ValueType for the operand value. + MVT ConstraintVT = MVT::Other; + + /// Copy constructor for copying from a ConstraintInfo. + AsmOperandInfo(InlineAsm::ConstraintInfo Info) + : InlineAsm::ConstraintInfo(std::move(Info)) {} + + /// Return true of this is an input operand that is a matching constraint + /// like "4". + bool isMatchingInputConstraint() const; + + /// If this is an input matching constraint, this method returns the output + /// operand it matches. + unsigned getMatchedOperand() const; + }; + + using AsmOperandInfoVector = std::vector<AsmOperandInfo>; + + /// Split up the constraint string from the inline assembly value into the + /// specific constraints and their prefixes, and also tie in the associated + /// operand values. If this returns an empty vector, and if the constraint + /// string itself isn't empty, there was an error parsing. 
+ virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, + const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const; + + /// Examine constraint type and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + virtual ConstraintWeight getMultipleConstraintMatchWeight( + AsmOperandInfo &info, int maIndex) const; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + virtual ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + + /// Determines the constraint code and constraint type to use for the specific + /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. + /// If the actual operand being passed in is available, it can be passed in as + /// Op, otherwise an empty SDValue can be passed. + virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + SelectionDAG *DAG = nullptr) const; + + /// Given a constraint, return the type of constraint it is for this target. + virtual ConstraintType getConstraintType(StringRef Constraint) const; + + /// Given a physical register constraint (e.g. {edx}), return the register + /// number and the register class for the register. + /// + /// Given a register class constraint, like 'r', if this corresponds directly + /// to an LLVM register class, return a register of 0 and the register class + /// pointer. + /// + /// This should only be used for C_Register constraints. On error, this + /// returns a register number of 0 and a null register class pointer. + virtual std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const; + + virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const { + if (ConstraintCode == "i") + return InlineAsm::Constraint_i; + else if (ConstraintCode == "m") + return InlineAsm::Constraint_m; + return InlineAsm::Constraint_Unknown; + } + + /// Try to replace an X constraint, which matches anything, with another that + /// has more specific requirements based on the type of the corresponding + /// operand. This returns null if there is no replacement to make. + virtual const char *LowerXConstraint(EVT ConstraintVT) const; + + /// Lower the specified operand into the Ops vector. If it is invalid, don't + /// add anything to Ops. + virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + //===--------------------------------------------------------------------===// + // Div utility functions + // + SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + bool IsAfterLegalization, + std::vector<SDNode *> *Created) const; + SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + bool IsAfterLegalization, + std::vector<SDNode *> *Created) const; + + /// Targets may override this function to provide custom SDIV lowering for + /// power-of-2 denominators. If the target returns an empty SDValue, LLVM + /// assumes SDIV is expensive and replaces it with a series of other integer + /// operations. + virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + std::vector<SDNode *> *Created) const; + + /// Indicate whether this target prefers to combine FDIVs with the same + /// divisor. 
If the transform should never be done, return zero. If the + /// transform should be done, return the minimum number of divisor uses + /// that must exist. + virtual unsigned combineRepeatedFPDivisors() const { + return 0; + } + + /// Hooks for building estimates in place of slower divisions and square + /// roots. + + /// Return either a square root or its reciprocal estimate value for the input + /// operand. + /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or + /// 'Enabled' as set by a potential default override attribute. + /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson + /// refinement iterations required to generate a sufficient (though not + /// necessarily IEEE-754 compliant) estimate is returned in that parameter. + /// The boolean UseOneConstNR output is used to select a Newton-Raphson + /// algorithm implementation that uses either one or two constants. + /// The boolean Reciprocal is used to select whether the estimate is for the + /// square root of the input operand or the reciprocal of its square root. + /// A target may choose to implement its own refinement within this function. + /// If that's true, then return '0' as the number of RefinementSteps to avoid + /// any further refinement of the estimate. + /// An empty SDValue return means no estimate sequence can be created. + virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps, + bool &UseOneConstNR, bool Reciprocal) const { + return SDValue(); + } + + /// Return a reciprocal estimate value for the input operand. + /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or + /// 'Enabled' as set by a potential default override attribute. + /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson + /// refinement iterations required to generate a sufficient (though not + /// necessarily IEEE-754 compliant) estimate is returned in that parameter. + /// A target may choose to implement its own refinement within this function. + /// If that's true, then return '0' as the number of RefinementSteps to avoid + /// any further refinement of the estimate. + /// An empty SDValue return means no estimate sequence can be created. + virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &RefinementSteps) const { + return SDValue(); + } + + //===--------------------------------------------------------------------===// + // Legalization utility functions + // + + /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, + /// respectively, each computing an n/2-bit part of the result. + /// \param Result A vector that will be filled with the parts of the result + /// in little-endian order. + /// \param LL Low bits of the LHS of the MUL. You can use this parameter + /// if you want to control how low bits are extracted from the LHS. + /// \param LH High bits of the LHS of the MUL. See LL for meaning. + /// \param RL Low bits of the RHS of the MUL. See LL for meaning + /// \param RH High bits of the RHS of the MUL. See LL for meaning. + /// \returns true if the node has been expanded, false if it has not + bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, + SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, + SelectionDAG &DAG, MulExpansionKind Kind, + SDValue LL = SDValue(), SDValue LH = SDValue(), + SDValue RL = SDValue(), SDValue RH = SDValue()) const; + + /// Expand a MUL into two nodes. 
One that computes the high bits of + /// the result and one that computes the low bits. + /// \param HiLoVT The value type to use for the Lo and Hi nodes. + /// \param LL Low bits of the LHS of the MUL. You can use this parameter + /// if you want to control how low bits are extracted from the LHS. + /// \param LH High bits of the LHS of the MUL. See LL for meaning. + /// \param RL Low bits of the RHS of the MUL. See LL for meaning + /// \param RH High bits of the RHS of the MUL. See LL for meaning. + /// \returns true if the node has been expanded. false if it has not + bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, MulExpansionKind Kind, + SDValue LL = SDValue(), SDValue LH = SDValue(), + SDValue RL = SDValue(), SDValue RH = SDValue()) const; + + /// Expand float(f32) to SINT(i64) conversion + /// \param N Node to expand + /// \param Result output after conversion + /// \returns True, if the expansion was successful, false otherwise + bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + + /// Turn load of vector type into a load of the individual elements. + /// \param LD load to expand + /// \returns MERGE_VALUEs of the scalar loads with their chains. + SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const; + + // Turn a store of a vector type into stores of the individual elements. + /// \param ST Store with a vector value type + /// \returns MERGE_VALUs of the individual store chains. + SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; + + /// Expands an unaligned load to 2 half-size loads for an integer, and + /// possibly more for vectors. + std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, + SelectionDAG &DAG) const; + + /// Expands an unaligned store to 2 half-size stores for integer values, and + /// possibly more for vectors. + SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; + + /// Increments memory address \p Addr according to the type of the value + /// \p DataVT that should be stored. If the data is stored in compressed + /// form, the memory address should be incremented according to the number of + /// the stored elements. This number is equal to the number of '1's bits + /// in the \p Mask. + /// \p DataVT is a vector type. \p Mask is a vector value. + /// \p DataVT and \p Mask have the same number of vector elements. + SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, + EVT DataVT, SelectionDAG &DAG, + bool IsCompressedMemory) const; + + /// Get a pointer to vector element \p Idx located in memory for a vector of + /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of + /// bounds the returned pointer is unspecified, but will be within the vector + /// bounds. + SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, + SDValue Idx) const; + + //===--------------------------------------------------------------------===// + // Instruction Emitting Hooks + // + + /// This method should be implemented by targets that mark instructions with + /// the 'usesCustomInserter' flag. These instructions are special in various + /// ways, which require special support to insert. The specified MachineInstr + /// is created but not inserted into any basic blocks, and this method is + /// called to expand it into a sequence of instructions, potentially also + /// creating new basic blocks and control flow. 
+ /// As long as the returned basic block is different (i.e., we created a new + /// one), the custom inserter is free to modify the rest of \p MBB. + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; + + /// This method should be implemented by targets that mark instructions with + /// the 'hasPostISelHook' flag. These instructions must be adjusted after + /// instruction selection by target hooks. e.g. To fill in optional defs for + /// ARM 's' setting instructions. + virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const; + + /// If this function returns true, SelectionDAGBuilder emits a + /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. + virtual bool useLoadStackGuardNode() const { + return false; + } + + virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const { + llvm_unreachable("not implemented for this target"); + } + + /// Lower TLS global address SDNode for target independent emulated TLS model. + virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; + + // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) + // If we're comparing for equality to zero and isCtlzFast is true, expose the + // fact that this can be implemented as a ctlz/srl pair, so that the dag + // combiner can fold the new nodes. + SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; + +private: + SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const; +}; + +/// Given an LLVM IR type and return type attributes, compute the return value +/// EVTs and flags, and optionally also the offsets, if the return value is +/// being lowered to memory. +void GetReturnInfo(Type *ReturnType, AttributeList attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI, const DataLayout &DL); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETLOWERING_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFile.h b/include/llvm/CodeGen/TargetLoweringObjectFile.h new file mode 100644 index 0000000000000..fe77c2954129f --- /dev/null +++ b/include/llvm/CodeGen/TargetLoweringObjectFile.h @@ -0,0 +1,194 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFile.h - Object Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H +#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/SectionKind.h" +#include <cstdint> + +namespace llvm { + +class GlobalValue; +class MachineModuleInfo; +class Mangler; +class MCContext; +class MCExpr; +class MCSection; +class MCSymbol; +class MCSymbolRefExpr; +class MCStreamer; +class MCValue; +class TargetMachine; + +class TargetLoweringObjectFile : public MCObjectFileInfo { + MCContext *Ctx = nullptr; + + /// Name-mangler for global names. 
+ Mangler *Mang = nullptr; + +protected: + bool SupportIndirectSymViaGOTPCRel = false; + bool SupportGOTPCRelWithOffset = true; + + /// This section contains the static constructor pointer list. + MCSection *StaticCtorSection = nullptr; + + /// This section contains the static destructor pointer list. + MCSection *StaticDtorSection = nullptr; + +public: + TargetLoweringObjectFile() = default; + TargetLoweringObjectFile(const TargetLoweringObjectFile &) = delete; + TargetLoweringObjectFile & + operator=(const TargetLoweringObjectFile &) = delete; + virtual ~TargetLoweringObjectFile(); + + MCContext &getContext() const { return *Ctx; } + Mangler &getMangler() const { return *Mang; } + + /// This method must be called before any actual lowering is done. This + /// specifies the current context for codegen, and gives the lowering + /// implementations a chance to set up their default sections. + virtual void Initialize(MCContext &ctx, const TargetMachine &TM); + + virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, + const MCSymbol *Sym) const; + + /// Emit the module-level metadata that the platform cares about. + virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const {} + + /// Given a constant with the SectionKind, return a section that it should be + /// placed in. + virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, + const Constant *C, + unsigned &Align) const; + + /// Classify the specified global variable into a set of target independent + /// categories embodied in SectionKind. + static SectionKind getKindForGlobal(const GlobalObject *GO, + const TargetMachine &TM); + + /// This method computes the appropriate section to emit the specified global + /// variable or function definition. This should not be passed external (or + /// available externally) globals. + MCSection *SectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const; + + /// This method computes the appropriate section to emit the specified global + /// variable or function definition. This should not be passed external (or + /// available externally) globals. + MCSection *SectionForGlobal(const GlobalObject *GO, + const TargetMachine &TM) const { + return SectionForGlobal(GO, getKindForGlobal(GO, TM), TM); + } + + virtual void getNameWithPrefix(SmallVectorImpl<char> &OutName, + const GlobalValue *GV, + const TargetMachine &TM) const; + + virtual MCSection *getSectionForJumpTable(const Function &F, + const TargetMachine &TM) const; + + virtual bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, + const Function &F) const; + + /// Targets should implement this method to assign a section to globals with + /// an explicit section specfied. The implementation of this method can + /// assume that GO->hasSection() is true. + virtual MCSection * + getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const = 0; + + /// Return an MCExpr to use for a reference to the specified global variable + /// from exception handling information. + virtual const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, + unsigned Encoding, + const TargetMachine &TM, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const; + + /// Return the MCSymbol for a private symbol with global value name as its + /// base, with the specified suffix. 
+ MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix, + const TargetMachine &TM) const; + + // The symbol that gets passed to .cfi_personality. + virtual MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, + const TargetMachine &TM, + MachineModuleInfo *MMI) const; + + const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, + MCStreamer &Streamer) const; + + virtual MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const { + return StaticCtorSection; + } + + virtual MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const { + return StaticDtorSection; + } + + /// \brief Create a symbol reference to describe the given TLS variable when + /// emitting the address in debug info. + virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const; + + virtual const MCExpr *lowerRelativeReference(const GlobalValue *LHS, + const GlobalValue *RHS, + const TargetMachine &TM) const { + return nullptr; + } + + /// \brief Target supports replacing a data "PC"-relative access to a symbol + /// through another symbol, by accessing the later via a GOT entry instead? + bool supportIndirectSymViaGOTPCRel() const { + return SupportIndirectSymViaGOTPCRel; + } + + /// \brief Target GOT "PC"-relative relocation supports encoding an additional + /// binary expression with an offset? + bool supportGOTPCRelWithOffset() const { + return SupportGOTPCRelWithOffset; + } + + /// \brief Get the target specific PC relative GOT entry relocation + virtual const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym, + const MCValue &MV, + int64_t Offset, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const { + return nullptr; + } + + virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, + const GlobalValue *GV) const {} + +protected: + virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO, + SectionKind Kind, + const TargetMachine &TM) const = 0; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETLOWERINGOBJECTFILE_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index e4d3cc9cecfcc..69de9f8cb35da 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { @@ -182,6 +182,10 @@ public: const Function &F) const override; void InitializeWasm(); + MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; const MCExpr *lowerRelativeReference(const GlobalValue *LHS, const GlobalValue *RHS, diff --git a/include/llvm/CodeGen/TargetOpcodes.def b/include/llvm/CodeGen/TargetOpcodes.def new file mode 100644 index 0000000000000..d3e8483798a7f --- /dev/null +++ b/include/llvm/CodeGen/TargetOpcodes.def @@ -0,0 +1,461 @@ +//===-- llvm/CodeGen/TargetOpcodes.def - Target Indep Opcodes ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the target independent instruction opcodes. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +/// HANDLE_TARGET_OPCODE defines an opcode and its associated enum value. +/// +#ifndef HANDLE_TARGET_OPCODE +#define HANDLE_TARGET_OPCODE(OPC, NUM) +#endif + +/// HANDLE_TARGET_OPCODE_MARKER defines an alternative identifier for an opcode. +/// +#ifndef HANDLE_TARGET_OPCODE_MARKER +#define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) +#endif + +/// Every instruction defined here must also appear in Target.td. +/// +HANDLE_TARGET_OPCODE(PHI) +HANDLE_TARGET_OPCODE(INLINEASM) +HANDLE_TARGET_OPCODE(CFI_INSTRUCTION) +HANDLE_TARGET_OPCODE(EH_LABEL) +HANDLE_TARGET_OPCODE(GC_LABEL) +HANDLE_TARGET_OPCODE(ANNOTATION_LABEL) + +/// KILL - This instruction is a noop that is used only to adjust the +/// liveness of registers. This can be useful when dealing with +/// sub-registers. +HANDLE_TARGET_OPCODE(KILL) + +/// EXTRACT_SUBREG - This instruction takes two operands: a register +/// that has subregisters, and a subregister index. It returns the +/// extracted subregister value. This is commonly used to implement +/// truncation operations on target architectures which support it. +HANDLE_TARGET_OPCODE(EXTRACT_SUBREG) + +/// INSERT_SUBREG - This instruction takes three operands: a register that +/// has subregisters, a register providing an insert value, and a +/// subregister index. It returns the value of the first register with the +/// value of the second register inserted. The first register is often +/// defined by an IMPLICIT_DEF, because it is commonly used to implement +/// anyext operations on target architectures which support it. +HANDLE_TARGET_OPCODE(INSERT_SUBREG) + +/// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef. +HANDLE_TARGET_OPCODE(IMPLICIT_DEF) + +/// SUBREG_TO_REG - Assert the value of bits in a super register. +/// The result of this instruction is the value of the second operand inserted +/// into the subregister specified by the third operand. All other bits are +/// assumed to be equal to the bits in the immediate integer constant in the +/// first operand. This instruction just communicates information; No code +/// should be generated. +/// This is typically used after an instruction where the write to a subregister +/// implicitly cleared the bits in the super registers. +HANDLE_TARGET_OPCODE(SUBREG_TO_REG) + +/// COPY_TO_REGCLASS - This instruction is a placeholder for a plain +/// register-to-register copy into a specific register class. This is only +/// used between instruction selection and MachineInstr creation, before +/// virtual registers have been created for all the instructions, and it's +/// only needed in cases where the register classes implied by the +/// instructions are insufficient. It is emitted as a COPY MachineInstr. + HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS) + +/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic +HANDLE_TARGET_OPCODE(DBG_VALUE) + +/// REG_SEQUENCE - This variadic instruction is used to form a register that +/// represents a consecutive sequence of sub-registers. It's used as a +/// register coalescing / allocation aid and must be eliminated before code +/// emission. +// In SDNode form, the first operand encodes the register class created by +// the REG_SEQUENCE, while each subsequent pair names a vreg + subreg index +// pair. 
Once it has been lowered to a MachineInstr, the regclass operand +// is no longer present. +/// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 +/// After register coalescing references of v1024 should be replace with +/// v1027:3, v1025 with v1027:4, etc. + HANDLE_TARGET_OPCODE(REG_SEQUENCE) + +/// COPY - Target-independent register copy. This instruction can also be +/// used to copy between subregisters of virtual registers. + HANDLE_TARGET_OPCODE(COPY) + +/// BUNDLE - This instruction represents an instruction bundle. Instructions +/// which immediately follow a BUNDLE instruction which are marked with +/// 'InsideBundle' flag are inside the bundle. +HANDLE_TARGET_OPCODE(BUNDLE) + +/// Lifetime markers. +HANDLE_TARGET_OPCODE(LIFETIME_START) +HANDLE_TARGET_OPCODE(LIFETIME_END) + +/// A Stackmap instruction captures the location of live variables at its +/// position in the instruction stream. It is followed by a shadow of bytes +/// that must lie within the function and not contain another stackmap. +HANDLE_TARGET_OPCODE(STACKMAP) + +/// FEntry all - This is a marker instruction which gets translated into a raw fentry call. +HANDLE_TARGET_OPCODE(FENTRY_CALL) + +/// Patchable call instruction - this instruction represents a call to a +/// constant address, followed by a series of NOPs. It is intended to +/// support optimizations for dynamic languages (such as javascript) that +/// rewrite calls to runtimes with more efficient code sequences. +/// This also implies a stack map. +HANDLE_TARGET_OPCODE(PATCHPOINT) + +/// This pseudo-instruction loads the stack guard value. Targets which need +/// to prevent the stack guard value or address from being spilled to the +/// stack should override TargetLowering::emitLoadStackGuardNode and +/// additionally expand this pseudo after register allocation. +HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD) + +/// Call instruction with associated vm state for deoptimization and list +/// of live pointers for relocation by the garbage collector. It is +/// intended to support garbage collection with fully precise relocating +/// collectors and deoptimizations in either the callee or caller. +HANDLE_TARGET_OPCODE(STATEPOINT) + +/// Instruction that records the offset of a local stack allocation passed to +/// llvm.localescape. It has two arguments: the symbol for the label and the +/// frame index of the local stack allocation. +HANDLE_TARGET_OPCODE(LOCAL_ESCAPE) + +/// Wraps a machine instruction which can fault, bundled with associated +/// information on how to handle such a fault. +/// For example loading instruction that may page fault, bundled with associated +/// information on how to handle such a page fault. It is intended to support +/// "zero cost" null checks in managed languages by allowing LLVM to fold +/// comparisons into existing memory operations. +HANDLE_TARGET_OPCODE(FAULTING_OP) + +/// Wraps a machine instruction to add patchability constraints. An +/// instruction wrapped in PATCHABLE_OP has to either have a minimum +/// size or be preceded with a nop of that size. The first operand is +/// an immediate denoting the minimum size of the instruction, the +/// second operand is an immediate denoting the opcode of the original +/// instruction. The rest of the operands are the operands of the +/// original instruction. +HANDLE_TARGET_OPCODE(PATCHABLE_OP) + +/// This is a marker instruction which gets translated into a nop sled, useful +/// for inserting instrumentation instructions at runtime. 
+HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER) + +/// Wraps a return instruction and its operands to enable adding nop sleds +/// either before or after the return. The nop sleds are useful for inserting +/// instrumentation instructions at runtime. +/// The patch here replaces the return instruction. +HANDLE_TARGET_OPCODE(PATCHABLE_RET) + +/// This is a marker instruction which gets translated into a nop sled, useful +/// for inserting instrumentation instructions at runtime. +/// The patch here prepends the return instruction. +/// The same thing as in x86_64 is not possible for ARM because it has multiple +/// return instructions. Furthermore, CPU allows parametrized and even +/// conditional return instructions. In the current ARM implementation we are +/// making use of the fact that currently LLVM doesn't seem to generate +/// conditional return instructions. +/// On ARM, the same instruction can be used for popping multiple registers +/// from the stack and returning (it just pops pc register too), and LLVM +/// generates it sometimes. So we can't insert the sled between this stack +/// adjustment and the return without splitting the original instruction into 2 +/// instructions. So on ARM, rather than jumping into the exit trampoline, we +/// call it, it does the tracing, preserves the stack and returns. +HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT) + +/// Wraps a tail call instruction and its operands to enable adding nop sleds +/// either before or after the tail exit. We use this as a disambiguation from +/// PATCHABLE_RET which specifically only works for return instructions. +HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) + +/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be +/// patched to insert instrumentation instructions. +HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL) + +/// The following generic opcodes are not supposed to appear after ISel. +/// This is something we might want to relax, but for now, this is convenient +/// to produce diagnostics. + +/// Generic ADD instruction. This is an integer add. +HANDLE_TARGET_OPCODE(G_ADD) +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD) + +/// Generic SUB instruction. This is an integer sub. +HANDLE_TARGET_OPCODE(G_SUB) + +// Generic multiply instruction. +HANDLE_TARGET_OPCODE(G_MUL) + +// Generic signed division instruction. +HANDLE_TARGET_OPCODE(G_SDIV) + +// Generic unsigned division instruction. +HANDLE_TARGET_OPCODE(G_UDIV) + +// Generic signed remainder instruction. +HANDLE_TARGET_OPCODE(G_SREM) + +// Generic unsigned remainder instruction. +HANDLE_TARGET_OPCODE(G_UREM) + +/// Generic bitwise and instruction. +HANDLE_TARGET_OPCODE(G_AND) + +/// Generic bitwise or instruction. +HANDLE_TARGET_OPCODE(G_OR) + +/// Generic bitwise exclusive-or instruction. +HANDLE_TARGET_OPCODE(G_XOR) + + +HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF) + +/// Generic PHI instruction with types. +HANDLE_TARGET_OPCODE(G_PHI) + +/// Generic instruction to materialize the address of an alloca or other +/// stack-based object. +HANDLE_TARGET_OPCODE(G_FRAME_INDEX) + +/// Generic reference to global value. +HANDLE_TARGET_OPCODE(G_GLOBAL_VALUE) + +/// Generic instruction to extract blocks of bits from the register given +/// (typically a sub-register COPY after instruction selection). +HANDLE_TARGET_OPCODE(G_EXTRACT) + +HANDLE_TARGET_OPCODE(G_UNMERGE_VALUES) + +/// Generic instruction to insert blocks of bits from the registers given into +/// the source. 
+HANDLE_TARGET_OPCODE(G_INSERT) + +/// Generic instruction to paste a variable number of components together into a +/// larger register. +HANDLE_TARGET_OPCODE(G_MERGE_VALUES) + +/// Generic pointer to int conversion. +HANDLE_TARGET_OPCODE(G_PTRTOINT) + +/// Generic int to pointer conversion. +HANDLE_TARGET_OPCODE(G_INTTOPTR) + +/// Generic bitcast. The source and destination types must be different, or a +/// COPY is the relevant instruction. +HANDLE_TARGET_OPCODE(G_BITCAST) + +/// Generic load. +HANDLE_TARGET_OPCODE(G_LOAD) + +/// Generic store. +HANDLE_TARGET_OPCODE(G_STORE) + +/// Generic atomic cmpxchg with internal success check. +HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS) + +/// Generic atomic cmpxchg. +HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG) + +/// Generic atomicrmw. +HANDLE_TARGET_OPCODE(G_ATOMICRMW_XCHG) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_ADD) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_SUB) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_AND) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_NAND) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_OR) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_XOR) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) + +/// Generic conditional branch instruction. +HANDLE_TARGET_OPCODE(G_BRCOND) + +/// Generic indirect branch instruction. +HANDLE_TARGET_OPCODE(G_BRINDIRECT) + +/// Generic intrinsic use (without side effects). +HANDLE_TARGET_OPCODE(G_INTRINSIC) + +/// Generic intrinsic use (with side effects). +HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS) + +/// Generic extension allowing rubbish in high bits. +HANDLE_TARGET_OPCODE(G_ANYEXT) + +/// Generic instruction to discard the high bits of a register. This differs +/// from (G_EXTRACT val, 0) on its action on vectors: G_TRUNC will truncate +/// each element individually, G_EXTRACT will typically discard the high +/// elements of the vector. +HANDLE_TARGET_OPCODE(G_TRUNC) + +/// Generic integer constant. +HANDLE_TARGET_OPCODE(G_CONSTANT) + +/// Generic floating constant. +HANDLE_TARGET_OPCODE(G_FCONSTANT) + +/// Generic va_start instruction. Stores to its one pointer operand. +HANDLE_TARGET_OPCODE(G_VASTART) + +/// Generic va_start instruction. Stores to its one pointer operand. +HANDLE_TARGET_OPCODE(G_VAARG) + +// Generic sign extend +HANDLE_TARGET_OPCODE(G_SEXT) + +// Generic zero extend +HANDLE_TARGET_OPCODE(G_ZEXT) + +// Generic left-shift +HANDLE_TARGET_OPCODE(G_SHL) + +// Generic logical right-shift +HANDLE_TARGET_OPCODE(G_LSHR) + +// Generic arithmetic right-shift +HANDLE_TARGET_OPCODE(G_ASHR) + +/// Generic integer-base comparison, also applicable to vectors of integers. +HANDLE_TARGET_OPCODE(G_ICMP) + +/// Generic floating-point comparison, also applicable to vectors. +HANDLE_TARGET_OPCODE(G_FCMP) + +/// Generic select. +HANDLE_TARGET_OPCODE(G_SELECT) + +/// Generic unsigned add instruction, consuming the normal operands plus a carry +/// flag, and similarly producing the result and a carry flag. +HANDLE_TARGET_OPCODE(G_UADDE) + +/// Generic unsigned subtract instruction, consuming the normal operands plus a +/// carry flag, and similarly producing the result and a carry flag. +HANDLE_TARGET_OPCODE(G_USUBE) + +/// Generic signed add instruction, producing the result and a signed overflow +/// flag. +HANDLE_TARGET_OPCODE(G_SADDO) + +/// Generic signed subtract instruction, producing the result and a signed +/// overflow flag. 
+HANDLE_TARGET_OPCODE(G_SSUBO) + +/// Generic unsigned multiply instruction, producing the result and a signed +/// overflow flag. +HANDLE_TARGET_OPCODE(G_UMULO) + +/// Generic signed multiply instruction, producing the result and a signed +/// overflow flag. +HANDLE_TARGET_OPCODE(G_SMULO) + +// Multiply two numbers at twice the incoming bit width (unsigned) and return +// the high half of the result. +HANDLE_TARGET_OPCODE(G_UMULH) + +// Multiply two numbers at twice the incoming bit width (signed) and return +// the high half of the result. +HANDLE_TARGET_OPCODE(G_SMULH) + +/// Generic FP addition. +HANDLE_TARGET_OPCODE(G_FADD) + +/// Generic FP subtraction. +HANDLE_TARGET_OPCODE(G_FSUB) + +/// Generic FP multiplication. +HANDLE_TARGET_OPCODE(G_FMUL) + +/// Generic FMA multiplication. Behaves like llvm fma intrinsic +HANDLE_TARGET_OPCODE(G_FMA) + +/// Generic FP division. +HANDLE_TARGET_OPCODE(G_FDIV) + +/// Generic FP remainder. +HANDLE_TARGET_OPCODE(G_FREM) + +/// Generic FP exponentiation. +HANDLE_TARGET_OPCODE(G_FPOW) + +/// Generic base-e exponential of a value. +HANDLE_TARGET_OPCODE(G_FEXP) + +/// Generic base-2 exponential of a value. +HANDLE_TARGET_OPCODE(G_FEXP2) + +/// Floating point base-e logarithm of a value. +HANDLE_TARGET_OPCODE(G_FLOG) + +/// Floating point base-2 logarithm of a value. +HANDLE_TARGET_OPCODE(G_FLOG2) + +/// Generic FP negation. +HANDLE_TARGET_OPCODE(G_FNEG) + +/// Generic FP extension. +HANDLE_TARGET_OPCODE(G_FPEXT) + +/// Generic float to signed-int conversion +HANDLE_TARGET_OPCODE(G_FPTRUNC) + +/// Generic float to signed-int conversion +HANDLE_TARGET_OPCODE(G_FPTOSI) + +/// Generic float to unsigned-int conversion +HANDLE_TARGET_OPCODE(G_FPTOUI) + +/// Generic signed-int to float conversion +HANDLE_TARGET_OPCODE(G_SITOFP) + +/// Generic unsigned-int to float conversion +HANDLE_TARGET_OPCODE(G_UITOFP) + +/// Generic pointer offset +HANDLE_TARGET_OPCODE(G_GEP) + +/// Clear the specified number of low bits in a pointer. This rounds the value +/// *down* to the given alignment. +HANDLE_TARGET_OPCODE(G_PTR_MASK) + +/// Generic BRANCH instruction. This is an unconditional branch. +HANDLE_TARGET_OPCODE(G_BR) + +/// Generic insertelement. +HANDLE_TARGET_OPCODE(G_INSERT_VECTOR_ELT) + +/// Generic extractelement. +HANDLE_TARGET_OPCODE(G_EXTRACT_VECTOR_ELT) + +/// Generic shufflevector. +HANDLE_TARGET_OPCODE(G_SHUFFLE_VECTOR) + +/// Generic byte swap. +HANDLE_TARGET_OPCODE(G_BSWAP) + +// TODO: Add more generic opcodes as we move along. + +/// Marker for the end of the generic opcode. +/// This is used to check if an opcode is in the range of the +/// generic opcodes. +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BSWAP) + +/// BUILTIN_OP_END - This must be the last enum value in this list. +/// The target-specific post-isel opcode values start here. +HANDLE_TARGET_OPCODE_MARKER(GENERIC_OP_END, PRE_ISEL_GENERIC_OPCODE_END) diff --git a/include/llvm/CodeGen/TargetOpcodes.h b/include/llvm/CodeGen/TargetOpcodes.h new file mode 100644 index 0000000000000..3ca31a9709446 --- /dev/null +++ b/include/llvm/CodeGen/TargetOpcodes.h @@ -0,0 +1,42 @@ +//===-- llvm/CodeGen/TargetOpcodes.h - Target Indep Opcodes -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the target independent instruction opcodes. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETOPCODES_H +#define LLVM_CODEGEN_TARGETOPCODES_H + +namespace llvm { + +/// Invariant opcodes: All instruction sets have these as their low opcodes. +/// +namespace TargetOpcode { +enum { +#define HANDLE_TARGET_OPCODE(OPC) OPC, +#define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) IDENT = OPC, +#include "llvm/CodeGen/TargetOpcodes.def" +}; +} // end namespace TargetOpcode + +/// Check whether the given Opcode is a generic opcode that is not supposed +/// to appear after ISel. +inline bool isPreISelGenericOpcode(unsigned Opcode) { + return Opcode >= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START && + Opcode <= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; +} + +/// Check whether the given Opcode is a target-specific opcode. +inline bool isTargetSpecificOpcode(unsigned Opcode) { + return Opcode > TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; +} +} // end namespace llvm + +#endif diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index aaf0ab5d5481d..1aaa85d77a54f 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -108,6 +108,18 @@ private: bool Stopped = false; bool AddingMachinePasses = false; + /// Set the StartAfter, StartBefore and StopAfter passes to allow running only + /// a portion of the normal code-gen pass sequence. + /// + /// If the StartAfter and StartBefore pass ID is zero, then compilation will + /// begin at the normal point; otherwise, clear the Started flag to indicate + /// that passes should not be added until the starting pass is seen. If the + /// Stop pass ID is zero, then compilation will continue to the end. + /// + /// This function expects that at least one of the StartAfter or the + /// StartBefore pass IDs is null. + void setStartStopPasses(); + protected: LLVMTargetMachine *TM; PassConfigImpl *Impl = nullptr; // Internal data structures @@ -147,27 +159,25 @@ public: CodeGenOpt::Level getOptLevel() const; - /// Set the StartAfter, StartBefore and StopAfter passes to allow running only - /// a portion of the normal code-gen pass sequence. - /// - /// If the StartAfter and StartBefore pass ID is zero, then compilation will - /// begin at the normal point; otherwise, clear the Started flag to indicate - /// that passes should not be added until the starting pass is seen. If the - /// Stop pass ID is zero, then compilation will continue to the end. - /// - /// This function expects that at least one of the StartAfter or the - /// StartBefore pass IDs is null. - void setStartStopPasses(AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopBefore, AnalysisID StopAfter) { - assert(!(StartBefore && StartAfter) && - "Start after and start before passes are given"); - assert(!(StopBefore && StopAfter) && - "Stop after and stop before passed are given"); - this->StartBefore = StartBefore; - this->StartAfter = StartAfter; - this->StopBefore = StopBefore; - this->StopAfter = StopAfter; - Started = (StartAfter == nullptr) && (StartBefore == nullptr); + /// Describe the status of the codegen + /// pipeline set by this target pass config. + /// Having a limited codegen pipeline means that options + /// have been used to restrict what codegen is doing. + /// In particular, that means that codegen won't emit + /// assembly code. 
+ bool hasLimitedCodeGenPipeline() const; + + /// If hasLimitedCodeGenPipeline is true, this method + /// returns a string with the name of the options, separated + /// by \p Separator that caused this pipeline to be limited. + std::string + getLimitedCodeGenPipelineReason(const char *Separator = "/") const; + + /// Check if the codegen pipeline is limited in such a way that it + /// won't be complete. When the codegen pipeline is not complete, + /// this means it may not be possible to generate assembly from it. + bool willCompleteCodeGenPipeline() const { + return !hasLimitedCodeGenPipeline() || (!StopAfter && !StopBefore); } void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); } diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h new file mode 100644 index 0000000000000..81907538fb0b1 --- /dev/null +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -0,0 +1,1177 @@ +//==- CodeGen/TargetRegisterInfo.h - Target Register Information -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes an abstract interface used to get information about a +// target machines register file. This information is used for a variety of +// purposed, especially register allocation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETREGISTERINFO_H +#define LLVM_CODEGEN_TARGETREGISTERINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Printable.h" +#include <cassert> +#include <cstdint> +#include <functional> + +namespace llvm { + +class BitVector; +class LiveRegMatrix; +class MachineFunction; +class MachineInstr; +class RegScavenger; +class VirtRegMap; +class LiveIntervals; + +class TargetRegisterClass { +public: + using iterator = const MCPhysReg *; + using const_iterator = const MCPhysReg *; + using sc_iterator = const TargetRegisterClass* const *; + + // Instance variables filled by tablegen, do not use! + const MCRegisterClass *MC; + const uint32_t *SubClassMask; + const uint16_t *SuperRegIndices; + const LaneBitmask LaneMask; + /// Classes with a higher priority value are assigned first by register + /// allocators using a greedy heuristic. The value is in the range [0,63]. + const uint8_t AllocationPriority; + /// Whether the class supports two (or more) disjunct subregister indices. + const bool HasDisjunctSubRegs; + /// Whether a combination of subregisters can cover every register in the + /// class. See also the CoveredBySubRegs description in Target.td. + const bool CoveredBySubRegs; + const sc_iterator SuperClasses; + ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&); + + /// Return the register class ID number. + unsigned getID() const { return MC->getID(); } + + /// begin/end - Return all of the registers in this class. 
+ /// + iterator begin() const { return MC->begin(); } + iterator end() const { return MC->end(); } + + /// Return the number of registers in this class. + unsigned getNumRegs() const { return MC->getNumRegs(); } + + iterator_range<SmallVectorImpl<MCPhysReg>::const_iterator> + getRegisters() const { + return make_range(MC->begin(), MC->end()); + } + + /// Return the specified register in the class. + unsigned getRegister(unsigned i) const { + return MC->getRegister(i); + } + + /// Return true if the specified register is included in this register class. + /// This does not include virtual registers. + bool contains(unsigned Reg) const { + return MC->contains(Reg); + } + + /// Return true if both registers are in this class. + bool contains(unsigned Reg1, unsigned Reg2) const { + return MC->contains(Reg1, Reg2); + } + + /// Return the cost of copying a value between two registers in this class. + /// A negative number means the register class is very expensive + /// to copy e.g. status flag register classes. + int getCopyCost() const { return MC->getCopyCost(); } + + /// Return true if this register class may be used to create virtual + /// registers. + bool isAllocatable() const { return MC->isAllocatable(); } + + /// Return true if the specified TargetRegisterClass + /// is a proper sub-class of this TargetRegisterClass. + bool hasSubClass(const TargetRegisterClass *RC) const { + return RC != this && hasSubClassEq(RC); + } + + /// Returns true if RC is a sub-class of or equal to this class. + bool hasSubClassEq(const TargetRegisterClass *RC) const { + unsigned ID = RC->getID(); + return (SubClassMask[ID / 32] >> (ID % 32)) & 1; + } + + /// Return true if the specified TargetRegisterClass is a + /// proper super-class of this TargetRegisterClass. + bool hasSuperClass(const TargetRegisterClass *RC) const { + return RC->hasSubClass(this); + } + + /// Returns true if RC is a super-class of or equal to this class. + bool hasSuperClassEq(const TargetRegisterClass *RC) const { + return RC->hasSubClassEq(this); + } + + /// Returns a bit vector of subclasses, including this one. + /// The vector is indexed by class IDs. + /// + /// To use it, consider the returned array as a chunk of memory that + /// contains an array of bits of size NumRegClasses. Each 32-bit chunk + /// contains a bitset of the ID of the subclasses in big-endian style. + + /// I.e., the representation of the memory from left to right at the + /// bit level looks like: + /// [31 30 ... 1 0] [ 63 62 ... 33 32] ... + /// [ XXX NumRegClasses NumRegClasses - 1 ... ] + /// Where the number represents the class ID and XXX bits that + /// should be ignored. + /// + /// See the implementation of hasSubClassEq for an example of how it + /// can be used. + const uint32_t *getSubClassMask() const { + return SubClassMask; + } + + /// Returns a 0-terminated list of sub-register indices that project some + /// super-register class into this register class. The list has an entry for + /// each Idx such that: + /// + /// There exists SuperRC where: + /// For all Reg in SuperRC: + /// this->contains(Reg:Idx) + const uint16_t *getSuperRegIndices() const { + return SuperRegIndices; + } + + /// Returns a NULL-terminated list of super-classes. The + /// classes are ordered by ID which is also a topological ordering from large + /// to small classes. The list does NOT include the current class. 
+ sc_iterator getSuperClasses() const { + return SuperClasses; + } + + /// Return true if this TargetRegisterClass is a subset + /// class of at least one other TargetRegisterClass. + bool isASubClass() const { + return SuperClasses[0] != nullptr; + } + + /// Returns the preferred order for allocating registers from this register + /// class in MF. The raw order comes directly from the .td file and may + /// include reserved registers that are not allocatable. + /// Register allocators should also make sure to allocate + /// callee-saved registers only after all the volatiles are used. The + /// RegisterClassInfo class provides filtered allocation orders with + /// callee-saved registers moved to the end. + /// + /// The MachineFunction argument can be used to tune the allocatable + /// registers based on the characteristics of the function, subtarget, or + /// other criteria. + /// + /// By default, this method returns all registers in the class. + ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF) const { + return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs()); + } + + /// Returns the combination of all lane masks of register in this class. + /// The lane masks of the registers are the combination of all lane masks + /// of their subregisters. Returns 1 if there are no subregisters. + LaneBitmask getLaneMask() const { + return LaneMask; + } +}; + +/// Extra information, not in MCRegisterDesc, about registers. +/// These are used by codegen, not by MC. +struct TargetRegisterInfoDesc { + unsigned CostPerUse; // Extra cost of instructions using register. + bool inAllocatableClass; // Register belongs to an allocatable regclass. +}; + +/// Each TargetRegisterClass has a per register weight, and weight +/// limit which must be less than the limits of its pressure sets. +struct RegClassWeight { + unsigned RegWeight; + unsigned WeightLimit; +}; + +/// TargetRegisterInfo base class - We assume that the target defines a static +/// array of TargetRegisterDesc objects that represent all of the machine +/// registers that the target has. As such, we simply have to track a pointer +/// to this array so that we can turn register number into a register +/// descriptor. +/// +class TargetRegisterInfo : public MCRegisterInfo { +public: + using regclass_iterator = const TargetRegisterClass * const *; + using vt_iterator = const MVT::SimpleValueType *; + struct RegClassInfo { + unsigned RegSize, SpillSize, SpillAlignment; + vt_iterator VTList; + }; +private: + const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen + const char *const *SubRegIndexNames; // Names of subreg indexes. + // Pointer to array of lane masks, one per sub-reg index. + const LaneBitmask *SubRegIndexLaneMasks; + + regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses + LaneBitmask CoveringLanes; + const RegClassInfo *const RCInfos; + unsigned HwMode; + +protected: + TargetRegisterInfo(const TargetRegisterInfoDesc *ID, + regclass_iterator RegClassBegin, + regclass_iterator RegClassEnd, + const char *const *SRINames, + const LaneBitmask *SRILaneMasks, + LaneBitmask CoveringLanes, + const RegClassInfo *const RSI, + unsigned Mode = 0); + virtual ~TargetRegisterInfo(); + +public: + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. The unsigned values are divided into these ranges: + // + // 0 Not a register, can be used as a sentinel. + // [1;2^30) Physical registers assigned by TableGen. + // [2^30;2^31) Stack slots. 
(Rarely used.) + // [2^31;2^32) Virtual registers assigned by MachineRegisterInfo. + // + // Further sentinels can be allocated from the small negative integers. + // DenseMapInfo<unsigned> uses -1u and -2u. + + /// isStackSlot - Sometimes it is useful the be able to store a non-negative + /// frame index in a variable that normally holds a register. isStackSlot() + /// returns true if Reg is in the range used for stack slots. + /// + /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack + /// slots, so if a variable may contains a stack slot, always check + /// isStackSlot() first. + /// + static bool isStackSlot(unsigned Reg) { + return int(Reg) >= (1 << 30); + } + + /// Compute the frame index from a register value representing a stack slot. + static int stackSlot2Index(unsigned Reg) { + assert(isStackSlot(Reg) && "Not a stack slot"); + return int(Reg - (1u << 30)); + } + + /// Convert a non-negative frame index to a stack slot register value. + static unsigned index2StackSlot(int FI) { + assert(FI >= 0 && "Cannot hold a negative frame index."); + return FI + (1u << 30); + } + + /// Return true if the specified register number is in + /// the physical register namespace. + static bool isPhysicalRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) > 0; + } + + /// Return true if the specified register number is in + /// the virtual register namespace. + static bool isVirtualRegister(unsigned Reg) { + assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); + return int(Reg) < 0; + } + + /// Convert a virtual register number to a 0-based index. + /// The first virtual register in a function will get the index 0. + static unsigned virtReg2Index(unsigned Reg) { + assert(isVirtualRegister(Reg) && "Not a virtual register"); + return Reg & ~(1u << 31); + } + + /// Convert a 0-based index to a virtual register number. + /// This is the inverse operation of VirtReg2IndexFunctor below. + static unsigned index2VirtReg(unsigned Index) { + return Index | (1u << 31); + } + + /// Return the size in bits of a register from class RC. + unsigned getRegSizeInBits(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).RegSize; + } + + /// Return the size in bytes of the stack slot allocated to hold a spilled + /// copy of a register from class RC. + unsigned getSpillSize(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).SpillSize / 8; + } + + /// Return the minimum required alignment in bytes for a spill slot for + /// a register of this class. + unsigned getSpillAlignment(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).SpillAlignment / 8; + } + + /// Return true if the given TargetRegisterClass has the ValueType T. + bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const { + for (auto I = legalclasstypes_begin(RC); *I != MVT::Other; ++I) + if (MVT(*I) == T) + return true; + return false; + } + + /// Loop over all of the value types that can be represented by values + /// in the given register class. 
+ vt_iterator legalclasstypes_begin(const TargetRegisterClass &RC) const { + return getRegClassInfo(RC).VTList; + } + + vt_iterator legalclasstypes_end(const TargetRegisterClass &RC) const { + vt_iterator I = legalclasstypes_begin(RC); + while (*I != MVT::Other) + ++I; + return I; + } + + /// Returns the Register Class of a physical register of the given type, + /// picking the most sub register class of the right type that contains this + /// physreg. + const TargetRegisterClass * + getMinimalPhysRegClass(unsigned Reg, MVT VT = MVT::Other) const; + + /// Return the maximal subclass of the given register class that is + /// allocatable or NULL. + const TargetRegisterClass * + getAllocatableClass(const TargetRegisterClass *RC) const; + + /// Returns a bitset indexed by register number indicating if a register is + /// allocatable or not. If a register class is specified, returns the subset + /// for the class. + BitVector getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC = nullptr) const; + + /// Return the additional cost of using this register instead + /// of other registers in its class. + unsigned getCostPerUse(unsigned RegNo) const { + return InfoDesc[RegNo].CostPerUse; + } + + /// Return true if the register is in the allocation of any register class. + bool isInAllocatableClass(unsigned RegNo) const { + return InfoDesc[RegNo].inAllocatableClass; + } + + /// Return the human-readable symbolic target-specific + /// name for the specified SubRegIndex. + const char *getSubRegIndexName(unsigned SubIdx) const { + assert(SubIdx && SubIdx < getNumSubRegIndices() && + "This is not a subregister index"); + return SubRegIndexNames[SubIdx-1]; + } + + /// Return a bitmask representing the parts of a register that are covered by + /// SubIdx \see LaneBitmask. + /// + /// SubIdx == 0 is allowed, it has the lane mask ~0u. + LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const { + assert(SubIdx < getNumSubRegIndices() && "This is not a subregister index"); + return SubRegIndexLaneMasks[SubIdx]; + } + + /// The lane masks returned by getSubRegIndexLaneMask() above can only be + /// used to determine if sub-registers overlap - they can't be used to + /// determine if a set of sub-registers completely cover another + /// sub-register. + /// + /// The X86 general purpose registers have two lanes corresponding to the + /// sub_8bit and sub_8bit_hi sub-registers. Both sub_32bit and sub_16bit have + /// lane masks '3', but the sub_16bit sub-register doesn't fully cover the + /// sub_32bit sub-register. + /// + /// On the other hand, the ARM NEON lanes fully cover their registers: The + /// dsub_0 sub-register is completely covered by the ssub_0 and ssub_1 lanes. + /// This is related to the CoveredBySubRegs property on register definitions. + /// + /// This function returns a bit mask of lanes that completely cover their + /// sub-registers. More precisely, given: + /// + /// Covering = getCoveringLanes(); + /// MaskA = getSubRegIndexLaneMask(SubA); + /// MaskB = getSubRegIndexLaneMask(SubB); + /// + /// If (MaskA & ~(MaskB & Covering)) == 0, then SubA is completely covered by + /// SubB. + LaneBitmask getCoveringLanes() const { return CoveringLanes; } + + /// Returns true if the two registers are equal or alias each other. + /// The registers may be virtual registers. + bool regsOverlap(unsigned regA, unsigned regB) const { + if (regA == regB) return true; + if (isVirtualRegister(regA) || isVirtualRegister(regB)) + return false; + + // Regunits are numerically ordered. 
Find a common unit. + MCRegUnitIterator RUA(regA, this); + MCRegUnitIterator RUB(regB, this); + do { + if (*RUA == *RUB) return true; + if (*RUA < *RUB) ++RUA; + else ++RUB; + } while (RUA.isValid() && RUB.isValid()); + return false; + } + + /// Returns true if Reg contains RegUnit. + bool hasRegUnit(unsigned Reg, unsigned RegUnit) const { + for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) + if (*Units == RegUnit) + return true; + return false; + } + + /// Return a null-terminated list of all of the callee-saved registers on + /// this target. The register should be in the order of desired callee-save + /// stack frame offset. The first register is closest to the incoming stack + /// pointer if stack grows down, and vice versa. + /// Notice: This function does not take into account disabled CSRs. + /// In most cases you will want to use instead the function + /// getCalleeSavedRegs that is implemented in MachineRegisterInfo. + virtual const MCPhysReg* + getCalleeSavedRegs(const MachineFunction *MF) const = 0; + + /// Return a mask of call-preserved registers for the given calling convention + /// on the current function. The mask should include all call-preserved + /// aliases. This is used by the register allocator to determine which + /// registers can be live across a call. + /// + /// The mask is an array containing (TRI::getNumRegs()+31)/32 entries. + /// A set bit indicates that all bits of the corresponding register are + /// preserved across the function call. The bit mask is expected to be + /// sub-register complete, i.e. if A is preserved, so are all its + /// sub-registers. + /// + /// Bits are numbered from the LSB, so the bit for physical register Reg can + /// be found as (Mask[Reg / 32] >> Reg % 32) & 1. + /// + /// A NULL pointer means that no register mask will be used, and call + /// instructions should use implicit-def operands to indicate call clobbered + /// registers. + /// + virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const { + // The default mask clobbers everything. All targets should override. + return nullptr; + } + + /// Return a register mask that clobbers everything. + virtual const uint32_t *getNoPreservedMask() const { + llvm_unreachable("target does not provide no preserved mask"); + } + + /// Return true if all bits that are set in mask \p mask0 are also set in + /// \p mask1. + bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; + + /// Return all the call-preserved register masks defined for this target. + virtual ArrayRef<const uint32_t *> getRegMasks() const = 0; + virtual ArrayRef<const char *> getRegMaskNames() const = 0; + + /// Returns a bitset indexed by physical register number indicating if a + /// register is a special register that has particular uses and should be + /// considered unavailable at all times, e.g. stack pointer, return address. + /// A reserved register: + /// - is not allocatable + /// - is considered always live + /// - is ignored by liveness tracking + /// It is often necessary to reserve the super registers of a reserved + /// register as well, to avoid them getting allocated indirectly. You may use + /// markSuperRegs() and checkAllSuperRegsMarked() in this case. + virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0; + + /// Returns true if PhysReg is unallocatable and constant throughout the + /// function. Used by MachineRegisterInfo::isConstantPhysReg(). 
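(Editorial aside, not part of the patch: the regmask bit numbering spelled out above is easy to get wrong, so here is a minimal sketch of a query built only from the declarations in this hunk. The helper name is hypothetical.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  using namespace llvm;

  // True if every bit of PhysReg is preserved across a call with convention CC.
  static bool isCallPreserved(const TargetRegisterInfo &TRI,
                              const MachineFunction &MF, CallingConv::ID CC,
                              unsigned PhysReg) {
    const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
    if (!Mask)
      return false; // No regmask: clobbers are modeled with implicit-def operands.
    return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1;
  }

The constant-physreg hook documented just above follows next.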
+ virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; } + + /// Physical registers that may be modified within a function but are + /// guaranteed to be restored before any uses. This is useful for targets that + /// have call sequences where a GOT register may be updated by the caller + /// prior to a call and is guaranteed to be restored (also by the caller) + /// after the call. + virtual bool isCallerPreservedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + return false; + } + + /// Prior to adding the live-out mask to a stackmap or patchpoint + /// instruction, provide the target the opportunity to adjust it (mainly to + /// remove pseudo-registers that should be ignored). + virtual void adjustStackMapLiveOutMask(uint32_t *Mask) const {} + + /// Return a super-register of the specified register + /// Reg so its sub-register of index SubIdx is Reg. + unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterClass *RC) const { + return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC); + } + + /// Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. + /// + /// TableGen will synthesize missing A sub-classes. + virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const; + + // For a copy-like instruction that defines a register of class DefRC with + // subreg index DefSubReg, reading from another source with class SrcRC and + // subregister SrcSubReg return true if this is a preferable copy + // instruction or an earlier use should be used. + virtual bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const; + + /// Returns the largest legal sub-class of RC that + /// supports the sub-register index Idx. + /// If no such sub-class exists, return NULL. + /// If all registers in RC already have an Idx sub-register, return RC. + /// + /// TableGen generates a version of this function that is good enough in most + /// cases. Targets can override if they have constraints that TableGen + /// doesn't understand. For example, the x86 sub_8bit sub-register index is + /// supported by the full GR32 register class in 64-bit mode, but only by the + /// GR32_ABCD regiister class in 32-bit mode. + /// + /// TableGen will synthesize missing RC sub-classes. + virtual const TargetRegisterClass * + getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const { + assert(Idx == 0 && "Target has no sub-registers"); + return RC; + } + + /// Return the subregister index you get from composing + /// two subregister indices. + /// + /// The special null sub-register index composes as the identity. + /// + /// If R:a:b is the same register as R:c, then composeSubRegIndices(a, b) + /// returns c. Note that composeSubRegIndices does not tell you about illegal + /// compositions. If R does not have a subreg a, or R:a does not have a subreg + /// b, composeSubRegIndices doesn't tell you. + /// + /// The ARM register Q0 has two D subregs dsub_0:D0 and dsub_1:D1. It also has + /// ssub_0:S0 - ssub_3:S3 subregs. + /// If you compose subreg indices dsub_1, ssub_0 you get ssub_2. 
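(Editorial aside, not part of the patch: to make the ARM example just above concrete before the declaration that follows, a sketch using composeSubRegIndices(). The ARM::dsub_1/ssub_0/ssub_2 enumerators are the ARM backend's TableGen-generated sub-register indices and are assumed here purely for illustration.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include <cassert>
  using namespace llvm;

  unsigned composeQ0Indices(const TargetRegisterInfo &TRI) {
    // Q0:dsub_1 is D1, and D1:ssub_0 is S2, i.e. Q0:ssub_2.
    unsigned Composed = TRI.composeSubRegIndices(ARM::dsub_1, ARM::ssub_0);
    assert(Composed == ARM::ssub_2 && "dsub_1 + ssub_0 should compose to ssub_2");
    return Composed;
  }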
+ unsigned composeSubRegIndices(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return composeSubRegIndicesImpl(a, b); + } + + /// Transforms a LaneMask computed for one subregister to the lanemask that + /// would have been computed when composing the subsubregisters with IdxA + /// first. @sa composeSubRegIndices() + LaneBitmask composeSubRegIndexLaneMask(unsigned IdxA, + LaneBitmask Mask) const { + if (!IdxA) + return Mask; + return composeSubRegIndexLaneMaskImpl(IdxA, Mask); + } + + /// Transform a lanemask given for a virtual register to the corresponding + /// lanemask before using subregister with index \p IdxA. + /// This is the reverse of composeSubRegIndexLaneMask(), assuming Mask is a + /// valie lane mask (no invalid bits set) the following holds: + /// X0 = composeSubRegIndexLaneMask(Idx, Mask) + /// X1 = reverseComposeSubRegIndexLaneMask(Idx, X0) + /// => X1 == Mask + LaneBitmask reverseComposeSubRegIndexLaneMask(unsigned IdxA, + LaneBitmask LaneMask) const { + if (!IdxA) + return LaneMask; + return reverseComposeSubRegIndexLaneMaskImpl(IdxA, LaneMask); + } + + /// Debugging helper: dump register in human readable form to dbgs() stream. + static void dumpReg(unsigned Reg, unsigned SubRegIndex = 0, + const TargetRegisterInfo* TRI = nullptr); + +protected: + /// Overridden by TableGen in targets that have sub-registers. + virtual unsigned composeSubRegIndicesImpl(unsigned, unsigned) const { + llvm_unreachable("Target has no sub-registers"); + } + + /// Overridden by TableGen in targets that have sub-registers. + virtual LaneBitmask + composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const { + llvm_unreachable("Target has no sub-registers"); + } + + virtual LaneBitmask reverseComposeSubRegIndexLaneMaskImpl(unsigned, + LaneBitmask) const { + llvm_unreachable("Target has no sub-registers"); + } + +public: + /// Find a common super-register class if it exists. + /// + /// Find a register class, SuperRC and two sub-register indices, PreA and + /// PreB, such that: + /// + /// 1. PreA + SubA == PreB + SubB (using composeSubRegIndices()), and + /// + /// 2. For all Reg in SuperRC: Reg:PreA in RCA and Reg:PreB in RCB, and + /// + /// 3. SuperRC->getSize() >= max(RCA->getSize(), RCB->getSize()). + /// + /// SuperRC will be chosen such that no super-class of SuperRC satisfies the + /// requirements, and there is no register class with a smaller spill size + /// that satisfies the requirements. + /// + /// SubA and SubB must not be 0. Use getMatchingSuperRegClass() instead. + /// + /// Either of the PreA and PreB sub-register indices may be returned as 0. In + /// that case, the returned register class will be a sub-class of the + /// corresponding argument register class. + /// + /// The function returns NULL if no register class can be found. 
+ const TargetRegisterClass* + getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, + const TargetRegisterClass *RCB, unsigned SubB, + unsigned &PreA, unsigned &PreB) const; + + //===--------------------------------------------------------------------===// + // Register Class Information + // +protected: + const RegClassInfo &getRegClassInfo(const TargetRegisterClass &RC) const { + return RCInfos[getNumRegClasses() * HwMode + RC.getID()]; + } + +public: + /// Register class iterators + regclass_iterator regclass_begin() const { return RegClassBegin; } + regclass_iterator regclass_end() const { return RegClassEnd; } + iterator_range<regclass_iterator> regclasses() const { + return make_range(regclass_begin(), regclass_end()); + } + + unsigned getNumRegClasses() const { + return (unsigned)(regclass_end()-regclass_begin()); + } + + /// Returns the register class associated with the enumeration value. + /// See class MCOperandInfo. + const TargetRegisterClass *getRegClass(unsigned i) const { + assert(i < getNumRegClasses() && "Register Class ID out of range"); + return RegClassBegin[i]; + } + + /// Returns the name of the register class. + const char *getRegClassName(const TargetRegisterClass *Class) const { + return MCRegisterInfo::getRegClassName(Class->MC); + } + + /// Find the largest common subclass of A and B. + /// Return NULL if there is no common subclass. + /// The common subclass should contain + /// simple value type SVT if it is not the Any type. + const TargetRegisterClass * + getCommonSubClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) const; + + /// Returns a TargetRegisterClass used for pointer values. + /// If a target supports multiple different pointer register classes, + /// kind specifies which one is indicated. + virtual const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const { + llvm_unreachable("Target didn't implement getPointerRegClass!"); + } + + /// Returns a legal register class to copy a register in the specified class + /// to or from. If it is possible to copy the register directly without using + /// a cross register class copy, return the specified RC. Returns NULL if it + /// is not possible to copy between two registers of the specified class. + virtual const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const { + return RC; + } + + /// Returns the largest super class of RC that is legal to use in the current + /// sub-target and has the same spill size. + /// The returned register class can be used to create virtual registers which + /// means that all its registers can be copied and spilled. + virtual const TargetRegisterClass * + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &) const { + /// The default implementation is very conservative and doesn't allow the + /// register allocator to inflate register classes. + return RC; + } + + /// Return the register pressure "high water mark" for the specific register + /// class. The scheduler is in high register pressure mode (for the specific + /// register class) if it goes over the limit. + /// + /// Note: this is the old register pressure model that relies on a manually + /// specified representative register class per value type. 
+ virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + return 0; + } + + /// Return a heuristic for the machine scheduler to compare the profitability + /// of increasing one register pressure set versus another. The scheduler + /// will prefer increasing the register pressure of the set which returns + /// the largest value for this function. + virtual unsigned getRegPressureSetScore(const MachineFunction &MF, + unsigned PSetID) const { + return PSetID; + } + + /// Get the weight in units of pressure for this register class. + virtual const RegClassWeight &getRegClassWeight( + const TargetRegisterClass *RC) const = 0; + + /// Get the weight in units of pressure for this register unit. + virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0; + + /// Get the number of dimensions of register pressure. + virtual unsigned getNumRegPressureSets() const = 0; + + /// Get the name of this register unit pressure set. + virtual const char *getRegPressureSetName(unsigned Idx) const = 0; + + /// Get the register unit pressure limit for this dimension. + /// This limit must be adjusted dynamically for reserved registers. + virtual unsigned getRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const = 0; + + /// Get the dimensions of register pressure impacted by this register class. + /// Returns a -1 terminated array of pressure set IDs. + virtual const int *getRegClassPressureSets( + const TargetRegisterClass *RC) const = 0; + + /// Get the dimensions of register pressure impacted by this register unit. + /// Returns a -1 terminated array of pressure set IDs. + virtual const int *getRegUnitPressureSets(unsigned RegUnit) const = 0; + + /// Get a list of 'hint' registers that the register allocator should try + /// first when allocating a physical register for the virtual register + /// VirtReg. These registers are effectively moved to the front of the + /// allocation order. If true is returned, regalloc will try to only use + /// hints to the greatest extent possible even if it means spilling. + /// + /// The Order argument is the allocation order for VirtReg's register class + /// as returned from RegisterClassInfo::getOrder(). The hint registers must + /// come from Order, and they must not be reserved. + /// + /// The default implementation of this function will only add target + /// independent register allocation hints. Targets that override this + /// function should typically call this default implementation as well and + /// expect to see generic copy hints added. + virtual bool getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM = nullptr, + const LiveRegMatrix *Matrix = nullptr) + const; + + /// A callback to allow target a chance to update register allocation hints + /// when a register is "changed" (e.g. coalesced) to another register. + /// e.g. On ARM, some virtual registers should target register pairs, + /// if one of pair is coalesced to another register, the allocation hint of + /// the other half of the pair should be changed to point to the new register. + virtual void updateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { + // Do nothing. + } + + /// The creation of multiple copy hints have been implemented in + /// weightCalcHelper(), but since this affects so many tests for many + /// targets, this is temporarily disabled per default. 
THIS SHOULD BE
+ /// "GENERAL GOODNESS" and hopefully all targets will update their tests
+ /// and enable this soon. This hook should then be removed.
+ virtual bool enableMultipleCopyHints() const { return false; }
+
+ /// Allow the target to reverse allocation order of local live ranges. This
+ /// will generally allocate shorter local live ranges first. For targets with
+ /// many registers, this could reduce regalloc compile time by a large
+ /// factor. It is disabled by default for three reasons:
+ /// (1) Top-down allocation is simpler and easier to debug for targets that
+ /// don't benefit from reversing the order.
+ /// (2) Bottom-up allocation could result in poor eviction decisions on some
+ /// targets affecting the performance of compiled code.
+ /// (3) Bottom-up allocation is no longer guaranteed to optimally color.
+ virtual bool reverseLocalAssignment() const { return false; }
+
+ /// Allow the target to override the cost of using a callee-saved register for
+ /// the first time. Default value of 0 means we will use a callee-saved
+ /// register if it is available.
+ virtual unsigned getCSRFirstUseCost() const { return 0; }
+
+ /// Returns true if the target requires (and can make use of) the register
+ /// scavenger.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Returns true if the target wants to use frame pointer based accesses to
+ /// spill to the scavenger emergency spill slot.
+ virtual bool useFPForScavengingIndex(const MachineFunction &MF) const {
+ return true;
+ }
+
+ /// Returns true if the target requires post PEI scavenging of registers for
+ /// materializing frame index constants.
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Returns true if the target requires using the RegScavenger directly for
+ /// frame elimination despite using requiresFrameIndexScavenging.
+ virtual bool requiresFrameIndexReplacementScavenging(
+ const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Returns true if the target wants the LocalStackAllocation pass to be run
+ /// and virtual base registers used for more efficient stack access.
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// Return true if the target has reserved a spill slot in the stack frame of
+ /// the given function for the specified register. e.g. On x86, if the frame
+ /// register is required, the first fixed stack object is reserved as its
+ /// spill slot. This tells PEI not to create a new stack frame
+ /// object for the given register. It should be called only after
+ /// determineCalleeSaves().
+ virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const {
+ return false;
+ }
+
+ /// Returns true if the live-ins should be tracked after register allocation.
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return false;
+ }
+
+ /// True if the stack can be realigned for the target.
+ virtual bool canRealignStack(const MachineFunction &MF) const;
+
+ /// True if storage within the function requires the stack pointer to be
+ /// aligned more than the normal calling convention calls for.
+ /// This cannot be overridden by the target, but canRealignStack can be
+ /// overridden.
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ /// Get the offset from the referenced frame index in the instruction,
+ /// if there is one.
+ virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI, + int Idx) const { + return 0; + } + + /// Returns true if the instruction's frame index reference would be better + /// served by a base register other than FP or SP. + /// Used by LocalStackFrameAllocation to determine which frame index + /// references it should create new base registers for. + virtual bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + return false; + } + + /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx + /// before insertion point I. + virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const { + llvm_unreachable("materializeFrameBaseRegister does not exist on this " + "target"); + } + + /// Resolve a frame index operand of an instruction + /// to reference the indicated base register plus offset instead. + virtual void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { + llvm_unreachable("resolveFrameIndex does not exist on this target"); + } + + /// Determine whether a given base register plus offset immediate is + /// encodable to resolve a frame index. + virtual bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg, + int64_t Offset) const { + llvm_unreachable("isFrameOffsetLegal does not exist on this target"); + } + + /// Spill the register so it can be used by the register scavenger. + /// Return true if the register was spilled, false otherwise. + /// If this function does not spill the register, the scavenger + /// will instead spill it to the emergency spill slot. + virtual bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + return false; + } + + /// This method must be overriden to eliminate abstract frame indices from + /// instructions which may use them. The instruction referenced by the + /// iterator contains an MO_FrameIndex operand which must be eliminated by + /// this method. This method may modify or replace the specified instruction, + /// as long as it keeps the iterator pointing at the finished product. + /// SPAdj is the SP adjustment due to call frame setup instruction. + /// FIOperandNum is the FI operand number. + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = nullptr) const = 0; + + /// Return the assembly name for \p Reg. + virtual StringRef getRegAsmName(unsigned Reg) const { + // FIXME: We are assuming that the assembly name is equal to the TableGen + // name converted to lower case + // + // The TableGen name is the name of the definition for this register in the + // target's tablegen files. For example, the TableGen name of + // def EAX : Register <...>; is "EAX" + return StringRef(getName(Reg)); + } + + //===--------------------------------------------------------------------===// + /// Subtarget Hooks + + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true. + virtual bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const + { return true; } + + //===--------------------------------------------------------------------===// + /// Debug information queries. 
+
+ /// getFrameRegister - This method should return the register used as a base
+ /// for values allocated in the current stack frame.
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
+
+ /// Mark a register and all its aliases as reserved in the given set.
+ void markSuperRegs(BitVector &RegisterSet, unsigned Reg) const;
+
+ /// Returns true if for every register in the set all super registers are part
+ /// of the set as well.
+ bool checkAllSuperRegsMarked(const BitVector &RegisterSet,
+ ArrayRef<MCPhysReg> Exceptions = ArrayRef<MCPhysReg>()) const;
+};
+
+//===----------------------------------------------------------------------===//
+// SuperRegClassIterator
+//===----------------------------------------------------------------------===//
+//
+// Iterate over the possible super-registers for a given register class. The
+// iterator will visit a list of pairs (Idx, Mask) corresponding to the
+// possible classes of super-registers.
+//
+// Each bit mask will have at least one set bit, and each set bit in Mask
+// corresponds to a SuperRC such that:
+//
+// For all Reg in SuperRC: Reg:Idx is in RC.
+//
+// The iterator can include (0, RC->getSubClassMask()) as the first entry which
+// also satisfies the above requirement, assuming Reg:0 == Reg.
+//
+class SuperRegClassIterator {
+ const unsigned RCMaskWords;
+ unsigned SubReg = 0;
+ const uint16_t *Idx;
+ const uint32_t *Mask;
+
+public:
+ /// Create a SuperRegClassIterator that visits all the super-register classes
+ /// of RC. When IncludeSelf is set, also include the (0, sub-classes) entry.
+ SuperRegClassIterator(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ bool IncludeSelf = false)
+ : RCMaskWords((TRI->getNumRegClasses() + 31) / 32),
+ Idx(RC->getSuperRegIndices()), Mask(RC->getSubClassMask()) {
+ if (!IncludeSelf)
+ ++*this;
+ }
+
+ /// Returns true if this iterator is still pointing at a valid entry.
+ bool isValid() const { return Idx; }
+
+ /// Returns the current sub-register index.
+ unsigned getSubReg() const { return SubReg; }
+
+ /// Returns the bit mask of register classes that getSubReg() projects into
+ /// RC.
+ /// See TargetRegisterClass::getSubClassMask() for how to use it.
+ const uint32_t *getMask() const { return Mask; }
+
+ /// Advance iterator to the next entry.
+ void operator++() {
+ assert(isValid() && "Cannot move iterator past end.");
+ Mask += RCMaskWords;
+ SubReg = *Idx++;
+ if (!SubReg)
+ Idx = nullptr;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// BitMaskClassIterator
+//===----------------------------------------------------------------------===//
+/// This class encapsulates the logic to iterate over the bitmasks returned by
+/// the various RegClass related APIs.
+/// E.g., this class can be used to iterate over the subclasses provided by
+/// TargetRegisterClass::getSubClassMask or SuperRegClassIterator::getMask.
+class BitMaskClassIterator {
+ /// Total number of register classes.
+ const unsigned NumRegClasses;
+ /// Base index of CurrentChunk.
+ /// In other words, the number of bits we read to get to the
+ /// beginning of that chunk.
+ unsigned Base = 0;
+ /// Adjusted base index of CurrentChunk.
+ /// Base index + how many bits we read within CurrentChunk.
+ unsigned Idx = 0;
+ /// Current register class ID.
+ unsigned ID = 0;
+ /// Mask we are iterating over.
+ const uint32_t *Mask;
+ /// Current chunk of the Mask we are traversing.
+ uint32_t CurrentChunk;
+
+ /// Move ID to the next set bit.
+ void moveToNextID() { + // If the current chunk of memory is empty, move to the next one, + // while making sure we do not go pass the number of register + // classes. + while (!CurrentChunk) { + // Move to the next chunk. + Base += 32; + if (Base >= NumRegClasses) { + ID = NumRegClasses; + return; + } + CurrentChunk = *++Mask; + Idx = Base; + } + // Otherwise look for the first bit set from the right + // (representation of the class ID is big endian). + // See getSubClassMask for more details on the representation. + unsigned Offset = countTrailingZeros(CurrentChunk); + // Add the Offset to the adjusted base number of this chunk: Idx. + // This is the ID of the register class. + ID = Idx + Offset; + + // Consume the zeros, if any, and the bit we just read + // so that we are at the right spot for the next call. + // Do not do Offset + 1 because Offset may be 31 and 32 + // will be UB for the shift, though in that case we could + // have make the chunk being equal to 0, but that would + // have introduced a if statement. + moveNBits(Offset); + moveNBits(1); + } + + /// Move \p NumBits Bits forward in CurrentChunk. + void moveNBits(unsigned NumBits) { + assert(NumBits < 32 && "Undefined behavior spotted!"); + // Consume the bit we read for the next call. + CurrentChunk >>= NumBits; + // Adjust the base for the chunk. + Idx += NumBits; + } + +public: + /// Create a BitMaskClassIterator that visits all the register classes + /// represented by \p Mask. + /// + /// \pre \p Mask != nullptr + BitMaskClassIterator(const uint32_t *Mask, const TargetRegisterInfo &TRI) + : NumRegClasses(TRI.getNumRegClasses()), Mask(Mask), CurrentChunk(*Mask) { + // Move to the first ID. + moveToNextID(); + } + + /// Returns true if this iterator is still pointing at a valid entry. + bool isValid() const { return getID() != NumRegClasses; } + + /// Returns the current register class ID. + unsigned getID() const { return ID; } + + /// Advance iterator to the next entry. + void operator++() { + assert(isValid() && "Cannot move iterator past end."); + moveToNextID(); + } +}; + +// This is useful when building IndexedMaps keyed on virtual registers +struct VirtReg2IndexFunctor { + using argument_type = unsigned; + unsigned operator()(unsigned Reg) const { + return TargetRegisterInfo::virtReg2Index(Reg); + } +}; + +/// Prints virtual and physical registers with or without a TRI instance. +/// +/// The format is: +/// %noreg - NoRegister +/// %5 - a virtual register. +/// %5:sub_8bit - a virtual register with sub-register index (with TRI). +/// %eax - a physical register +/// %physreg17 - a physical register when no TRI instance given. +/// +/// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; +Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, + unsigned SubRegIdx = 0); + +/// Create Printable object to print register units on a \ref raw_ostream. +/// +/// Register units are named after their root registers: +/// +/// al - Single root. +/// fp0~st7 - Dual roots. +/// +/// Usage: OS << printRegUnit(Unit, TRI) << '\n'; +Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); + +/// \brief Create Printable object to print virtual registers and physical +/// registers on a \ref raw_ostream. +Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); + +/// \brief Create Printable object to print register classes or register banks +/// on a \ref raw_ostream. 
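(Editorial aside, not part of the patch: a short usage sketch for BitMaskClassIterator, combining it with the getSubClassMask(), getRegClass() and getRegClassName() APIs declared earlier in this header. The final printing helper declared below follows it.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Print the name of every register class contained in RC's sub-class mask.
  void listSubClasses(const TargetRegisterClass *RC,
                      const TargetRegisterInfo &TRI, raw_ostream &OS) {
    for (BitMaskClassIterator It(RC->getSubClassMask(), TRI); It.isValid(); ++It)
      OS << TRI.getRegClassName(TRI.getRegClass(It.getID())) << '\n';
  }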
+Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETREGISTERINFO_H diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index f236679764688..1044f0bd27e6d 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -18,9 +18,9 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" -#include "llvm/Target/TargetSubtargetInfo.h" namespace llvm { @@ -116,7 +116,7 @@ public: return SchedModel.getProcResource(PIdx); } -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) const char *getResourceName(unsigned PIdx) const { if (!PIdx) return "MOps"; diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h new file mode 100644 index 0000000000000..576522aef466e --- /dev/null +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -0,0 +1,255 @@ +//===- llvm/CodeGen/TargetSubtargetInfo.h - Target Information --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the subtarget options of a Target machine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H +#define LLVM_CODEGEN_TARGETSUBTARGETINFO_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/PBQPRAConstraint.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/CodeGen.h" +#include <memory> +#include <vector> + + +namespace llvm { + +class CallLowering; +class InstrItineraryData; +struct InstrStage; +class InstructionSelector; +class LegalizerInfo; +class MachineInstr; +struct MachineSchedPolicy; +struct MCReadAdvanceEntry; +struct MCWriteLatencyEntry; +struct MCWriteProcResEntry; +class RegisterBankInfo; +class SDep; +class SelectionDAGTargetInfo; +struct SubtargetFeatureKV; +struct SubtargetInfoKV; +class SUnit; +class TargetFrameLowering; +class TargetInstrInfo; +class TargetLowering; +class TargetRegisterClass; +class TargetRegisterInfo; +class TargetSchedModel; +class Triple; + +//===----------------------------------------------------------------------===// +/// +/// TargetSubtargetInfo - Generic base class for all target subtargets. All +/// Target-specific options that control code generation and printing should +/// be exposed through a TargetSubtargetInfo-derived class. +/// +class TargetSubtargetInfo : public MCSubtargetInfo { +protected: // Can only create subclasses... + TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> PF, + ArrayRef<SubtargetFeatureKV> PD, + const SubtargetInfoKV *ProcSched, + const MCWriteProcResEntry *WPR, + const MCWriteLatencyEntry *WL, + const MCReadAdvanceEntry *RA, const InstrStage *IS, + const unsigned *OC, const unsigned *FP); + +public: + // AntiDepBreakMode - Type of anti-dependence breaking that should + // be performed before post-RA scheduling. 
+ using AntiDepBreakMode = enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL }; + using RegClassVector = SmallVectorImpl<const TargetRegisterClass *>; + + TargetSubtargetInfo() = delete; + TargetSubtargetInfo(const TargetSubtargetInfo &) = delete; + TargetSubtargetInfo &operator=(const TargetSubtargetInfo &) = delete; + ~TargetSubtargetInfo() override; + + virtual bool isXRaySupported() const { return false; } + + // Interfaces to the major aspects of target machine information: + // + // -- Instruction opcode and operand information + // -- Pipelines and scheduling information + // -- Stack frame information + // -- Selection DAG lowering information + // -- Call lowering information + // + // N.B. These objects may change during compilation. It's not safe to cache + // them between functions. + virtual const TargetInstrInfo *getInstrInfo() const { return nullptr; } + virtual const TargetFrameLowering *getFrameLowering() const { + return nullptr; + } + virtual const TargetLowering *getTargetLowering() const { return nullptr; } + virtual const SelectionDAGTargetInfo *getSelectionDAGInfo() const { + return nullptr; + } + virtual const CallLowering *getCallLowering() const { return nullptr; } + + // FIXME: This lets targets specialize the selector by subtarget (which lets + // us do things like a dedicated avx512 selector). However, we might want + // to also specialize selectors by MachineFunction, which would let us be + // aware of optsize/optnone and such. + virtual const InstructionSelector *getInstructionSelector() const { + return nullptr; + } + + virtual unsigned getHwMode() const { return 0; } + + /// Target can subclass this hook to select a different DAG scheduler. + virtual RegisterScheduler::FunctionPassCtor + getDAGScheduler(CodeGenOpt::Level) const { + return nullptr; + } + + virtual const LegalizerInfo *getLegalizerInfo() const { return nullptr; } + + /// getRegisterInfo - If register information is available, return it. If + /// not, return null. + virtual const TargetRegisterInfo *getRegisterInfo() const { return nullptr; } + + /// If the information for the register banks is available, return it. + /// Otherwise return nullptr. + virtual const RegisterBankInfo *getRegBankInfo() const { return nullptr; } + + /// getInstrItineraryData - Returns instruction itinerary data for the target + /// or specific subtarget. + virtual const InstrItineraryData *getInstrItineraryData() const { + return nullptr; + } + + /// Resolve a SchedClass at runtime, where SchedClass identifies an + /// MCSchedClassDesc with the isVariant property. This may return the ID of + /// another variant SchedClass, but repeated invocation must quickly terminate + /// in a nonvariant SchedClass. + virtual unsigned resolveSchedClass(unsigned SchedClass, + const MachineInstr *MI, + const TargetSchedModel *SchedModel) const { + return 0; + } + + /// \brief True if the subtarget should run MachineScheduler after aggressive + /// coalescing. + /// + /// This currently replaces the SelectionDAG scheduler with the "source" order + /// scheduler (though see below for an option to turn this off and use the + /// TargetLowering preference). It does not yet disable the postRA scheduler. + virtual bool enableMachineScheduler() const; + + /// \brief Support printing of [latency:throughput] comment in output .S file. + virtual bool supportPrintSchedInfo() const { return false; } + + /// \brief True if the machine scheduler should disable the TLI preference + /// for preRA scheduling with the source level scheduler. 
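(Editorial aside, not part of the patch: a sketch of the usual way code generation passes reach these interfaces. MachineFunction::getSubtarget() is assumed from MachineFunction.h, and the null checks in the comments reflect the default implementations above.)

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/TargetSubtargetInfo.h"
  using namespace llvm;

  void querySubtarget(const MachineFunction &MF) {
    const TargetSubtargetInfo &STI = MF.getSubtarget();
    const TargetInstrInfo *TII = STI.getInstrInfo();       // may be null (default above)
    const TargetRegisterInfo *TRI = STI.getRegisterInfo(); // may be null (default above)
    bool UseMISched = STI.enableMachineScheduler();
    (void)TII; (void)TRI; (void)UseMISched;
  }

The remaining scheduling hooks continue below.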
+ virtual bool enableMachineSchedDefaultSched() const { return true; } + + /// \brief True if the subtarget should enable joining global copies. + /// + /// By default this is enabled if the machine scheduler is enabled, but + /// can be overridden. + virtual bool enableJoinGlobalCopies() const; + + /// True if the subtarget should run a scheduler after register allocation. + /// + /// By default this queries the PostRAScheduling bit in the scheduling model + /// which is the preferred way to influence this. + virtual bool enablePostRAScheduler() const; + + /// \brief True if the subtarget should run the atomic expansion pass. + virtual bool enableAtomicExpand() const; + + /// \brief Override generic scheduling policy within a region. + /// + /// This is a convenient way for targets that don't provide any custom + /// scheduling heuristics (no custom MachineSchedStrategy) to make + /// changes to the generic scheduling policy. + virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const {} + + // \brief Perform target specific adjustments to the latency of a schedule + // dependency. + virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const {} + + // For use with PostRAScheduling: get the anti-dependence breaking that should + // be performed before post-RA scheduling. + virtual AntiDepBreakMode getAntiDepBreakMode() const { return ANTIDEP_NONE; } + + // For use with PostRAScheduling: in CriticalPathRCs, return any register + // classes that should only be considered for anti-dependence breaking if they + // are on the critical path. + virtual void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { + return CriticalPathRCs.clear(); + } + + // \brief Provide an ordered list of schedule DAG mutations for the post-RA + // scheduler. + virtual void getPostRAMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + } + + // \brief Provide an ordered list of schedule DAG mutations for the machine + // pipeliner. + virtual void getSMSMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + } + + // For use with PostRAScheduling: get the minimum optimization level needed + // to enable post-RA scheduling. + virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const { + return CodeGenOpt::Default; + } + + /// \brief True if the subtarget should run the local reassignment + /// heuristic of the register allocator. + /// This heuristic may be compile time intensive, \p OptLevel provides + /// a finer grain to tune the register allocator. + virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const; + + /// \brief True if the subtarget should consider the cost of local intervals + /// created by a split candidate when choosing the best split candidate. This + /// heuristic may be compile time intensive. + virtual bool enableAdvancedRASplitCost() const; + + /// \brief Enable use of alias analysis during code generation (during MI + /// scheduling, DAGCombine, etc.). + virtual bool useAA() const; + + /// \brief Enable the use of the early if conversion pass. + virtual bool enableEarlyIfConversion() const { return false; } + + /// \brief Return PBQPConstraint(s) for the target. + /// + /// Override to provide custom PBQP constraints. + virtual std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const { + return nullptr; + } + + /// Enable tracking of subregister liveness in register allocator. 
+ /// Please use MachineRegisterInfo::subRegLivenessEnabled() instead where + /// possible. + virtual bool enableSubRegLiveness() const { return false; } + + /// Returns string representation of scheduler comment + std::string getSchedInfoStr(const MachineInstr &MI) const override; + std::string getSchedInfoStr(MCInst const &MCI) const override; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_TARGETSUBTARGETINFO_H diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index b1e62daa5aaeb..73c7fb4ce4b37 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -40,110 +40,111 @@ def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value def v16i1 : ValueType<16, 18>; // 16 x i1 vector value def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value -def v512i1 : ValueType<512, 21>; // 512 x i1 vector value -def v1024i1: ValueType<1024,22>; //1024 x i1 vector value - -def v1i8 : ValueType<8, 23>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 24>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 25>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 26>; // 8 x i8 vector value -def v16i8 : ValueType<128, 27>; // 16 x i8 vector value -def v32i8 : ValueType<256, 28>; // 32 x i8 vector value -def v64i8 : ValueType<512, 29>; // 64 x i8 vector value -def v128i8 : ValueType<1024,30>; //128 x i8 vector value -def v256i8 : ValueType<2048,31>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 32>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 33>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 34>; // 4 x i16 vector value -def v8i16 : ValueType<128, 35>; // 8 x i16 vector value -def v16i16 : ValueType<256, 36>; // 16 x i16 vector value -def v32i16 : ValueType<512, 37>; // 32 x i16 vector value -def v64i16 : ValueType<1024,38>; // 64 x i16 vector value -def v128i16: ValueType<2048,39>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 40>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 41>; // 2 x i32 vector value -def v4i32 : ValueType<128, 42>; // 4 x i32 vector value -def v8i32 : ValueType<256, 43>; // 8 x i32 vector value -def v16i32 : ValueType<512, 44>; // 16 x i32 vector value -def v32i32 : ValueType<1024,45>; // 32 x i32 vector value -def v64i32 : ValueType<2048,46>; // 32 x i32 vector value - -def v1i64 : ValueType<64 , 47>; // 1 x i64 vector value -def v2i64 : ValueType<128, 48>; // 2 x i64 vector value -def v4i64 : ValueType<256, 49>; // 4 x i64 vector value -def v8i64 : ValueType<512, 50>; // 8 x i64 vector value -def v16i64 : ValueType<1024,51>; // 16 x i64 vector value -def v32i64 : ValueType<2048,52>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 53>; // 1 x i128 vector value - -def nxv1i1 : ValueType<1, 54>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 55>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 56>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 57>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 58>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 59>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 60>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 61>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 62>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 63>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 64>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 65>; // n x 32 x i8 vector value - -def nxv1i16 : ValueType<16, 66>; // n 
x 1 x i16 vector value -def nxv2i16 : ValueType<32, 67>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 68>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 69>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 70>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 71>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 72>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 73>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 74>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 75>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 76>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,77>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 78>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 79>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 80>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 81>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,82>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,83>; // n x 32 x i64 vector value - -def v2f16 : ValueType<32 , 84>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 85>; // 4 x f16 vector value -def v8f16 : ValueType<128, 86>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 87>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 88>; // 2 x f32 vector value -def v4f32 : ValueType<128, 89>; // 4 x f32 vector value -def v8f32 : ValueType<256, 90>; // 8 x f32 vector value -def v16f32 : ValueType<512, 91>; // 16 x f32 vector value -def v1f64 : ValueType<64, 92>; // 1 x f64 vector value -def v2f64 : ValueType<128, 93>; // 2 x f64 vector value -def v4f64 : ValueType<256, 94>; // 4 x f64 vector value -def v8f64 : ValueType<512, 95>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 96>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 97>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 98>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 99>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 100>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 101>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 102>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 103>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 104>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 105>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 106>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 107>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 108>; // X86 MMX value -def FlagVT : ValueType<0 , 109>; // Pre-RA sched glue -def isVoid : ValueType<0 , 110>; // Produces no value -def untyped: ValueType<8 , 111>; // Produces an untyped value +def v128i1 : ValueType<128, 21>; // 128 x i1 vector value +def v512i1 : ValueType<512, 22>; // 512 x i1 vector value +def v1024i1: ValueType<1024,23>; //1024 x i1 vector value + +def v1i8 : ValueType<8, 24>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value +def v16i8 : ValueType<128, 28>; // 16 x i8 vector value +def v32i8 : ValueType<256, 29>; // 32 x i8 vector value +def v64i8 : ValueType<512, 30>; // 64 x i8 vector value +def v128i8 : ValueType<1024,31>; //128 x i8 vector value +def v256i8 : ValueType<2048,32>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 
34>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value +def v8i16 : ValueType<128, 36>; // 8 x i16 vector value +def v16i16 : ValueType<256, 37>; // 16 x i16 vector value +def v32i16 : ValueType<512, 38>; // 32 x i16 vector value +def v64i16 : ValueType<1024,39>; // 64 x i16 vector value +def v128i16: ValueType<2048,40>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value +def v4i32 : ValueType<128, 43>; // 4 x i32 vector value +def v8i32 : ValueType<256, 44>; // 8 x i32 vector value +def v16i32 : ValueType<512, 45>; // 16 x i32 vector value +def v32i32 : ValueType<1024,46>; // 32 x i32 vector value +def v64i32 : ValueType<2048,47>; // 32 x i32 vector value + +def v1i64 : ValueType<64 , 48>; // 1 x i64 vector value +def v2i64 : ValueType<128, 49>; // 2 x i64 vector value +def v4i64 : ValueType<256, 50>; // 4 x i64 vector value +def v8i64 : ValueType<512, 51>; // 8 x i64 vector value +def v16i64 : ValueType<1024,52>; // 16 x i64 vector value +def v32i64 : ValueType<2048,53>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 54>; // 1 x i128 vector value + +def nxv1i1 : ValueType<1, 55>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 56>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 57>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 58>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 59>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 60>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 61>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 62>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 63>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 64>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 65>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 66>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 67>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 68>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 69>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 70>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 71>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 72>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 73>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 74>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 75>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 76>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 77>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,78>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 79>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 80>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 81>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 82>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,83>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,84>; // n x 32 x i64 vector value + +def v2f16 : ValueType<32 , 85>; // 2 x f16 vector value +def v4f16 : ValueType<64 , 86>; // 4 x f16 vector value +def v8f16 : ValueType<128, 87>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 88>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 89>; // 2 x f32 vector value +def v4f32 : ValueType<128, 90>; // 4 x f32 vector value +def v8f32 : ValueType<256, 91>; // 8 x f32 vector value +def v16f32 : ValueType<512, 92>; // 16 x f32 vector value +def v1f64 : ValueType<64, 
93>; // 1 x f64 vector value +def v2f64 : ValueType<128, 94>; // 2 x f64 vector value +def v4f64 : ValueType<256, 95>; // 4 x f64 vector value +def v8f64 : ValueType<512, 96>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 97>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 98>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 99>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 100>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 101>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 102>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 103>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 104>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 105>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 106>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 107>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 108>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 109>; // X86 MMX value +def FlagVT : ValueType<0 , 110>; // Pre-RA sched glue +def isVoid : ValueType<0 , 111>; // Produces no value +def untyped: ValueType<8 , 112>; // Produces an untyped value def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h index b9076353fd07d..3b06f03931147 100644 --- a/include/llvm/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===// +//===- llvm/CodeGen/VirtRegMap.h - Virtual Register Map ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,15 +19,17 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include <cassert> namespace llvm { - class MachineInstr; - class MachineFunction; - class MachineRegisterInfo; - class TargetInstrInfo; - class raw_ostream; - class SlotIndexes; + +class MachineFunction; +class MachineRegisterInfo; +class raw_ostream; +class TargetInstrInfo; class VirtRegMap : public MachineFunctionPass { public: @@ -63,13 +65,14 @@ namespace llvm { /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); - VirtRegMap(const VirtRegMap&) = delete; - void operator=(const VirtRegMap&) = delete; - public: static char ID; + VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {} + VirtRegMap(const VirtRegMap &) = delete; + VirtRegMap &operator=(const VirtRegMap &) = delete; + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -166,6 +169,7 @@ namespace llvm { /// @brief create a mapping for the specifed virtual register to /// the next available stack slot int assignVirt2StackSlot(unsigned virtReg); + /// @brief create a mapping for the specified virtual register to /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); @@ -178,6 +182,7 @@ namespace llvm { VRM.print(OS); return OS; } -} // End llvm namespace -#endif +} // end llvm namespace + +#endif // LLVM_CODEGEN_VIRTREGMAP_H |
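(Editorial aside, not part of the patch: closing with a small sketch of the VirtRegMap interface shown in the last hunk together with printReg() from TargetRegisterInfo.h. The VRM reference is assumed to come from the pass manager, e.g. getAnalysis<VirtRegMap>() inside a MachineFunctionPass, and the function name is hypothetical.)

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include "llvm/CodeGen/VirtRegMap.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cassert>
  using namespace llvm;

  void spillVirtReg(VirtRegMap &VRM, unsigned VirtReg, raw_ostream &OS) {
    assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "expected a vreg");
    int FI = VRM.assignVirt2StackSlot(VirtReg); // allocates a slot sized for the reg class
    OS << printReg(VirtReg) << " assigned to frame index " << FI << '\n';
    OS << VRM; // the operator<< declared in the hunk above prints the whole map
  }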