diff options
Diffstat (limited to 'lib/CodeGen')
41 files changed, 860 insertions, 323 deletions
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 344136b1f1956..aa9c8e94d08a3 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -361,7 +361,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *NewLI = Builder.CreateLoad(NewAddr); NewLI->setAlignment(LI->getAlignment()); NewLI->setVolatile(LI->isVolatile()); - NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope()); + NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); @@ -444,7 +444,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); NewSI->setAlignment(SI->getAlignment()); NewSI->setVolatile(SI->isVolatile()); - NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); SI->eraseFromParent(); return NewSI; @@ -801,7 +801,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), - CI->getFailureOrdering(), CI->getSynchScope()); + CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); // When we're building a strong cmpxchg, we need a loop, so you // might think we could use a weak cmpxchg inside. 
But, using strong @@ -924,7 +924,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, CI->getSuccessOrdering(), CI->getFailureOrdering(), - CI->getSynchScope()); + CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index faa5f139cf7b3..b7fd45a3f6a66 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -78,6 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRABasicPass(Registry); + initializeRAFastPass(Registry); initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index b50e76f2e3ba2..b7155ac2480a7 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -4270,6 +4270,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *Consensus = nullptr; unsigned NumUsesConsensus = 0; bool IsNumUsesConsensusValid = false; + bool PhiSeen = false; SmallVector<Instruction*, 16> AddrModeInsts; ExtAddrMode AddrMode; TypePromotionTransaction TPT(RemovedInsts); @@ -4289,6 +4290,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (PHINode *P = dyn_cast<PHINode>(V)) { for (Value *IncValue : P->incoming_values()) worklist.push_back(IncValue); + PhiSeen = true; continue; } @@ -4342,9 +4344,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, TPT.commit(); // If all the instructions matched are already in this BB, don't do anything. 
- if (none_of(AddrModeInsts, [&](Value *V) { + // If we saw Phi node then it is not local definitely. + if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); - })) { + })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -4390,6 +4393,20 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, AddrMode.Scale = 0; } + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + // + // (See below for code to add the scale.) + if (AddrMode.Scale) { + Type *ScaledRegTy = AddrMode.ScaledReg->getType(); + if (cast<IntegerType>(IntPtrTy)->getBitWidth() > + cast<IntegerType>(ScaledRegTy)->getBitWidth()) + return false; + } + if (AddrMode.BaseGV) { if (ResultPtr) return false; @@ -4440,19 +4457,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = AddrMode.ScaledReg; if (V->getType() == IntPtrTy) { // done. - } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < - cast<IntegerType>(V->getType())->getBitWidth()) { - V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - // It is only safe to sign extend the BaseReg if we know that the math - // required to create it did not overflow before we extend it. Since - // the original IR value was tossed in favor of a constant back when - // the AddrMode was created we need to bail out gracefully if widths - // do not match instead of extending it. 
- Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); - if (I && (ResultIndex != AddrMode.BaseReg)) - I->eraseFromParent(); - return false; + assert(cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth() && + "We can't transform if ScaledReg is too narrow"); + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 521037f9d206b..ed1bd995e60be 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -345,7 +345,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), Flags, DL->getTypeStoreSize(LI.getType()), getMemOpAlignment(LI), AAMDNodes(), nullptr, - LI.getSynchScope(), LI.getOrdering())); + LI.getSyncScopeID(), LI.getOrdering())); return true; } @@ -363,7 +363,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand( MachinePointerInfo(SI.getPointerOperand()), Flags, DL->getTypeStoreSize(SI.getValueOperand()->getType()), - getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSynchScope(), + getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(), SI.getOrdering())); return true; } diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 860fc9a4f8b61..bf427225d6a96 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -16,7 +16,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/IR/Constants.h" +#include "llvm/Target/TargetInstrInfo.h" #include 
"llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -26,6 +30,9 @@ using namespace llvm; +InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) + : Renderers(MaxRenderers, nullptr), MIs() {} + InstructionSelector::InstructionSelector() = default; bool InstructionSelector::constrainOperandRegToRegClass( diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 84b0a0ac41579..49fb5e8f075b8 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -99,23 +99,19 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } -LegalizerHelper::LegalizeResult llvm::replaceWithLibcall( - MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, - const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args) { +LegalizerHelper::LegalizeResult +llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef<CallLowering::ArgInfo> Args) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); + MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - MIRBuilder.setInstr(MI); if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), MachineOperand::CreateES(Name), Result, Args)) return LegalizerHelper::UnableToLegalize; - // We're about to remove MI, so move the insert point after it. 
- MIRBuilder.setInsertPt(MIRBuilder.getMBB(), - std::next(MIRBuilder.getInsertPt())); - - MI.eraseFromParent(); return LegalizerHelper::Legalized; } @@ -123,10 +119,9 @@ static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); - return replaceWithLibcall(MI, MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); } LegalizerHelper::LegalizeResult @@ -135,6 +130,8 @@ LegalizerHelper::libcall(MachineInstr &MI) { unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -143,15 +140,24 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { Type *HLTy = Type::getInt32Ty(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } } + + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 47c6214c05528..4636806c3f081 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -166,19 +166,24 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, .addGlobalAddress(GV); } -MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, +MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1) { assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && "invalid operand type"); assert(MRI->getType(Res) == MRI->getType(Op0) && MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - return buildInstr(TargetOpcode::G_ADD) + return buildInstr(Opcode) .addDef(Res) .addUse(Op0) .addUse(Op1); } +MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1); +} + MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, unsigned Op1) { assert(MRI->getType(Res).isPointer() && @@ -222,41 +227,22 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_SUB) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); 
+ return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_MUL) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); + return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1); +} - return buildInstr(TargetOpcode::G_AND) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); +MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index 3746b74e0528a..f9ba4ffa6527c 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -67,7 +67,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { } } -void LiveRegUnits::accumulateBackward(const MachineInstr &MI) { +void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. 
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 1f1ce6e8d7250..58a655a4dee4f 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -365,6 +365,14 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token, return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); } +static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '"') + return None; + return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, + ErrorCallback); +} + static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { auto Range = C; C.advance(); // Skip '%' @@ -630,6 +638,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) + return R.remaining(); Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 3e9513111bf4f..08b82e59c4fc1 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -127,7 +127,8 @@ struct MIToken { NamedIRValue, IRValue, QuotedIRValue, // `<constant value>` - SubRegisterIndex + SubRegisterIndex, + StringConstant }; private: @@ -168,7 +169,8 @@ public: bool isMemoryOperandFlag() const { return Kind == kw_volatile || Kind == kw_non_temporal || - Kind == kw_dereferenceable || Kind == kw_invariant; + Kind == kw_dereferenceable || Kind == kw_invariant || + Kind == StringConstant; } bool is(TokenKind K) const { return Kind == K; } diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c58d192284dd0..c68d87b15a317 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp 
@@ -141,6 +141,8 @@ class MIParser { StringMap<unsigned> Names2DirectTargetFlags; /// Maps from direct target flag names to the bitmask target flag values. StringMap<unsigned> Names2BitmaskTargetFlags; + /// Maps from MMO target flag names to MMO target flag values. + StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -229,6 +231,7 @@ public: bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID); bool parseOptionalAtomicOrdering(AtomicOrdering &Order); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); @@ -318,6 +321,18 @@ private: /// /// Return true if the name isn't a name of a bitmask target flag. bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2MMOTargetFlags(); + + /// Try to convert a name of a MachineMemOperand target flag to the + /// corresponding target flag. + /// + /// Return true if the name isn't a name of a target MMO flag. + bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag); + + /// parseStringConstant + /// ::= StringConstant + bool parseStringConstant(std::string &Result); }; } // end anonymous namespace @@ -2034,7 +2049,14 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { case MIToken::kw_invariant: Flags |= MachineMemOperand::MOInvariant; break; - // TODO: parse the target specific memory operand flags. 
+ case MIToken::StringConstant: { + MachineMemOperand::Flags TF; + if (getMMOTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target MMO flag '" + Token.stringValue() + + "'"); + Flags |= TF; + break; + } default: llvm_unreachable("The current token should be a memory operand flag"); } @@ -2135,6 +2157,26 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { return false; } +bool MIParser::parseOptionalScope(LLVMContext &Context, + SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") { + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected '(' in syncscope"); + + std::string SSN; + if (parseStringConstant(SSN)) + return true; + + SSID = Context.getOrInsertSyncScopeID(SSN); + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' in syncscope"); + } + + return false; +} + bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) { Order = AtomicOrdering::NotAtomic; if (Token.isNot(MIToken::Identifier)) @@ -2174,12 +2216,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { Flags |= MachineMemOperand::MOStore; lex(); - // Optional "singlethread" scope. - SynchronizationScope Scope = SynchronizationScope::CrossThread; - if (Token.is(MIToken::Identifier) && Token.stringValue() == "singlethread") { - Scope = SynchronizationScope::SingleThread; - lex(); - } + // Optional synchronization scope. + SyncScope::ID SSID; + if (parseOptionalScope(MF.getFunction()->getContext(), SSID)) + return true; // Up to two atomic orderings (cmpxchg provides guarantees on failure). 
AtomicOrdering Order, FailureOrder; @@ -2244,7 +2284,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (expectAndConsume(MIToken::rparen)) return true; Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range, - Scope, Order, FailureOrder); + SSID, Order, FailureOrder); return false; } @@ -2457,6 +2497,35 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { return false; } +void MIParser::initNames2MMOTargetFlags() { + if (!Names2MMOTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) + Names2MMOTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getMMOTargetFlag(StringRef Name, + MachineMemOperand::Flags &Flag) { + initNames2MMOTargetFlags(); + auto FlagInfo = Names2MMOTargetFlags.find(Name); + if (FlagInfo == Names2MMOTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +bool MIParser::parseStringConstant(std::string &Result) { + if (Token.isNot(MIToken::StringConstant)) + return error("expected string constant"); + Result = Token.stringValue(); + lex(); + return false; +} + bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, StringRef Src, SMDiagnostic &Error) { diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index c524a9835f338..ddeacf1d1bfb1 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" @@ -139,6 +140,8 @@ class MIPrinter { ModuleSlotTracker &MST; const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; const 
DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping; + /// Synchronization scope names registered with LLVMContext. + SmallVector<StringRef, 8> SSNs; bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const; bool canPredictSuccessors(const MachineBasicBlock &MBB) const; @@ -162,7 +165,9 @@ public: void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef = false); - void print(const MachineMemOperand &Op); + void print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op); + void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID); void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); }; @@ -731,11 +736,12 @@ void MIPrinter::print(const MachineInstr &MI) { if (!MI.memoperands_empty()) { OS << " :: "; + const LLVMContext &Context = MF->getFunction()->getContext(); bool NeedComma = false; for (const auto *Op : MI.memoperands()) { if (NeedComma) OS << ", "; - print(*Op); + print(Context, *TII, *Op); NeedComma = true; } } @@ -1031,9 +1037,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, } } -void MIPrinter::print(const MachineMemOperand &Op) { +static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, + unsigned TMMOFlag) { + auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TMMOFlag) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op) { OS << '('; - // TODO: Print operand's target specific flags. 
if (Op.isVolatile()) OS << "volatile "; if (Op.isNonTemporal()) @@ -1042,6 +1059,15 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "dereferenceable "; if (Op.isInvariant()) OS << "invariant "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag1) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag2) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) + << "\" "; if (Op.isLoad()) OS << "load "; else { @@ -1049,8 +1075,7 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "store "; } - if (Op.getSynchScope() == SynchronizationScope::SingleThread) - OS << "singlethread "; + printSyncScope(Context, Op.getSyncScopeID()); if (Op.getOrdering() != AtomicOrdering::NotAtomic) OS << toIRString(Op.getOrdering()) << ' '; @@ -1119,6 +1144,23 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << ')'; } +void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) { + switch (SSID) { + case SyncScope::System: { + break; + } + default: { + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); + + OS << "syncscope(\""; + PrintEscapedString(SSNs[SSID], OS); + OS << "\") "; + break; + } + } +} + static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, const TargetRegisterInfo *TRI) { int Reg = TRI->getLLVMRegNum(DwarfReg, true); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 2d4b95974cc64..447ad629885bf 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1917,6 +1917,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, return; MachineBasicBlock *Top = *LoopChain.begin(); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); + + // If ExitingBB is already the 
last one in a chain then nothing to do. + if (Bottom == ExitingBB) + return; + bool ViableTopFallthrough = false; for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; @@ -1931,7 +1937,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock *Succ : Bottom->successors()) { BlockChain *SuccChain = BlockToChain[Succ]; if (!LoopBlockSet.count(Succ) && @@ -1944,6 +1949,36 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (ExitIt == LoopChain.end()) return; + // Rotating a loop exit to the bottom when there is a fallthrough to top + // trades the entry fallthrough for an exit fallthrough. + // If there is no bottom->top edge, but the chosen exit block does have + // a fallthrough, we break that fallthrough for nothing in return. + + // Let's consider an example. We have a built chain of basic blocks + // B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block. + // By doing a rotation we get + // Bk+1, ..., Bn, B1, ..., Bk + // Break of fallthrough to B1 is compensated by a fallthrough from Bk. + // If we had a fallthrough Bk -> Bk+1 it is broken now. + // It might be compensated by fallthrough Bn -> B1. + // So we have a condition to avoid creation of extra branch by loop rotation. + // All below must be true to avoid loop rotation: + // If there is a fallthrough to top (B1) + // There was fallthrough from chosen exit block (Bk) to next one (Bk+1) + // There is no fallthrough from bottom (Bn) to top (B1). + // Please note that there is no exit fallthrough from Bn because we checked it + // above. 
+ if (ViableTopFallthrough) { + assert(std::next(ExitIt) != LoopChain.end() && + "Exit should not be last BB"); + MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); + if (ExitingBB->isSuccessor(NextBlockInChain)) + if (!Bottom->isSuccessor(Top)) + return; + } + + DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) + << " at bottom\n"); std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index bbdae6e1a49e5..f88e175a97762 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -305,11 +305,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand *MachineFunction::getMachineMemOperand( MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, - SynchronizationScope SynchScope, AtomicOrdering Ordering, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges, - SynchScope, Ordering, FailureOrdering); + SSID, Ordering, FailureOrdering); } MachineMemOperand * @@ -320,13 +320,13 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } @@ -359,7 +359,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, 
(*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustLoad; } @@ -393,7 +393,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustStore; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 81c6dace92e04..afea5575a3ae5 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -447,6 +447,14 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallString<16> Str; getFPImm()->getValueAPF().toString(Str); OS << "quad " << Str; + } else if (getFPImm()->getType()->isX86_FP80Ty()) { + APFloat APF = getFPImm()->getValueAPF(); + OS << "x86_fp80 0xK"; + APInt API = APF.bitcastToAPInt(); + OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); } else { OS << getFPImm()->getValueAPF().convertToDouble(); } @@ -606,7 +614,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, const MDNode *Ranges, - SynchronizationScope SynchScope, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), @@ -617,8 +625,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); - AtomicInfo.SynchScope = 
static_cast<unsigned>(SynchScope); - assert(getSynchScope() == SynchScope && "Value truncated"); + AtomicInfo.SSID = static_cast<unsigned>(SSID); + assert(getSyncScopeID() == SSID && "Value truncated"); AtomicInfo.Ordering = static_cast<unsigned>(Ordering); assert(getOrdering() == Ordering && "Value truncated"); AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); @@ -744,6 +752,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { OS << "(dereferenceable)"; if (isInvariant()) OS << "(invariant)"; + if (getFlags() & MOTargetFlag1) + OS << "(flag1)"; + if (getFlags() & MOTargetFlag2) + OS << "(flag2)"; + if (getFlags() & MOTargetFlag3) + OS << "(flag3)"; } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index e65c256c1bb5a..fcb544806dda0 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -985,6 +985,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); + else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) { + const MachineOperand &MOTied = MI->getOperand(TiedTo); + if (!MOTied.isReg()) + report("Tied counterpart must be a register", &MOTied, TiedTo); + else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) && + MO->getReg() != MOTied.getReg()) + report("Tied physical registers must match.", &MOTied, TiedTo); + } } else if (MO->isReg() && MO->isTied()) report("Explicit operand should not be tied", MO, MONum); } else { diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 5e279b065bbda..633a853b2c748 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -24,7 +24,7 @@ #include "llvm/Support/raw_ostream.h" #include
"llvm/Target/TargetInstrInfo.h" -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" STATISTIC(NumFused, "Number of instr pairs fused"); diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 425a59dc03752..4a50d895340a5 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -23,7 +23,7 @@ /// This pass traverses all the instructions in a program in top-down order. /// In contrast to the instruction scheduling passes, this pass never resets /// the hazard recognizer to ensure it can correctly handles noop hazards at -/// the begining of blocks. +/// the beginning of blocks. // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index c606b7b833104..d5538be4bba25 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -203,6 +203,8 @@ namespace { char RAFast::ID = 0; } +INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false) + /// getStackSpaceFor - This allocates space for the specified virtual register /// to be held on the stack. int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { @@ -244,8 +246,15 @@ void RAFast::addKillFlag(const LiveReg &LR) { if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { if (MO.getReg() == LR.PhysReg) MO.setIsKill(); - else - LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + // else, don't do anything we are probably redefining a + // subreg of this register and given we don't track which + // lanes are actually dead, we cannot insert a kill flag here. + // Otherwise we may end up in a situation like this: + // ... = (MO) physreg:sub1, physreg <implicit-use, kill> + // ... <== Here we would allow later pass to reuse physreg:sub1 + // which is potentially wrong. + // LR:sub0 = ... + // ...
= LR.sub1 <== This is going to use physreg:sub1 } } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 9562652556acb..020e81eca2dd2 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -2458,7 +2458,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { do { Reg = RecoloringCandidates.pop_back_val(); - // We cannot recolor physcal register. + // We cannot recolor physical register. if (TargetRegisterInfo::isPhysicalRegister(Reg)) continue; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index e3baff4be4bcf..9778103575fab 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -924,5 +924,3 @@ FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { return createPBQPRegisterAllocator(); } - -#undef DEBUG_TYPE diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index ff9bca092dbe5..a67d07b36474a 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1227,6 +1227,34 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR->createDeadDef(DefIndex, Alloc); } } + + // Make sure that the subrange for resultant undef is removed + // For example: + // vreg1:sub1<def,read-undef> = LOAD CONSTANT 1 + // vreg2<def> = COPY vreg1 + // ==> + // vreg2:sub1<def, read-undef> = LOAD CONSTANT 1 + // ; Correct but need to remove the subrange for vreg2:sub0 + // ; as it is now undef + if (NewIdx != 0 && DstInt.hasSubRanges()) { + // The affected subregister segments can be removed. 
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); + bool UpdatedSubRanges = false; + for (LiveInterval::SubRange &SR : DstInt.subranges()) { + if ((SR.LaneMask & DstMask).none()) { + DEBUG(dbgs() << "Removing undefined SubRange " + << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); + // VNI is in ValNo - remove any segments in this SubRange that have this ValNo + if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) { + SR.removeValNo(RmValNo); + UpdatedSubRanges = true; + } + } + } + if (UpdatedSubRanges) + DstInt.removeEmptySubRanges(); + } } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 05e641d9489d9..fc5105aadbffd 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -375,7 +375,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, static std::pair<MCPhysReg, MachineBasicBlock::iterator> findSurvivorBackwards(const MachineRegisterInfo &MRI, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, - const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) { + const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder, + bool RestoreAfter) { bool FoundTo = false; MCPhysReg Survivor = 0; MachineBasicBlock::iterator Pos; @@ -388,7 +389,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, for (MachineBasicBlock::iterator I = From;; --I) { const MachineInstr &MI = *I; - Used.accumulateBackward(MI); + Used.accumulate(MI); if (I == To) { // See if one of the registers in RC wasn't used so far. @@ -401,6 +402,11 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, // the register which is not defined/used for the longest time. 
FoundTo = true; Pos = To; + // Note: It was fine so far to start our search at From, however now that + // we have to spill, and can only place the restore after From then + // add the regs used/defed by std::next(From) to the set. + if (RestoreAfter) + Used.accumulate(*std::next(From)); } if (FoundTo) { if (Survivor == 0 || !Used.available(Survivor)) { @@ -575,7 +581,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator UseMI; ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF); std::pair<MCPhysReg, MachineBasicBlock::iterator> P = - findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder); + findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder, + RestoreAfter); MCPhysReg Reg = P.first; MachineBasicBlock::iterator SpillBefore = P.second; assert(Reg != 0 && "No register left to scavenge!"); @@ -626,7 +633,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, assert(RealDef != nullptr && "Must have at least 1 Def"); #endif - // We should only have one definition of the register. However to accomodate + // We should only have one definition of the register. However to accommodate // the requirements of two address code we also allow definitions in // subsequent instructions provided they also read the register. That way // we get a single contiguous lifetime. 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3cd270cec3a6d..5e95f760aaa24 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -67,6 +67,41 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { return &TII->get(Node->getMachineOpcode()); } +LLVM_DUMP_METHOD +raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + switch (getKind()) { + case Data: OS << "Data"; break; + case Anti: OS << "Anti"; break; + case Output: OS << "Out "; break; + case Order: OS << "Ord "; break; + } + + switch (getKind()) { + case Data: + OS << " Latency=" << getLatency(); + if (TRI && isAssignedRegDep()) + OS << " Reg=" << PrintReg(getReg(), TRI); + break; + case Anti: + case Output: + OS << " Latency=" << getLatency(); + break; + case Order: + OS << " Latency=" << getLatency(); + switch(Contents.OrdKind) { + case Barrier: OS << " Barrier"; break; + case MayAliasMem: + case MustAliasMem: OS << " Memory"; break; + case Artificial: OS << " Artificial"; break; + case Weak: OS << " Weak"; break; + case Cluster: OS << " Cluster"; break; + } + break; + } + + return OS; +} + bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this dependence, don't add a redundant one. 
for (SDep &PredDep : Preds) { @@ -302,16 +337,24 @@ void SUnit::biasCriticalPath() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD -void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const { - if (this == &DAG->ExitSU) - OS << "ExitSU"; - else if (this == &DAG->EntrySU) +raw_ostream &SUnit::print(raw_ostream &OS, + const SUnit *Entry, const SUnit *Exit) const { + if (this == Entry) OS << "EntrySU"; + else if (this == Exit) + OS << "ExitSU"; else OS << "SU(" << NodeNum << ")"; + return OS; +} + +LLVM_DUMP_METHOD +raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const { + return print(OS, &G->EntrySU, &G->ExitSU); } -LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const { +LLVM_DUMP_METHOD +void SUnit::dump(const ScheduleDAG *G) const { print(dbgs(), G); dbgs() << ": "; G->dumpNode(this); @@ -333,40 +376,18 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const { if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; - for (const SDep &SuccDep : Preds) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Preds) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } if (Succs.size() != 0) { dbgs() << " Successors:\n"; - for (const SDep &SuccDep : Succs) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - 
SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Succs) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 0f70b0e9ca077..ccd937950a743 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -63,7 +63,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d901af7276860..71382c18fdf9d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -400,6 +400,7 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue reduceBuildVecToTrunc(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); @@ -5267,14 +5268,40 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { } SDValue DAGCombiner::visitRotate(SDNode *N) { + SDLoc dl(N); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // fold (rot x, 0) -> x + if (isNullConstantOrNullSplatConstant(N1)) + return N0; + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
- if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && - N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { - if (SDValue NewOp1 = - distributeTruncateThroughAnd(N->getOperand(1).getNode())) - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - N->getOperand(0), NewOp1); - } + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); + } + + unsigned NextOp = N0.getOpcode(); + // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) + if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) + if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) + if (SDNode *C2 = + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + bool SameSide = (N->getOpcode() == NextOp); + unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; + if (SDValue CombinedShift = + DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) { + unsigned Bitsize = VT.getScalarSizeInBits(); + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT); + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode()); + return DAG.getNode( + N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm); + } + } return SDValue(); } @@ -6091,19 +6118,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); + SDLoc DL(N); // fold (select C, X, X) -> X if (N1 == N2) return N1; + if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { // fold (select true, X, Y) -> X // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? 
N1 : N2; } + // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or C, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); + return DAG.getNode(ISD::OR, DL, VT, N0, N2); if (SDValue V = foldSelectOfConstants(N)) return V; @@ -6112,22 +6142,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); + return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); + return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1); } // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::AND, DL, VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) - return SDValue(N, 0); // Don't revisit N. + return SDValue(N, 0); // Don't revisit N. if (VT0 == MVT::i1) { // The code in this block deals with the following 2 equivalences: @@ -6138,27 +6168,27 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further // optimize the combination of the conditions. 
- bool normalizeToSequence - = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + bool normalizeToSequence = + TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); // select (and Cond0, Cond1), X, Y // -> select Cond0, (select Cond1, X, Y), Y if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, InnerSelect, N2); } // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, InnerSelect); } @@ -6170,15 +6200,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. if (!normalizeToSequence) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), - N0, N1_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, - N1_1, N2); + SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2); } // Otherwise see if we can optimize the "and" to a better pattern. 
if (SDValue Combined = visitANDLike(N0, N1_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1_1, N2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, + N2); } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y @@ -6189,15 +6217,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { - SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), - N0, N2_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, - N1, N2_2); + SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2); } // Otherwise see if we can optimize to a better pattern. if (SDValue Combined = visitORLike(N0, N2_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1, N2_2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, + N2_2); } } } @@ -6208,8 +6234,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { SDValue Cond0 = N0->getOperand(0); if (C->isOne()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), - Cond0, N2, N1); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); } } } @@ -6226,24 +6251,21 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // FIXME: Instead of testing for UnsafeFPMath, this should be checking for // no signed zeros as well as no nans. 
const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && - VT.isFloatingPoint() && N0.hasOneUse() && + if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), - N0.getOperand(1), N1, N2, CC, - TLI, DAG)) + if (SDValue FMinMax = combineMinNumMaxNum( + DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) return FMinMax; } if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - N1, N2, N0.getOperand(2)); - return SimplifySelect(SDLoc(N), N0, N1, N2); + return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, N0.getOperand(2)); + return SimplifySelect(DL, N0, N1, N2); } return SDValue(); @@ -11045,7 +11067,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the - // indexed load/store and the expresion that needs to be re-written. + // indexed load/store and the expression that needs to be re-written. 
// // Therefore, we have: // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 @@ -11379,7 +11401,7 @@ namespace { /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// -/// Then, it will be rewriten into: +/// Then, it will be rewritten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// @@ -12694,7 +12716,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast = false; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12706,7 +12728,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12723,7 +12745,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { !NoVectors) { // Find a legal type for the vector store. 
EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12781,7 +12804,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12898,7 +12922,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12912,7 +12936,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12926,7 +12950,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && 
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, @@ -14228,6 +14252,73 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { return Shuffles[0]; } +// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT +// operations which can be matched to a truncate. +SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) { + // TODO: Add support for big-endian. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + if (N->getNumOperands() < 2) + return SDValue(); + SDLoc DL(N); + EVT VT = N->getValueType(0); + unsigned NumElems = N->getNumOperands(); + + if (!isTypeLegal(VT)) + return SDValue(); + + // If the input is something other than an EXTRACT_VECTOR_ELT with a constant + // index, bail out. + // TODO: Allow undef elements in some cases? + if (any_of(N->ops(), [VT](SDValue Op) { + return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1)) || + Op.getValueType() != VT.getVectorElementType(); + })) + return SDValue(); + + // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index + auto GetExtractIdx = [](SDValue Extract) { + return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue(); + }; + + // The first BUILD_VECTOR operand must be an extract from index zero + // (assuming no undef and little-endian). + if (GetExtractIdx(N->getOperand(0)) != 0) + return SDValue(); + + // Compute the stride from the first index. + int Stride = GetExtractIdx(N->getOperand(1)); + SDValue ExtractedFromVec = N->getOperand(0).getOperand(0); + + // Proceed only if the stride and the types can be matched to a truncate. + if ((Stride == 1 || !isPowerOf2_32(Stride)) || + (ExtractedFromVec.getValueType().getVectorNumElements() != + Stride * NumElems) || + (VT.getScalarSizeInBits() * Stride > 64)) + return SDValue(); + + // Check remaining operands are consistent with the computed stride.
+ for (unsigned i = 1; i != NumElems; ++i) { + SDValue Op = N->getOperand(i); + + if ((Op.getOperand(0) != ExtractedFromVec) || + (GetExtractIdx(Op) != Stride * i)) + return SDValue(); + } + + // All checks were ok, construct the truncate. + LLVMContext &Ctx = *DAG.getContext(); + EVT NewVT = VT.getVectorVT( + Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems); + EVT TruncVT = + VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; + + SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec); + Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res); + return DAG.getBitcast(VT, Res); +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14270,6 +14361,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; + if (TLI.isDesirableToCombineBuildVectorToTruncate()) + if (SDValue V = reduceBuildVecToTrunc(N)) + return V; + if (SDValue V = reduceBuildVecToShuffle(N)) return V; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index b235e19aaab29..b96c96f0b4df4 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -589,7 +589,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } else AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - // Add the subregster being inserted + // Add the subregister being inserted AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 873b2bd48f1e0..7e4bc3ccb5d39 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1991,7 +1991,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::move(Args)) .setTailCall(isTailCall) .setSExtResult(isSigned) - 
.setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2029,7 +2030,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned) - .setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -3565,16 +3567,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); } - BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(0, dl)); - TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(1, dl)); - // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, make sure the - // node has no more uses. The above EXTRACT_ELEMENT nodes should have been - // folded. - assert(Ret->use_empty() && - "Unexpected uses of illegally type from expanded lib call."); + assert(Ret.getOpcode() == ISD::MERGE_VALUES && + "Ret value is a collection of constituent nodes holding result."); + BottomHalf = Ret.getOperand(0); + TopHalf = Ret.getOperand(1); } if (isSigned) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c1cb5d9b5235e..eaf177d0661b3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -112,15 +112,15 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - // If R is null, the sub-method took care of registering the result. 
- if (R.getNode()) { + if (R.getNode() && R.getNode() != N) { SetSoftenedFloat(SDValue(N, ResNo), R); - ReplaceSoftenFloatResult(N, ResNo, R); + // Return true only if the node is changed, assuming that the operands + // are also converted when necessary. + return true; } - // Return true only if the node is changed, - // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. - return R.getNode() && R.getNode() != N; + return false; } SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { @@ -753,12 +753,17 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; + case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; + case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: @@ -791,9 +796,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) return false; - // When the operand type can be kept in registers, SoftenFloatResult - // will call ReplaceValueWith to replace all references and we can - // skip softening this operand. 
+ + // When the operand type can be kept in registers there is nothing to do for + // the following opcodes. switch (N->getOperand(OpNo).getOpcode()) { case ISD::BITCAST: case ISD::ConstantFP: @@ -807,18 +812,12 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: return true; } - // For some opcodes, SoftenFloatResult handles all conversion of softening - // and replacing operands, so that there is no need to soften operands - // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: + case ISD::ConstantFP: // Leaf node. + case ISD::CopyFromReg: // Operand is a register that we know to be left + // unchanged by SoftenFloatResult(). + case ISD::Register: // Leaf node. return true; } return false; @@ -829,6 +828,21 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + if (N->getNumOperands() == 3) + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, + N->getOperand(3)), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { // If we get here, the result must be legal but the source illegal. 
EVT SVT = N->getOperand(0).getValueType(); @@ -884,6 +898,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + + if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -913,6 +955,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 154af46c94464..001eed9fb8f62 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -80,6 +80,7 @@ void 
DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); + EVT VT = Res.getValueType(); bool Failed = false; unsigned Mapped = 0; @@ -129,13 +130,17 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - if (Mapped == 0) { + // If the value can be kept in HW registers, softening machinery can + // leave it unchanged and don't put it to any map. + if (Mapped == 0 && + !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && + isLegalInHWReg(VT))) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -331,11 +336,6 @@ ScanOperands: if (NeedsReanalyzing) { assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); - // Remove any result values from SoftenedFloats as N will be revisited - // again. - for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) - SoftenedFloats.erase(SDValue(N, i)); - N->setNodeId(NewNode); // Recompute the NodeId and correct processed operands, adding the node to // the worklist if ready. @@ -754,8 +754,6 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // new uses of From due to CSE. If this happens, replace the new uses of // From with To. 
} while (!From.use_empty()); - - SoftenedFloats.erase(From); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8e999188d8e10..e102df5e913d9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -416,16 +416,6 @@ private: } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. - void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { - // When the result type can be kept in HW registers, the converted - // NewRes node could have the same type. We can save the effort in - // cloning every user of N in SoftenFloatOperand or other legalization functions, - // by calling ReplaceValueWith here to update all users. - if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) - ReplaceValueWith(SDValue(N, ResNo), NewRes); - } - // Convert Float Results to Integer for Non-HW-supported Operations. bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); @@ -471,17 +461,23 @@ private: SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); // Return true if we can skip softening the given operand or SDNode because - // it was soften before by SoftenFloatResult and references to the operand - // were replaced by ReplaceValueWith. + // either it was soften before by SoftenFloatResult and references to the + // operand were replaced by ReplaceValueWith or it's value type is legal in HW + // registers and the operand can be left unchanged. bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); // Convert Float Operand to Integer for Non-HW-supported Operations. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); + SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FABS(SDNode *N); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index aa69e0e2adfce..f3306151d864b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -57,7 +57,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the floating point operand only if it was converted to integers. // Otherwise, it is a legal type like f128 that can be saved in a register. auto SoftenedOp = GetSoftenedFloat(InOp); - if (SoftenedOp == InOp) + if (isLegalInHWReg(SoftenedOp.getValueType())) break; SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ff0e609803d8a..d41054b15bbcf 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2977,7 +2977,11 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled. unsigned InMaskOpc = InMask->getOpcode(); + + // FIXME: This code seems to be too restrictive, we might consider + // generalizing it or dropping it. 
assert((InMaskOpc == ISD::SETCC || + ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) || (isLogicalMaskOp(InMaskOpc) && isSETCCorConvertedSETCC(InMask->getOperand(0)) && isSETCCorConvertedSETCC(InMask->getOperand(1)))) && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 98553152117d1..823e77850c4ba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" @@ -5442,7 +5443,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + AtomicOrdering FailureOrdering, SyncScope::ID SSID) { assert(Opcode == ISD::ATOMIC_CMP_SWAP || Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); @@ -5458,7 +5459,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - AAMDNodes(), nullptr, SynchScope, SuccessOrdering, + AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering); return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); @@ -5480,7 +5481,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { if 
(Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); @@ -5500,7 +5501,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment, AAMDNodes(), - nullptr, SynchScope, Ordering); + nullptr, SSID, Ordering); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); } @@ -7630,45 +7631,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, SDValue Loc = LD->getOperand(1); SDValue BaseLoc = Base->getOperand(1); - if (Loc.getOpcode() == ISD::FrameIndex) { - if (BaseLoc.getOpcode() != ISD::FrameIndex) - return false; - const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); - int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); - int FS = MFI.getObjectSize(FI); - int BFS = MFI.getObjectSize(BFI); - if (FS != BFS || FS != (int)Bytes) return false; - return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); - } - - // Handle X + C. - if (isBaseWithConstantOffset(Loc)) { - int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc) { - // If the base location is a simple address with no offset itself, then - // the second load's first add operand should be the base address. - if (LocOffset == Dist * (int)Bytes) - return true; - } else if (isBaseWithConstantOffset(BaseLoc)) { - // The base location itself has an offset, so subtract that value from the - // second load's offset before comparing to distance * size. 
- int64_t BOffset = - cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { - if ((LocOffset - BOffset) == Dist * (int)Bytes) - return true; - } - } - } - const GlobalValue *GV1 = nullptr; - const GlobalValue *GV2 = nullptr; - int64_t Offset1 = 0; - int64_t Offset2 = 0; - bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); - if (isGA1 && isGA2 && GV1 == GV2) - return Offset1 == (Offset2 + Dist*Bytes); + + auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); + auto LocDecomp = BaseIndexOffset::match(Loc, *this); + + int64_t Offset = 0; + if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) + return (Dist * Bytes == Offset); return false; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 4e899ae6668e7..0d69441ebb7f7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -37,13 +37,13 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - // Match non-equal FrameIndexes - a FrameIndex stemming from an - // alloca will not have it's ObjectOffset set until post-DAG and - // as such we must assume the two framesIndices are incomparable. + // Match non-equal FrameIndexes - If both frame indices are fixed + // we know their relative offsets and can compare them. Otherwise + // we must be conservative. 
if (auto *A = dyn_cast<FrameIndexSDNode>(Base)) if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) - if (!MFI.getObjectAllocation(A->getIndex()) && - !MFI.getObjectAllocation(B->getIndex())) { + if (MFI.isFixedObjectIndex(A->getIndex()) && + MFI.isFixedObjectIndex(B->getIndex())) { Off += MFI.getObjectOffset(B->getIndex()) - MFI.getObjectOffset(A->getIndex()); return true; @@ -60,12 +60,18 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { int64_t Offset = 0; bool IsIndexSignExt = false; - // Consume constant adds - while (Base->getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Base->getOperand(1))) { - int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue(); - Offset += POffset; - Base = Base->getOperand(0); + // Consume constant adds & ors with appropriate masking. + while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { + if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { + // Only consider ORs which act as adds. 
+ if (Base->getOpcode() == ISD::OR && + !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) + break; + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; } if (Base->getOpcode() == ISD::ADD) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index acf68fbbdedfc..41c3f5f235eab 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3220,7 +3220,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } -void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) + Indices = IV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -3228,7 +3234,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; @@ -3268,13 +3274,19 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I)) + Indices = EV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool 
OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; @@ -3559,6 +3571,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MOInvariant; if (isDereferenceable) MMOFlags |= MachineMemOperand::MODereferenceable; + MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -3688,6 +3701,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { MMOFlags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) MMOFlags |= MachineMemOperand::MONonTemporal; + MMOFlags |= TLI.getMMOFlags(I); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. @@ -3978,7 +3992,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3988,7 +4002,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID); SDValue OutChain = L.getValue(2); @@ -4014,7 +4028,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + 
SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4025,7 +4039,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), - /* Alignment=*/ 0, Order, Scope); + /* Alignment=*/ 0, Order, SSID); SDValue OutChain = L.getValue(1); @@ -4040,7 +4054,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -4048,7 +4062,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4066,7 +4080,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { VT.getStoreSize(), I.getAlignment() ? 
I.getAlignment() : DAG.getEVTAlignment(VT), - AAMDNodes(), nullptr, Scope, Order); + AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = @@ -4083,7 +4097,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4100,7 +4114,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - Order, Scope); + Order, SSID); DAG.setRoot(OutChain); } @@ -4982,6 +4996,83 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(CallResult.second); return nullptr; } + case Intrinsic::memmove_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memset_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Val = getValue(MI.getValue()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*DAG.getContext()); + Entry.Node = Val; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); DILocalVariable *Variable = DI.getVariable(); @@ -7842,6 +7933,22 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + if (CLI.IsPostTypeLegalization) { + // If we are lowering a libcall after legalization, split the return type. 
+ SmallVector<EVT, 4> OldRetTys = std::move(RetTys); + SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets); + for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { + EVT RetVT = OldRetTys[i]; + uint64_t Offset = OldOffsets[i]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + RetTys.append(NumRegs, RegisterVT); + for (unsigned j = 0; j != NumRegs; ++j) + Offsets.push_back(Offset + j * RegisterVTSize); + } + } + SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); @@ -7924,6 +8031,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; if (Args[i].IsByVal) FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 431d52b4b9b9f..ac1d6aae65a52 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -38,7 +38,6 @@ class BranchInst; class CallInst; class DbgValueInst; class ExtractElementInst; -class ExtractValueInst; class FCmpInst; class FPExtInst; class FPToSIInst; @@ -53,7 +52,6 @@ class IntToPtrInst; class IndirectBrInst; class InvokeInst; class InsertElementInst; -class InsertValueInst; class Instruction; class LoadInst; class MachineBasicBlock; @@ -859,8 +857,8 @@ private: void visitInsertElement(const User &I); void visitShuffleVector(const User &I); - void visitExtractValue(const ExtractValueInst &I); - void visitInsertValue(const InsertValueInst &I); + void visitExtractValue(const User &I); + void visitInsertValue(const User &I); void 
visitLandingPad(const LandingPadInst &I); void visitGetElementPtr(const User &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f711ca71f79fe..bdf57e8058426 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1483,7 +1483,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->selectInstruction(Inst)) { - FastISelFailed = true; --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and @@ -1506,8 +1505,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } + FastISelFailed = true; + // Then handle certain instructions as single-LLVM-Instruction blocks. - if (isa<CallInst>(Inst)) { + // We cannot separate out GCrelocates to their own blocks since we need + // to keep track of gc-relocates for a particular gc-statepoint. This is + // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before + // visitGCRelocate. + if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 7886737b879c2..17a3a84ecda57 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -125,8 +125,11 @@ static void MarkBlocksLiveIn(BasicBlock *BB, if (!LiveBBs.insert(BB).second) return; // already been here. 
- for (BasicBlock *PredBB : predecessors(BB)) - MarkBlocksLiveIn(PredBB, LiveBBs); + df_iterator_default_set<BasicBlock*> Visited; + + for (BasicBlock *B : inverse_depth_first_ext(BB, Visited)) + LiveBBs.insert(B); + } /// substituteLPadValues - Substitute the values returned by the landingpad diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 008b984dd9616..323045fd2aaae 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -53,10 +53,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB); - SmallVector<const MachineBasicBlock *, 1> EHPadSucessors; + SmallVector<const MachineBasicBlock *, 1> EHPadSuccessors; for (const MachineBasicBlock *SMBB : MBB.successors()) if (SMBB->isEHPad()) - EHPadSucessors.push_back(SMBB); + EHPadSuccessors.push_back(SMBB); // Compute insert points on the first call. The pair is independent of the // current live interval. @@ -68,7 +68,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, LIP.first = LIS.getInstructionIndex(*FirstTerm); // If there is a landing pad successor, also find the call instruction. - if (EHPadSucessors.empty()) + if (EHPadSuccessors.empty()) return LIP.first; // There may not be a call instruction (?) in which case we ignore LPad. 
LIP.second = LIP.first; @@ -87,7 +87,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, if (!LIP.second) return LIP.first; - if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) { + if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) { return LIS.isLiveInToMBB(CurLI, EHPad); })) return LIP.first; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index e9d38c10c8601..3914ee5147122 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -384,6 +384,26 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { "__llvm_memcpy_element_unordered_atomic_8"; Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = "__llvm_memcpy_element_unordered_atomic_16"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memmove_element_unordered_atomic_1"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memmove_element_unordered_atomic_2"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memmove_element_unordered_atomic_4"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memmove_element_unordered_atomic_8"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memmove_element_unordered_atomic_16"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memset_element_unordered_atomic_1"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memset_element_unordered_atomic_2"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memset_element_unordered_atomic_4"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memset_element_unordered_atomic_8"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memset_element_unordered_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -803,6 +823,40 
@@ RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { } } +RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } +} + +RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { |