Diffstat (limited to 'lib/CodeGen/SelectionDAG/FastISel.cpp')
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 245
1 file changed, 213 insertions(+), 32 deletions(-)
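
The change below gates all of the new sinking behavior behind a hidden cl::opt, -fast-isel-sink-local-values, which defaults to true. Hidden options are still accepted on the llc command line (they are only omitted from -help), so disabling the feature while triaging should look roughly like this (a sketch; test.ll is a hypothetical input):

    llc -O0 -fast-isel-sink-local-values=false test.ll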
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index d3c94b5f9e6b..e4a9d557d386 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -61,7 +61,6 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -99,6 +98,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -113,6 +113,11 @@ using namespace llvm;
 
 #define DEBUG_TYPE "isel"
 
+// FIXME: Remove this after the feature has proven reliable.
+static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
+                                     cl::init(true), cl::Hidden,
+                                     cl::desc("Sink local values in FastISel"));
+
 STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
                                          "target-independent selector");
 STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
@@ -120,9 +125,10 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
                                     "target-specific selector");
 STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
 
 /// Set the current block to which generated machine instructions will be
-/// appended, and clear the local CSE map.
+/// appended.
 void FastISel::startNewBlock() {
-  LocalValueMap.clear();
+  assert(LocalValueMap.empty() &&
+         "local values should be cleared after finishing a BB");
 
   // Instructions are appended to FuncInfo.MBB. If the basic block already
   // contains labels or copies, use the last instruction as the last local
@@ -133,6 +139,9 @@ void FastISel::startNewBlock() {
   LastLocalValue = EmitStartPt;
 }
 
+/// Flush the local CSE map and sink anything we can.
+void FastISel::finishBasicBlock() { flushLocalValueMap(); }
+
 bool FastISel::lowerArguments() {
   if (!FuncInfo.CanLowerReturn)
     // Fallback to SDISel argument lowering code to deal with sret pointer
@@ -153,11 +162,168 @@ bool FastISel::lowerArguments() {
   return true;
 }
 
+/// Return the defined register if this instruction defines exactly one
+/// virtual register and uses no other virtual registers. Otherwise return 0.
+static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
+  unsigned RegDef = 0;
+  for (const MachineOperand &MO : MI.operands()) {
+    if (!MO.isReg())
+      continue;
+    if (MO.isDef()) {
+      if (RegDef)
+        return 0;
+      RegDef = MO.getReg();
+    } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+      // This is another use of a vreg. Don't try to sink it.
+      return 0;
+    }
+  }
+  return RegDef;
+}
+
 void FastISel::flushLocalValueMap() {
+  // Try to sink local values down to their first use so that we can give them
+  // a better debug location. This has the side effect of shrinking local value
+  // live ranges, which helps out fast regalloc.
+  if (SinkLocalValues && LastLocalValue != EmitStartPt) {
+    // Sink local value materialization instructions between EmitStartPt and
+    // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
+    // avoid inserting into the range that we're iterating over.
+    MachineBasicBlock::reverse_iterator RE =
+        EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
+                    : FuncInfo.MBB->rend();
+    MachineBasicBlock::reverse_iterator RI(LastLocalValue);
+
+    InstOrderMap OrderMap;
+    for (; RI != RE;) {
+      MachineInstr &LocalMI = *RI;
+      ++RI;
+      bool Store = true;
+      if (!LocalMI.isSafeToMove(nullptr, Store))
+        continue;
+      unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+      if (DefReg == 0)
+        continue;
+
+      sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+    }
+  }
+
   LocalValueMap.clear();
   LastLocalValue = EmitStartPt;
   recomputeInsertPt();
   SavedInsertPt = FuncInfo.InsertPt;
+  LastFlushPoint = FuncInfo.InsertPt;
+}
+
+static bool isRegUsedByPhiNodes(unsigned DefReg,
+                                FunctionLoweringInfo &FuncInfo) {
+  for (auto &P : FuncInfo.PHINodesToUpdate)
+    if (P.second == DefReg)
+      return true;
+  return false;
+}
+
+/// Build a map of instruction orders. Return the first terminator and its
+/// order. Consider EH_LABEL instructions to be terminators as well, since
+/// local values for phis after invokes must be materialized before the call.
+void FastISel::InstOrderMap::initialize(
+    MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
+  unsigned Order = 0;
+  for (MachineInstr &I : *MBB) {
+    if (!FirstTerminator &&
+        (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) {
+      FirstTerminator = &I;
+      FirstTerminatorOrder = Order;
+    }
+    Orders[&I] = Order++;
+
+    // We don't need to order instructions past the last flush point.
+    if (I.getIterator() == LastFlushPoint)
+      break;
+  }
+}
+
+void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
+                                             unsigned DefReg,
+                                             InstOrderMap &OrderMap) {
+  // If this register is used by a register fixup, MRI will not contain all
+  // the uses until after register fixups, so don't attempt to sink or DCE
+  // this instruction. Register fixups typically come from no-op cast
+  // instructions, which replace the cast instruction vreg with the local
+  // value vreg.
+  if (FuncInfo.RegsWithFixups.count(DefReg))
+    return;
+
+  // We can DCE this instruction if there are no uses and it wasn't
+  // materialized for a successor PHI node.
+  bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+  if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+    if (EmitStartPt == &LocalMI)
+      EmitStartPt = EmitStartPt->getPrevNode();
+    LLVM_DEBUG(dbgs() << "removing dead local value materialization "
+                      << LocalMI);
+    OrderMap.Orders.erase(&LocalMI);
+    LocalMI.eraseFromParent();
+    return;
+  }
+
+  // Number the instructions if we haven't yet so we can efficiently find the
+  // earliest use.
+  if (OrderMap.Orders.empty())
+    OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
+
+  // Find the first user in the BB.
+  MachineInstr *FirstUser = nullptr;
+  unsigned FirstOrder = std::numeric_limits<unsigned>::max();
+  for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
+    auto I = OrderMap.Orders.find(&UseInst);
+    assert(I != OrderMap.Orders.end() &&
+           "local value used by instruction outside local region");
+    unsigned UseOrder = I->second;
+    if (UseOrder < FirstOrder) {
+      FirstOrder = UseOrder;
+      FirstUser = &UseInst;
+    }
+  }
+
+  // The insertion point will be the first terminator or the first user,
+  // whichever came first. If there was no terminator, this must be a
+  // fallthrough block and the insertion point is the end of the block.
+  MachineBasicBlock::instr_iterator SinkPos;
+  if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
+    FirstOrder = OrderMap.FirstTerminatorOrder;
+    SinkPos = OrderMap.FirstTerminator->getIterator();
+  } else if (FirstUser) {
+    SinkPos = FirstUser->getIterator();
+  } else {
+    assert(UsedByPHI && "must be users if not used by a phi");
+    SinkPos = FuncInfo.MBB->instr_end();
+  }
+
+  // Collect all DBG_VALUEs before the new insertion position so that we can
+  // sink them.
+  SmallVector<MachineInstr *, 1> DbgValues;
+  for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
+    if (!DbgVal.isDebugValue())
+      continue;
+    unsigned UseOrder = OrderMap.Orders[&DbgVal];
+    if (UseOrder < FirstOrder)
+      DbgValues.push_back(&DbgVal);
+  }
+
+  // Sink LocalMI before SinkPos and assign it the same DebugLoc.
+  LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
+  FuncInfo.MBB->remove(&LocalMI);
+  FuncInfo.MBB->insert(SinkPos, &LocalMI);
+  if (SinkPos != FuncInfo.MBB->end())
+    LocalMI.setDebugLoc(SinkPos->getDebugLoc());
+
+  // Sink any debug values that we've collected.
+  for (MachineInstr *DI : DbgValues) {
+    FuncInfo.MBB->remove(DI);
+    FuncInfo.MBB->insert(SinkPos, DI);
+  }
 }
 
 bool FastISel::hasTrivialKill(const Value *V) {
@@ -328,8 +494,10 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
     AssignedReg = Reg;
   else if (Reg != AssignedReg) {
     // Arrange for uses of AssignedReg to be replaced by uses of Reg.
-    for (unsigned i = 0; i < NumRegs; i++)
+    for (unsigned i = 0; i < NumRegs; i++) {
       FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+      FuncInfo.RegsWithFixups.insert(Reg + i);
+    }
 
     AssignedReg = Reg;
   }
@@ -681,7 +849,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
   return true;
 }
 
-/// \brief Lower an argument list according to the target calling convention.
+/// Lower an argument list according to the target calling convention.
 ///
 /// This is a helper for lowering intrinsics that follow a target calling
 /// convention or require stack pointer adjustment. Only a subset of the
@@ -702,7 +870,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
     ArgListEntry Entry;
     Entry.Val = V;
     Entry.Ty = V->getType();
-    Entry.setAttributes(&CS, ArgIdx);
+    Entry.setAttributes(&CS, ArgI);
     Args.push_back(Entry);
   }
@@ -874,10 +1042,31 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
   MachineInstrBuilder MIB =
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
   for (auto &MO : Ops)
     MIB.add(MO);
+
   // Insert the Patchable Event Call instruction, that gets lowered properly.
   return true;
 }
 
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+                                          /*IsDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // Insert the Patchable Typed Event Call instruction, that gets lowered properly.
+  return true;
+}
 
 /// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
@@ -1141,13 +1330,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
     assert(DI->getVariable() && "Missing variable");
     if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
-      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
       return true;
     }
 
     const Value *Address = DI->getAddress();
     if (!Address || isa<UndefValue>(Address)) {
-      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
       return true;
     }
@@ -1182,24 +1371,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     if (Op) {
       assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
              "Expected inlined-at fields to agree");
-      if (Op->isReg()) {
-        Op->setIsDebug(true);
-        // A dbg.declare describes the address of a source variable, so lower it
-        // into an indirect DBG_VALUE.
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
-                Op->getReg(), DI->getVariable(), DI->getExpression());
-      } else
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                TII.get(TargetOpcode::DBG_VALUE))
-            .add(*Op)
-            .addImm(0)
-            .addMetadata(DI->getVariable())
-            .addMetadata(DI->getExpression());
+      // A dbg.declare describes the address of a source variable, so lower it
+      // into an indirect DBG_VALUE.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+              *Op, DI->getVariable(), DI->getExpression());
     } else {
       // We can't yet handle anything else here because it would require
       // generating code, thus altering codegen because of debug info.
-      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
     }
     return true;
   }
@@ -1242,7 +1422,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     } else {
       // We can't yet handle anything else here because it would require
       // generating code, thus altering codegen because of debug info.
-      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
     }
     return true;
   }
@@ -1256,7 +1436,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
     updateValueMap(II, ResultReg);
     return true;
   }
-  case Intrinsic::invariant_group_barrier:
+  case Intrinsic::launder_invariant_group:
+  case Intrinsic::strip_invariant_group:
   case Intrinsic::expect: {
     unsigned ResultReg = getRegForValue(II->getArgOperand(0));
     if (!ResultReg)
@@ -1272,6 +1453,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
 
   case Intrinsic::xray_customevent:
     return selectXRayCustomEvent(II);
+  case Intrinsic::xray_typedevent:
+    return selectXRayTypedEvent(II);
   }
 
   return fastLowerIntrinsicCall(II);
@@ -2051,11 +2234,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
     // At this point we know that there is a 1-1 correspondence between LLVM PHI
    // nodes and Machine PHI nodes, but the incoming operands have not been
     // emitted yet.
-    for (BasicBlock::const_iterator I = SuccBB->begin();
-         const auto *PN = dyn_cast<PHINode>(I); ++I) {
-
+    for (const PHINode &PN : SuccBB->phis()) {
      // Ignore dead phi's.
-      if (PN->use_empty())
+      if (PN.use_empty())
        continue;
 
      // Only handle legal types. Two interesting things to note here. First,
@@ -2064,7 +2245,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
      // own moves. Second, this check is necessary because FastISel doesn't
      // use CreateRegs to create registers, so it always creates
      // exactly one register for each non-void instruction.
-      EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true);
+      EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
        // Handle integer promotions, though, because they're common and easy.
        if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
@@ -2073,11 +2254,11 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
        }
      }
 
-      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+      const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
 
      // Set the DebugLoc for the copy. Prefer the location of the operand
      // if there is one; use the location of the PHI otherwise.
-      DbgLoc = PN->getDebugLoc();
+      DbgLoc = PN.getDebugLoc();
      if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
        DbgLoc = Inst->getDebugLoc();
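
The core of the patch is the pair flushLocalValueMap() / sinkLocalValueMaterialization(): number the instructions in the block once, walk the local-value area bottom-up, DCE dead materializations, and splice each remaining one to just before its first user. Below is a minimal standalone C++ sketch of that splice-to-first-use loop. It is not LLVM code: Inst, Block, firstUseOrder, and sinkLocalValues are invented stand-ins, and it deliberately ignores the isSafeToMove checks, PHI and terminator/EH_LABEL handling, and DBG_VALUE users that the real code must respect.

#include <algorithm>
#include <iostream>
#include <iterator>
#include <list>
#include <string>
#include <vector>

// A toy "instruction": at most one def and a list of used registers.
struct Inst {
  std::string Name;      // printable form, e.g. "v1 = mov 2"
  int Def = -1;          // virtual register defined, -1 if none
  std::vector<int> Uses; // virtual registers read
};

// Return the order (index) of the first instruction in Block that uses Reg,
// or -1 if Reg has no user. This mirrors the "find the first user" scan in
// sinkLocalValueMaterialization.
static int firstUseOrder(const std::list<Inst> &Block, int Reg) {
  int Order = 0;
  for (const Inst &I : Block) {
    if (std::find(I.Uses.begin(), I.Uses.end(), Reg) != I.Uses.end())
      return Order;
    ++Order;
  }
  return -1;
}

// Sink the first NumLocals instructions (the "local value area" at the top
// of the block) down to their first use, visiting them bottom-up as the
// patch does so that the unvisited locals keep their positions.
static void sinkLocalValues(std::list<Inst> &Block, int NumLocals) {
  for (int N = NumLocals - 1; N >= 0; --N) {
    auto LocalIt = std::next(Block.begin(), N);
    if (LocalIt->Def < 0)
      continue;
    int UseOrder = firstUseOrder(Block, LocalIt->Def);
    if (UseOrder < 0)
      continue; // dead local value; the real patch erases these instead
    auto SinkPos = std::next(Block.begin(), UseOrder);
    // Move the def right before its first use; splice is a no-op if the
    // def already sits immediately above the user.
    Block.splice(SinkPos, Block, LocalIt);
  }
}

int main() {
  std::list<Inst> Block = {
      {"v0 = mov 1", 0, {}}, // local value, first used late
      {"v1 = mov 2", 1, {}}, // local value, used immediately
      {"use v1", -1, {1}},
      {"use v0", -1, {0}},
  };
  sinkLocalValues(Block, /*NumLocals=*/2);
  for (const Inst &I : Block)
    std::cout << I.Name << "\n";
  // Prints: v1 = mov 2, use v1, v0 = mov 1, use v0 -- v0's
  // materialization has moved down next to its only user.
}

As in the patch, the payoff of moving a def down to its first use is twofold: the instruction inherits a sensible debug location from its user, and its live range shrinks, which is exactly what helps the fast register allocator.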