author    | Roman Divacky <rdivacky@FreeBSD.org> | 2010-05-04 16:11:02 +0000
committer | Roman Divacky <rdivacky@FreeBSD.org> | 2010-05-04 16:11:02 +0000
commit    | d7f7719e5e082c0b8ea2182dcbd2242b7834aa26 (patch)
tree      | 70fbd90da02177c8e6ef82adba9fa8ace285a5e3 /lib/CodeGen
parent    | 9f4a1da9a0a56a0b0a7f8249f34b3cdea6179c41 (diff)
Diffstat (limited to 'lib/CodeGen'): 81 files changed, 5866 insertions, 2937 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 8840622f9ae5..4008a6a63cf8 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -31,12 +31,12 @@ using namespace llvm; // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod static cl::opt<int> DebugDiv("agg-antidep-debugdiv", - cl::desc("Debug control for aggressive anti-dep breaker"), - cl::init(0), cl::Hidden); + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); static cl::opt<int> DebugMod("agg-antidep-debugmod", - cl::desc("Debug control for aggressive anti-dep breaker"), - cl::init(0), cl::Hidden); + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB) : @@ -210,7 +210,7 @@ void AggressiveAntiDepBreaker::FinishBlock() { } void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, - unsigned InsertPosIndex) { + unsigned InsertPosIndex) { assert(Count < InsertPosIndex && "Instruction index out of expected range!"); std::set<unsigned> PassthruRegs; @@ -244,7 +244,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, } bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, - MachineOperand& MO) + MachineOperand& MO) { if (!MO.isReg() || !MO.isImplicit()) return false; @@ -281,9 +281,9 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, /// AntiDepEdges - Return in Edges the anti- and output- dependencies /// in SU that we want to consider for breaking. -static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) { +static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) { SmallSet<unsigned, 4> RegSet; - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { unsigned Reg = P->getReg(); @@ -297,14 +297,14 @@ static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) { /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. -static SUnit *CriticalPathStep(SUnit *SU) { - SDep *Next = 0; +static const SUnit *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. 
if (SU != 0) { - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); + const SUnit *PredSU = P->getSUnit(); unsigned PredLatency = P->getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over @@ -359,8 +359,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count, - std::set<unsigned>& PassthruRegs) -{ + std::set<unsigned>& PassthruRegs) { unsigned *DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -439,7 +438,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, } void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, - unsigned Count) { + unsigned Count) { DEBUG(dbgs() << "\tUse Groups:"); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -704,9 +703,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( /// ScheduleDAG and break them by renaming registers. /// unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( - std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); @@ -721,20 +720,21 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( RenameOrderType RenameOrder; // ...need a map from MI to SUnit. - std::map<MachineInstr *, SUnit *> MISUnitMap; + std::map<MachineInstr *, const SUnit *> MISUnitMap; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU)); + const SUnit *SU = &SUnits[i]; + MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(), + SU)); } // Track progress along the critical path through the SUnit graph as // we walk the instructions. This is needed for regclasses that only // break critical-path anti-dependencies. - SUnit *CriticalPathSU = 0; + const SUnit *CriticalPathSU = 0; MachineInstr *CriticalPathMI = 0; if (CriticalPathSet.any()) { for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; + const SUnit *SU = &SUnits[i]; if (!CriticalPathSU || ((SU->getDepth() + SU->Latency) > (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { @@ -775,8 +775,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // The dependence edges that represent anti- and output- // dependencies that are candidates for breaking. - std::vector<SDep*> Edges; - SUnit *PathSU = MISUnitMap[MI]; + std::vector<const SDep *> Edges; + const SUnit *PathSU = MISUnitMap[MI]; AntiDepEdges(PathSU, Edges); // If MI is not on the critical path, then we don't rename @@ -794,7 +794,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (!MI->isKill()) { // Attempt to break each anti-dependency... 
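The loop that follows walks each candidate edge and tries to break the anti-dependence by renaming the later definition. As a minimal sketch of the underlying idea, independent of LLVM's SUnit/SDep machinery (the Inst type and the free-register choice are hypothetical), a write-after-read hazard disappears once the second definition no longer reuses the register the first instruction still reads:

    #include <vector>

    struct Inst { int Def; std::vector<int> Uses; };

    // Earlier uses Reg, Later redefines Reg: an anti (WAR) dependence. Renaming
    // Later's destination to a free register removes the ordering constraint;
    // subsequent readers of Later's result must be rewritten as well (elided).
    static bool breakAntiDep(const Inst &Earlier, Inst &Later, int FreeReg) {
      for (int U : Earlier.Uses)
        if (U == Later.Def) {
          Later.Def = FreeReg;
          return true;
        }
      return false;
    }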
for (unsigned i = 0, e = Edges.size(); i != e; ++i) { - SDep *Edge = Edges[i]; + const SDep *Edge = Edges[i]; SUnit *NextSU = Edge->getSUnit(); if ((Edge->getKind() != SDep::Anti) && @@ -838,7 +838,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::pred_iterator P = PathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), PE = PathSU->Preds.end(); P != PE; ++P) { if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : @@ -847,7 +847,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( break; } } - for (SUnit::pred_iterator P = PathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), PE = PathSU->Preds.end(); P != PE; ++P) { if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && (P->getKind() != SDep::Output)) { diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index a62d68c2a834..506d43e7f3fc 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -142,9 +142,9 @@ namespace llvm { /// path /// of the ScheduleDAG and break them by renaming registers. /// - unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex); /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp new file mode 100644 index 000000000000..f71eee5d01b8 --- /dev/null +++ b/lib/CodeGen/Analysis.cpp @@ -0,0 +1,285 @@ +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several CodeGen-specific LLVM IR analysis utilties. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. 
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty->isVoidTy()) + return; + // Base case: we can get an EVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalVariable *llvm::ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + + if (GV && GV->getName() == ".llvm.eh.catch.all.value") { + assert(GV->hasInitializer() && + "The EH catch-all value must have an initializer"); + Value *Init = GV->getInitializer(); + GV = dyn_cast<GlobalVariable>(Init); + if (!GV) V = cast<ConstantPointerNull>(Init); + } + + assert((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. 
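ComputeLinearIndex above counts non-aggregate leaves in declaration order until it reaches the member named by the insertvalue/extractvalue index path. The same traversal over a toy type tree (the Ty struct here is a hypothetical stand-in for llvm::Type, and arrays are treated like structs for brevity):

    #include <vector>

    struct Ty {
      std::vector<Ty> Elts;                    // empty => non-aggregate leaf
      bool isLeaf() const { return Elts.empty(); }
    };

    static unsigned linearIndex(const Ty &T, const unsigned *Indices,
                                const unsigned *IndicesEnd, unsigned Cur = 0) {
      if (Indices && Indices == IndicesEnd)
        return Cur;                            // base case: member reached
      if (!T.isLeaf()) {
        for (unsigned i = 0, e = T.Elts.size(); i != e; ++i) {
          if (Indices && *Indices == i)        // descend into the indexed element
            return linearIndex(T.Elts[i], Indices + 1, IndicesEnd, Cur);
          Cur = linearIndex(T.Elts[i], nullptr, nullptr, Cur); // count leaves
        }
        return Cur;
      }
      return Cur + 1;                          // each leaf occupies one slot
    }

For { i32, { float, float } }, the member with index path {1, 1} linearizes to 2: the i32 is slot 0 and the two floats are slots 1 and 2.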
+bool +llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + llvm_unreachable("Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (FiniteOnlyFPMath()) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. +/// +ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + llvm_unreachable("Invalid ICmp predicate opcode!"); + return ISD::SETNE; + } +} + +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. 
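getFCmpCondCode above computes two candidate ISD codes per IR predicate because ordered (FOC) and unordered (FPC) comparisons differ only when an operand is NaN; when FiniteOnlyFPMath() promises NaN-free math, the cheaper ordered form is safe. The distinction in plain C++:

    #include <cmath>
    #include <cstdio>

    int main() {
      double QNaN = std::nan("");
      // SETOLT (ordered less-than): false whenever an operand is NaN.
      bool olt = (1.0 < QNaN);                        // false
      // SETULT (unordered-or-less-than): true when unordered or less.
      bool ult = std::isnan(QNaN) || (1.0 < QNaN);    // true
      std::printf("OLT=%d ULT=%d\n", olt, ult);       // OLT=0 ULT=1
      return 0;
    }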
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, + const TargetLowering &TLI) { + const Instruction *I = CS.getInstruction(); + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); + const Function *F = ExitBB->getParent(); + + // The block must end in a return statement or unreachable. + // + // FIXME: Decline tailcall if it's not guaranteed and if the block ends in + // an unreachable, for now. The way tailcall optimization is currently + // implemented means it will add an epilogue followed by a jump. That is + // not profitable. Also, if the callee is a special function (e.g. + // longjmp on x86), it can end up causing miscompilation that has not + // been fully understood. + if (!Ret && + (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. + if (I->mayHaveSideEffects() || I->mayReadFromMemory() || + !I->isSafeToSpeculativelyExecute()) + for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; + --BBI) { + if (&*BBI == I) + break; + // Debug info intrinsics do not get in the way of tail call optimization. + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !BBI->isSafeToSpeculativelyExecute()) + return false; + } + + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Otherwise, make sure the unmodified return value of I is the return value. + for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; + U = dyn_cast<Instruction>(U->getOperand(0))) { + if (!U) + return false; + if (!U->hasOneUse()) + return false; + if (U == I) + break; + // Check for a truly no-op truncate. + if (isa<TruncInst>(U) && + TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) + continue; + // Check for a truly no-op bitcast. + if (isa<BitCastInst>(U) && + (U->getOperand(0)->getType() == U->getType() || + (U->getOperand(0)->getType()->isPointerTy() && + U->getType()->isPointerTy()))) + continue; + // Otherwise it's not a true no-op. + return false; + } + + return true; +} + diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 3ee30c6a18e3..086b75795638 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -39,9 +39,9 @@ public: /// basic-block region and break them by renaming registers. Return /// the number of anti-dependencies broken. 
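One check in isInTailCallPosition above deserves unpacking: XOR-ing the callee's and caller's return-attribute bitmasks yields exactly the bits on which the two disagree, and masking out NoAlias forgives a disagreement on that one attribute. A sketch with plain bitmasks (the constants are illustrative, not LLVM's actual Attributes encoding):

    #include <cstdint>

    enum : uint32_t { ZExt = 1u << 0, SExt = 1u << 1, NoAlias = 1u << 2 };

    // Return attributes must match between caller and callee, except NoAlias,
    // which does not affect the call sequence.
    static bool retAttrsCompatible(uint32_t Callee, uint32_t Caller) {
      return ((Callee ^ Caller) & ~uint32_t(NoAlias)) == 0;
    }

    // retAttrsCompatible(ZExt, ZExt | NoAlias) -> true  (only NoAlias differs)
    // retAttrsCompatible(ZExt, SExt)           -> false (sign/zero ext differ)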
/// - virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) =0; /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c86e2411d303..ded4b3f18bbc 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,8 +42,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Timer.h" using namespace llvm; +static const char *DWARFGroupName = "DWARF Emission"; +static const char *DbgTimerName = "DWARF Debug Writer"; +static const char *EHTimerName = "DWARF Exception Writer"; + STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; @@ -56,6 +61,35 @@ static gcp_map_type &getGCMap(void *&P) { } +/// getGVAlignmentLog2 - Return the alignment to use for the specified global +/// value in log2 form. This rounds up to the preferred alignment if possible +/// and legal. +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, + unsigned InBits = 0) { + unsigned NumBits = 0; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + NumBits = TD.getPreferredAlignmentLog(GVar); + + // If InBits is specified, round it to it. + if (InBits > NumBits) + NumBits = InBits; + + // If the GV has a specified alignment, take it into account. + if (GV->getAlignment() == 0) + return NumBits; + + unsigned GVAlign = Log2_32(GV->getAlignment()); + + // If the GVAlign is larger than NumBits, or if we are required to obey + // NumBits because the GV has an assigned section, obey it. + if (GVAlign > NumBits || GV->hasSection()) + NumBits = GVAlign; + return NumBits; +} + + + + AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) : MachineFunctionPass(&ID), TM(tm), MAI(tm.getMCAsmInfo()), @@ -88,7 +122,7 @@ unsigned AsmPrinter::getFunctionNumber() const { return MF->getFunctionNumber(); } -TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { +const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return TM.getTargetLowering()->getObjFileLowering(); } @@ -222,8 +256,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType()); - unsigned AlignLog = TD->getPreferredAlignmentLog(GV); + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + + // If the alignment is specified, we *must* obey it. Overaligning a global + // with a specified alignment is a prompt way to break globals emitted to + // sections and expected to be contiguous (e.g. ObjC metadata). + unsigned AlignLog = getGVAlignmentLog2(GV, *TD); // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -270,6 +308,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle the zerofill directive on darwin, which is a special form of BSS // emission. if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. 
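getGVAlignmentLog2, added earlier in this diff to AsmPrinter.cpp, resolves three inputs into a single log2 alignment: the target's preferred alignment for the global, a caller-supplied minimum, and the global's explicit alignment, which must win when the global sits in an assigned section (over-aligning would break layouts that expect contiguous emission, e.g. ObjC metadata). The same decision with plain integers, all in log2 form (hypothetical helper mirroring the diff's logic; the real function takes the explicit alignment in bytes and applies Log2_32):

    #include <algorithm>

    // ExplicitLog2 == 0 means the global carries no explicit alignment.
    static unsigned resolveAlignLog2(unsigned PreferredLog2,
                                     unsigned RequestedLog2,
                                     unsigned ExplicitLog2, bool HasSection) {
      unsigned Bits = std::max(PreferredLog2, RequestedLog2);
      if (ExplicitLog2 == 0)
        return Bits;                  // nothing explicit to obey
      if (ExplicitLog2 > Bits || HasSection)
        Bits = ExplicitLog2;          // the explicit alignment takes over
      return Bits;
    }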
+ // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 @@ -347,8 +387,22 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit pre-function debug and/or EH information. - if (DE) DE->BeginFunction(MF); - if (DD) DD->beginFunction(MF); + if (DE) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->BeginFunction(MF); + } else { + DE->BeginFunction(MF); + } + } + if (DD) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->beginFunction(MF); + } else { + DD->beginFunction(MF); + } + } } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -434,7 +488,58 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer.AddBlankLine(); } +/// EmitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + // This code handles only the 3-operand target-independent form. + if (MI->getNumOperands() != 3) + return false; + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; + + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + if (V.getContext().isSubprogram()) + OS << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; + OS << V.getName() << " <- "; + + // Register or immediate value. Register 0 means undef. + if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + OS << (double)APF.convertToFloat(); + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + } else if (MI->getOperand(0).isImm()) { + OS << MI->getOperand(0).getImm(); + } else { + assert(MI->getOperand(0).isReg() && "Unknown operand type"); + if (MI->getOperand(0).getReg() == 0) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; + } + OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + } + + OS << '+' << MI->getOperand(1).getImm(); + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; +} /// EmitFunctionBody - This method emits the body and trailer for a /// function. @@ -453,13 +558,20 @@ void AsmPrinter::EmitFunctionBody() { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { // Print the assembly for the instruction. 
- if (!II->isLabel()) + if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && + !II->isDebugValue()) { HasAnyRealCode = true; - - ++EmittedInsts; - - if (ShouldPrintDebugScopes) - DD->beginScope(II); + ++EmittedInsts; + } + + if (ShouldPrintDebugScopes) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->beginScope(II); + } else { + DD->beginScope(II); + } + } if (isVerbose()) EmitComments(*II, OutStreamer.GetCommentOS()); @@ -473,6 +585,12 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!EmitDebugValueComment(II, *this)) + EmitInstruction(II); + } + break; case TargetOpcode::IMPLICIT_DEF: if (isVerbose()) EmitImplicitDef(II, *this); break; @@ -484,16 +602,29 @@ void AsmPrinter::EmitFunctionBody() { break; } - if (ShouldPrintDebugScopes) - DD->endScope(II); + if (ShouldPrintDebugScopes) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endScope(II); + } else { + DD->endScope(II); + } + } } } // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the - // labels from collapsing together. Just emit a 0 byte. - if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) - OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/); + // labels from collapsing together. Just emit a noop. + if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) { + MCInst Noop; + TM.getInstrInfo()->getNoopForMachoTarget(Noop); + if (Noop.getOpcode()) { + OutStreamer.AddComment("avoids zero-length function"); + OutStreamer.EmitInstruction(Noop); + } else // Target not mc-ized yet. + OutStreamer.EmitRawText(StringRef("\tnop\n")); + } // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); @@ -514,8 +645,22 @@ void AsmPrinter::EmitFunctionBody() { } // Emit post-function debug information. - if (DD) DD->endFunction(MF); - if (DE) DE->EndFunction(); + if (DD) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endFunction(MF); + } else { + DD->endFunction(MF); + } + } + if (DE) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->EndFunction(); + } else { + DE->EndFunction(); + } + } MMI->EndFunction(); // Print out jump tables referenced by the function. @@ -524,6 +669,12 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } +/// getDebugValueLocation - Get location information encoded by DBG_VALUE +/// operands. +MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // Target specific DBG_VALUE instructions are handled by each target. + return MachineLocation(); +} bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. @@ -533,11 +684,21 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. if (DE) { - DE->EndModule(); + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->EndModule(); + } else { + DE->EndModule(); + } delete DE; DE = 0; } if (DD) { - DD->endModule(); + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endModule(); + } else { + DD->endModule(); + } delete DD; DD = 0; } @@ -595,7 +756,7 @@ bool AsmPrinter::doFinalization(Module &M) { // to be executable. Some targets have a directive to declare this. 
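A pattern recurring throughout this file wraps each DWARF emission call in a NamedRegionTimer only when -time-passes is enabled; the extra braces make the RAII timer's destructor fire, and the region stop counting, as soon as the timed call returns. A generic illustration of the same pattern with std::chrono (ScopeTimer is a stand-in, not LLVM's Timer class):

    #include <chrono>
    #include <cstdio>

    struct ScopeTimer {
      const char *Name;
      std::chrono::steady_clock::time_point Start;
      explicit ScopeTimer(const char *N)
          : Name(N), Start(std::chrono::steady_clock::now()) {}
      ~ScopeTimer() {                        // stops when the scope closes
        auto Us = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::steady_clock::now() - Start).count();
        std::printf("%s: %lld us\n", Name, static_cast<long long>(Us));
      }
    };

    static void emitDebugInfo() { /* timed work */ }

    void example(bool TimePassesIsEnabled) {
      if (TimePassesIsEnabled) {
        ScopeTimer T("DWARF Debug Writer"); // mirrors NamedRegionTimer usage
        emitDebugInfo();
      } else {
        emitDebugInfo();                    // zero overhead when disabled
      }
    }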
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) + if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer.SwitchSection(S); // Allow the target to emit any magic that it wants at the end of the file, @@ -877,7 +1038,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); - EmitAlignment(Align, 0); + EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); if (TM.getRelocationModel() == Reloc::Static && @@ -891,7 +1052,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.global_dtors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); - EmitAlignment(Align, 0); + EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); if (TM.getRelocationModel() == Reloc::Static && @@ -984,30 +1145,49 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); } +/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" +/// where the size in bytes of the directive is specified by Size and Hi/Lo +/// specify the labels. This implicitly uses .set if it is available. +void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, + const MCSymbol *Lo, unsigned Size) + const { + + // Emit Hi+Offset - Lo + // Get the Hi+Offset expression. + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), + MCConstantExpr::Create(Offset, OutContext), + OutContext); + + // Get the Hi+Offset-Lo expression. + const MCExpr *Diff = + MCBinaryExpr::CreateSub(Plus, + MCSymbolRefExpr::Create(Lo, OutContext), + OutContext); + + if (!MAI->hasSetDirective()) + OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); + else { + // Otherwise, emit with .set (aka assignment). + MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); + OutStreamer.EmitAssignment(SetLabel, Diff); + OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/); + } +} + //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of // two boundary. For example, if you pass in 3 here, you will get an 8 // byte alignment. If a global value is specified, and if that global has -// an explicit alignment requested, it will unconditionally override the -// alignment request. However, if ForcedAlignBits is specified, this value -// has final say: the ultimate alignment will be the max of ForcedAlignBits -// and the alignment computed with NumBits and the global. +// an explicit alignment requested, it will override the alignment request +// if required for correctness. 
// -// The algorithm is: -// Align = NumBits; -// if (GV && GV->hasalignment) Align = GV->getalignment(); -// Align = std::max(Align, ForcedAlignBits); -// -void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, - unsigned ForcedAlignBits, - bool UseFillExpr) const { - if (GV && GV->getAlignment()) - NumBits = Log2_32(GV->getAlignment()); - NumBits = std::max(NumBits, ForcedAlignBits); +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { + if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); - if (NumBits == 0) return; // No need to emit alignment. + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); @@ -1015,6 +1195,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); } +//===----------------------------------------------------------------------===// +// Constant emission. +//===----------------------------------------------------------------------===// + /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. /// static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { @@ -1142,12 +1326,15 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } } +static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, + AsmPrinter &AP); + static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { if (AddrSpace != 0 || !CA->isString()) { // Not a string. Print the values in successive locations for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - AP.EmitGlobalConstant(CA->getOperand(i), AddrSpace); + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); return; } @@ -1163,7 +1350,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, static void EmitGlobalConstantVector(const ConstantVector *CV, unsigned AddrSpace, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - AP.EmitGlobalConstant(CV->getOperand(i), AddrSpace); + EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); } static void EmitGlobalConstantStruct(const ConstantStruct *CS, @@ -1183,7 +1370,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. 
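EmitLabelOffsetDifference above builds the expression Hi+Offset-Lo as an MCExpr tree and, when the assembler supports assignment, routes it through a .set temporary before emitting the 4-byte value. A sketch of the directive text this conceptually produces (string formatting only; real emission goes through MCStreamer, and the label names are placeholders):

    #include <cstdio>

    static void printLabelOffsetDifference(const char *Hi, unsigned long long Off,
                                           const char *Lo, unsigned SetCounter,
                                           bool HasSetDirective) {
      if (!HasSetDirective) {
        std::printf("\t.long %s+%llu-%s\n", Hi, Off, Lo);
      } else {
        std::printf("\t.set .Lset%u, %s+%llu-%s\n", SetCounter, Hi, Off, Lo);
        std::printf("\t.long .Lset%u\n", SetCounter);
      }
    }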
- AP.EmitGlobalConstant(Field, AddrSpace); + EmitGlobalConstantImpl(Field, AddrSpace, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well @@ -1203,7 +1390,7 @@ static void EmitGlobalConstantUnion(const ConstantUnion *CU, unsigned FilledSize = TD->getTypeAllocSize(Contents->getType()); // Print the actually filled part - AP.EmitGlobalConstant(Contents, AddrSpace); + EmitGlobalConstantImpl(Contents, AddrSpace, AP); // And pad with enough zeroes AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace); @@ -1236,7 +1423,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex - // api needed to prevent premature destruction + // API needed to prevent premature destruction APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.isVerbose()) { @@ -1266,8 +1453,8 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); - // All long double variants are printed as hex api needed to prevent - // premature destruction. + // All long double variants are printed as hex + // API needed to prevent premature destruction. APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.TM.getTargetData()->isBigEndian()) { @@ -1295,57 +1482,68 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, } } -/// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, + AsmPrinter &AP) { if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { - uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); - if (Size == 0) Size = 1; // An empty "_foo:" followed by a section is undef. 
- return OutStreamer.EmitZeros(Size, AddrSpace); + uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + return AP.OutStreamer.EmitZeros(Size, AddrSpace); } if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: case 4: case 8: - if (isVerbose()) - OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); - OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: - EmitGlobalConstantLargeInt(CI, AddrSpace, *this); + EmitGlobalConstantLargeInt(CI, AddrSpace, AP); return; } } if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, *this); + return EmitGlobalConstantArray(CVA, AddrSpace, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, *this); + return EmitGlobalConstantStruct(CVS, AddrSpace, AP); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return EmitGlobalConstantFP(CFP, AddrSpace, *this); + return EmitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { - unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); - OutStreamer.EmitIntValue(0, Size, AddrSpace); + unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV)) - return EmitGlobalConstantUnion(CVU, AddrSpace, *this); + return EmitGlobalConstantUnion(CVU, AddrSpace, AP); if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return EmitGlobalConstantVector(V, AddrSpace, *this); + return EmitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - OutStreamer.EmitValue(LowerConstant(CV, *this), - TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), + AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), + AddrSpace); +} + +/// EmitGlobalConstant - Print a general LLVM constant to the .s file. +void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { + uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + if (Size) + EmitGlobalConstantImpl(CV, AddrSpace, *this); + else if (MAI->hasSubsectionsViaSymbols()) { + // If the global has zero size, emit a single byte so that two labels don't + // look like they are at the same location. 
+ OutStreamer.EmitIntValue(0, 1, AddrSpace); + } } void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { @@ -1613,7 +1811,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return GMP; } - llvm_report_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); return 0; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 255bcd413f22..37d10e5c4ccd 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -74,15 +75,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI); OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser)); if (!TAP) - llvm_report_error("Inline asm not supported by this streamer because" - " we don't have an asm parser for this target\n"); + report_fatal_error("Inline asm not supported by this streamer because" + " we don't have an asm parser for this target\n"); Parser.setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. int Res = Parser.Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); if (Res && !HasDiagHandler) - llvm_report_error("Error parsing inline asm\n"); + report_fatal_error("Error parsing inline asm\n"); } @@ -97,7 +98,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { unsigned NumDefs = 0; for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); ++NumDefs) - assert(NumDefs != NumOperands-1 && "No asm string?"); + assert(NumDefs != NumOperands-2 && "No asm string?"); assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); @@ -123,6 +124,20 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ MAI->getInlineAsmStart()); + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata()) + if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata()) + if (SrcLoc->getNumOperands() != 0) + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + // Emit the inline asm to a temporary string so we can emit it through // EmitInlineAsm. SmallString<256> StringData; @@ -167,10 +182,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { break; case '(': // $( -> same as GCC's { character. ++LastEmitted; // Consume '(' character. - if (CurVariant != -1) { - llvm_report_error("Nested variants found in inline asm string: '" - + std::string(AsmStr) + "'"); - } + if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); CurVariant = 0; // We're in the first variant now. 
break; case '|': @@ -204,8 +218,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (StrEnd == 0) - llvm_report_error(Twine("Unterminated ${:foo} operand in inline asm" - " string: '") + Twine(AsmStr) + "'"); + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); std::string Val(StrStart, StrEnd); PrintSpecial(MI, OS, Val.c_str()); @@ -219,8 +233,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { unsigned Val; if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) - llvm_report_error("Bad $ operand number in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); LastEmitted = IDEnd; char Modifier[2] = { 0, 0 }; @@ -231,22 +245,22 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (*LastEmitted == ':') { ++LastEmitted; // Consume ':' character. if (*LastEmitted == 0) - llvm_report_error("Bad ${:} expression in inline asm string: '" + - std::string(AsmStr) + "'"); + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); Modifier[0] = *LastEmitted; ++LastEmitted; // Consume modifier character. } if (*LastEmitted != '}') - llvm_report_error("Bad ${} expression in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); ++LastEmitted; // Consume '}' character. } if (Val >= NumOperands-1) - llvm_report_error("Invalid $ operand number in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); // Okay, we finally have a value number. Ask the target to print this // operand! @@ -273,7 +287,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); - if ((OpFlags & 7) == 4) { + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, Modifier[0] ? Modifier : 0, OS); @@ -286,9 +300,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (Error) { std::string msg; raw_string_ostream Msg(msg); - Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n"; - MI->print(Msg); - llvm_report_error(Msg.str()); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); } } break; @@ -296,7 +309,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), 0/*no loc cookie*/); + EmitInlineAsm(OS.str(), LocCookie); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. 
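The escape handling in EmitInlineAsm above implements GCC-style multi-variant inline asm, where "$(a$|b$)" expands to a for variant 0 and b for variant 1. A compact stand-alone selector for just the variant markers (hedged sketch; the real loop also rejects nesting and handles ${:foo} specials and $N operand references):

    #include <string>

    // Keep the text of variant Want; text outside $( ... $) is common to all.
    static std::string selectAsmVariant(const std::string &Asm, int Want) {
      std::string Out;
      int CurVariant = -1;                     // -1: outside any variant group
      for (std::string::size_type i = 0; i < Asm.size(); ++i) {
        if (Asm[i] == '$' && i + 1 < Asm.size()) {
          char C = Asm[i + 1];
          if (C == '(') { CurVariant = 0;  ++i; continue; }
          if (C == '|') { ++CurVariant;    ++i; continue; }
          if (C == ')') { CurVariant = -1; ++i; continue; }
        }
        if (CurVariant == -1 || CurVariant == Want)
          Out += Asm[i];
      }
      return Out;
    }

    // selectAsmVariant("mov $(eax$|rax$), 1", 1) == "mov rax, 1"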
@@ -334,7 +347,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, raw_string_ostream Msg(msg); Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index afc482dd15bf..ca8b8436c11f 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -7,3 +7,5 @@ add_llvm_library(LLVMAsmPrinter DwarfException.cpp OcamlGCPrinter.cpp ) + +target_link_libraries (LLVMAsmPrinter LLVMMCParser) diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 454326db0ea1..9cb8314bd959 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -261,11 +261,12 @@ namespace llvm { /// virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + uint64_t getValue() const { return Integer; } + /// SizeOf - Determine size of integer value in bytes. /// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; - // Implement isa/cast/dyncast. static bool classof(const DIEInteger *) { return true; } static bool classof(const DIEValue *I) { return I->getType() == isInteger; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b472d1e5335c..e9e9ba55db13 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -28,9 +28,11 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" @@ -39,6 +41,17 @@ #include "llvm/System/Path.h" using namespace llvm; +static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, + cl::desc("Print DbgScope information for each machine instruction")); + +static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + +namespace { + const char *DWARFGroupName = "DWARF Emission"; + const char *DbgTimerName = "DWARF Debug Writer"; +} // end anonymous namespace + //===----------------------------------------------------------------------===// /// Configuration values for initial hash set sizes (log2). @@ -179,6 +192,12 @@ public: }; //===----------------------------------------------------------------------===// +/// DbgRange - This is used to track range of instructions with identical +/// debug info scope. +/// +typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange; + +//===----------------------------------------------------------------------===// /// DbgScope - This class is used to track scope information. /// class DbgScope { @@ -187,22 +206,21 @@ class DbgScope { // Location at which this scope is inlined. AssertingVH<MDNode> InlinedAtLocation; bool AbstractScope; // Abstract Scope - MCSymbol *StartLabel; // Label ID of the beginning of scope. - MCSymbol *EndLabel; // Label ID of the end of scope. const MachineInstr *LastInsn; // Last instruction of this scope. const MachineInstr *FirstInsn; // First instruction of this scope. + unsigned DFSIn, DFSOut; // Scopes defined in scope. Contents not owned. 
SmallVector<DbgScope *, 4> Scopes; // Variables declared in scope. Contents owned. SmallVector<DbgVariable *, 8> Variables; - + SmallVector<DbgRange, 4> Ranges; // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), - StartLabel(0), EndLabel(0), - LastInsn(0), FirstInsn(0), IndentLevel(0) {} + LastInsn(0), FirstInsn(0), + DFSIn(0), DFSOut(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. @@ -211,18 +229,57 @@ public: DIDescriptor getDesc() const { return Desc; } MDNode *getInlinedAt() const { return InlinedAtLocation; } MDNode *getScopeNode() const { return Desc.getNode(); } - MCSymbol *getStartLabel() const { return StartLabel; } - MCSymbol *getEndLabel() const { return EndLabel; } const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } - void setStartLabel(MCSymbol *S) { StartLabel = S; } - void setEndLabel(MCSymbol *E) { EndLabel = E; } - void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } - const MachineInstr *getLastInsn() { return LastInsn; } - void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + const SmallVector<DbgRange, 4> &getRanges() { return Ranges; } + + /// openInsnRange - This scope covers instruction range starting from MI. + void openInsnRange(const MachineInstr *MI) { + if (!FirstInsn) + FirstInsn = MI; + + if (Parent) + Parent->openInsnRange(MI); + } + + /// extendInsnRange - Extend the current instruction range covered by + /// this scope. + void extendInsnRange(const MachineInstr *MI) { + assert (FirstInsn && "MI Range is not open!"); + LastInsn = MI; + if (Parent) + Parent->extendInsnRange(MI); + } + + /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected + /// until now. This is used when a new scope is encountered while walking + /// machine instructions. + void closeInsnRange(DbgScope *NewScope = NULL) { + assert (LastInsn && "Last insn missing!"); + Ranges.push_back(DbgRange(FirstInsn, LastInsn)); + FirstInsn = NULL; + LastInsn = NULL; + // If Parent dominates NewScope then do not close Parent's instruction + // range. + if (Parent && (!NewScope || !Parent->dominates(NewScope))) + Parent->closeInsnRange(NewScope); + } + void setAbstractScope() { AbstractScope = true; } bool isAbstractScope() const { return AbstractScope; } - const MachineInstr *getFirstInsn() { return FirstInsn; } + + // Depth First Search support to walk and mainpluate DbgScope hierarchy. + unsigned getDFSOut() const { return DFSOut; } + void setDFSOut(unsigned O) { DFSOut = O; } + unsigned getDFSIn() const { return DFSIn; } + void setDFSIn(unsigned I) { DFSIn = I; } + bool dominates(const DbgScope *S) { + if (S == this) + return true; + if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut()) + return true; + return false; + } /// addScope - Add a scope to the scope. /// @@ -232,48 +289,11 @@ public: /// void addVariable(DbgVariable *V) { Variables.push_back(V); } - void fixInstructionMarkers(DenseMap<const MachineInstr *, - unsigned> &MIIndexMap) { - assert(getFirstInsn() && "First instruction is missing!"); - - // Use the end of last child scope as end of this scope. 
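The new DFSIn/DFSOut fields above turn the "does scope A dominate scope B" query into a constant-time interval check: number every scope on DFS entry and exit of the scope tree, and an ancestor's interval strictly encloses each descendant's. A minimal version of the trick over a plain tree (the Node type is hypothetical):

    #include <vector>

    struct Node {
      std::vector<Node *> Kids;
      unsigned DFSIn = 0, DFSOut = 0;
    };

    static void number(Node *N, unsigned &Clock) {
      N->DFSIn = Clock++;                 // entry time
      for (Node *K : N->Kids)
        number(K, Clock);
      N->DFSOut = Clock++;                // exit time
    }

    // Mirrors DbgScope::dominates: same node, or strictly enclosing interval.
    static bool dominates(const Node *A, const Node *B) {
      return A == B || (A->DFSIn < B->DFSIn && A->DFSOut > B->DFSOut);
    }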
- const SmallVector<DbgScope *, 4> &Scopes = getScopes(); - const MachineInstr *LastInsn = getFirstInsn(); - unsigned LIndex = 0; - if (Scopes.empty()) { - assert(getLastInsn() && "Inner most scope does not have last insn!"); - return; - } - for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(), - SE = Scopes.end(); SI != SE; ++SI) { - DbgScope *DS = *SI; - DS->fixInstructionMarkers(MIIndexMap); - const MachineInstr *DSLastInsn = DS->getLastInsn(); - unsigned DSI = MIIndexMap[DSLastInsn]; - if (DSI > LIndex) { - LastInsn = DSLastInsn; - LIndex = DSI; - } - } - - unsigned CurrentLastInsnIndex = 0; - if (const MachineInstr *CL = getLastInsn()) - CurrentLastInsnIndex = MIIndexMap[CL]; - unsigned FIndex = MIIndexMap[getFirstInsn()]; - - // Set LastInsn as the last instruction for this scope only if - // it follows - // 1) this scope's first instruction and - // 2) current last instruction for this scope, if any. - if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex) - setLastInsn(LastInsn); - } - #ifndef NDEBUG void dump() const; #endif }; - + } // end llvm namespace #ifndef NDEBUG @@ -282,7 +302,6 @@ void DbgScope::dump() const { err.indent(IndentLevel); MDNode *N = Desc.getNode(); N->dump(); - err << " [" << StartLabel << ", " << EndLabel << "]\n"; if (AbstractScope) err << "Abstract Scope\n"; @@ -305,22 +324,23 @@ DbgScope::~DbgScope() { DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), - CurrentFnDbgScope(0), DebugTimer(0) { + CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; - - if (TimePassesIsEnabled) - DebugTimer = new Timer("Dwarf Debug Writer"); - - beginModule(M); + DwarfDebugRangeSectionSym = 0; + FunctionBeginSym = 0; + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + beginModule(M); + } else { + beginModule(M); + } } DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) DIEBlocks[j]->~DIEBlock(); - - delete DebugTimer; } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -792,6 +812,64 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, addBlock(Die, Attribute, 0, Block); } +/// addRegisterAddress - Add register location entry in variable DIE. +bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isReg() && "Invalid machine operand!"); + if (!MO.getReg()) + return false; + MachineLocation Location; + Location.set(MO.getReg()); + addAddress(Die, dwarf::DW_AT_location, Location); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool DwarfDebug::addConstantValue(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned Imm = MO.getImm(); + addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. 
+bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + VS); + return true; +} + + /// addToContextOwner - Add Die into the list of its context owner's children. void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { @@ -1250,59 +1328,16 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { if (SP.isArtificial()) addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + if (!SP.isLocalToUnit()) + addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + if (SP.isOptimized()) + addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + // DW_TAG_inlined_subroutine may refer to this DIE. ModuleCU->insertDIE(SP.getNode(), SPDie); - return SPDie; -} - -/// getUpdatedDbgScope - Find DbgScope assicated with the instruction. -/// Update scope hierarchy. Create abstract scope if required. -DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt) { - assert(N && "Invalid Scope encoding!"); - assert(MI && "Missing machine instruction!"); - bool isAConcreteScope = InlinedAt != 0; - - DbgScope *NScope = NULL; - - if (InlinedAt) - NScope = DbgScopeMap.lookup(InlinedAt); - else - NScope = DbgScopeMap.lookup(N); - assert(NScope && "Unable to find working scope!"); - - if (NScope->getFirstInsn()) - return NScope; - DbgScope *Parent = NULL; - if (isAConcreteScope) { - DILocation IL(InlinedAt); - Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, - IL.getOrigLocation().getNode()); - assert(Parent && "Unable to find Parent scope!"); - NScope->setParent(Parent); - Parent->addScope(NScope); - } else if (DIDescriptor(N).isLexicalBlock()) { - DILexicalBlock DB(N); - Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt); - NScope->setParent(Parent); - Parent->addScope(NScope); - } - - NScope->setFirstInsn(MI); - - if (!Parent && !InlinedAt) { - StringRef SPName = DISubprogram(N).getLinkageName(); - if (SPName == Asm->MF->getFunction()->getName()) - CurrentFnDbgScope = NScope; - } - - if (isAConcreteScope) { - ConcreteScopes[InlinedAt] = NScope; - getOrCreateAbstractScope(N); - } - - return NScope; + return SPDie; } DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { @@ -1332,6 +1367,19 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { return AScope; } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. 
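addConstantFPValue above streams the raw APFloat bits into the DIE one DW_FORM_data1 byte at a time, walking the buffer forward on little-endian targets and backward on big-endian ones so the constant lands in target byte order. The same Start/Stop/Incr idiom over a plain double (host-endian sketch, printing instead of DIE emission):

    #include <cstdio>
    #include <cstring>

    static void emitDoubleBytes(double V, bool LittleEndian) {
      unsigned char Buf[sizeof(double)];
      std::memcpy(Buf, &V, sizeof(double));   // raw bits, host byte order
      const int NumBytes = sizeof(double);
      int Incr  = LittleEndian ? 1 : -1;
      int Start = LittleEndian ? 0 : NumBytes - 1;
      int Stop  = LittleEndian ? NumBytes : -1;
      for (; Start != Stop; Start += Incr)    // one byte per iteration
        std::printf(".byte 0x%02x\n", static_cast<unsigned>(Buf[Start]));
    }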
+static bool isSubprogramContext(MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(DIType(Context).getContext().getNode()); + return false; +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert @@ -1347,7 +1395,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && !SP.getContext().isSubprogram()) { + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext().getNode())) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. @@ -1378,31 +1427,48 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { MachineLocation Location(RI->getFrameRegister(*Asm->MF)); addAddress(SPDie, dwarf::DW_AT_frame_base, Location); - if (!DISubprogram(SPNode).isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - return SPDie; } /// constructLexicalScope - Construct new DW_TAG_lexical_block /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { - MCSymbol *Start = Scope->getStartLabel(); - MCSymbol *End = Scope->getEndLabel(); - if (Start == 0 || End == 0) return 0; - assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); - assert(End->isDefined() && "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Start ? Start : Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - End ? End : Asm->GetTempSymbol("func_end",Asm->getFunctionNumber())); + const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return 0; + + SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + if (Ranges.size() > 1) { + // .debug_range section has not been laid out yet. Emit offset in + // .debug_range as a uint, size 4, for now. emitDIE will handle + // DW_AT_ranges appropriately. + addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + DebugRangeSymbols.push_back(LabelsBeforeInsn.lookup(RI->first)); + DebugRangeSymbols.push_back(LabelsAfterInsn.lookup(RI->second)); + } + DebugRangeSymbols.push_back(NULL); + DebugRangeSymbols.push_back(NULL); + return ScopeDIE; + } + + MCSymbol *Start = LabelsBeforeInsn.lookup(RI->first); + MCSymbol *End = LabelsAfterInsn.lookup(RI->second); + + if (Start == 0 || End == 0) return 0; + + assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); + assert(End->isDefined() && "Invalid end label for an inlined scope!"); + + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1411,14 +1477,28 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { /// a function. 
Construct DIE to represent this concrete inlined copy /// of the function. DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { - MCSymbol *StartLabel = Scope->getStartLabel(); - MCSymbol *EndLabel = Scope->getEndLabel(); - if (StartLabel == 0 || EndLabel == 0) return 0; - + + const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + assert (Ranges.empty() == false + && "DbgScope does not have instruction markers!"); + + // FIXME : .debug_inlined section specification does not clearly state how + // to emit inlined scope that is split into multiple instruction ranges. + // For now, use first instruction range and emit low_pc/high_pc pair and + // corresponding .debug_inlined section entry for this pair. + SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = LabelsBeforeInsn.lookup(RI->first); + MCSymbol *EndLabel = LabelsAfterInsn.lookup(RI->second); + + if (StartLabel == 0 || EndLabel == 0) { + assert (0 && "Unexpected Start and End labels for a inlined scope!"); + return 0; + } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + if (!Scope->getScopeNode()) return NULL; DIScope DS(Scope->getScopeNode()); @@ -1512,59 +1592,34 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Add variable address. if (!Scope->isAbstractScope()) { // Check if variable is described by DBG_VALUE instruction. - if (const MachineInstr *DbgValueInsn = DV->getDbgValue()) { - if (DbgValueInsn->getNumOperands() == 3) { - // FIXME : Handle getNumOperands != 3 - if (DbgValueInsn->getOperand(0).getType() - == MachineOperand::MO_Register - && DbgValueInsn->getOperand(0).getReg()) { - MachineLocation Location; - Location.set(DbgValueInsn->getOperand(0).getReg()); + if (const MachineInstr *DVInsn = DV->getDbgValue()) { + bool updated = false; + // FIXME : Handle getNumOperands != 3 + if (DVInsn->getNumOperands() == 3) { + if (DVInsn->getOperand(0).isReg()) + updated = addRegisterAddress(VariableDie, DV, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isImm()) + updated = addConstantValue(VariableDie, DV, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isFPImm()) + updated = addConstantFPValue(VariableDie, DV, DVInsn->getOperand(0)); + } else { + MachineLocation Location = Asm->getDebugValueLocation(DVInsn); + if (Location.getReg()) { addAddress(VariableDie, dwarf::DW_AT_location, Location); if (MCSymbol *VS = DV->getDbgValueLabel()) addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); - } else if (DbgValueInsn->getOperand(0).getType() == - MachineOperand::MO_Immediate) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = DbgValueInsn->getOperand(0).getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); - } else if (DbgValueInsn->getOperand(0).getType() == - MachineOperand::MO_FPImmediate) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = DbgValueInsn->getOperand(0).getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. 
- bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); - - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); - } else { - //FIXME : Handle other operand types. - delete VariableDie; - return NULL; + updated = true; } - } - } else { + } + if (!updated) { + // If variableDie is not updated then DBG_VALUE instruction does not + // have valid variable info. + delete VariableDie; + return NULL; + } + } + else { MachineLocation Location; unsigned FrameReg; const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); @@ -1600,7 +1655,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { if (!ATy.isValid()) continue; DICompositeType CATy = getDICompositeType(ATy); - if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()) { + if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty() + && !CATy.isForwardDecl()) { if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode())) ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry()); } @@ -1716,9 +1772,9 @@ void DwarfDebug::constructCompileUnit(MDNode *N) { addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - addLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, TextSectionSym); - addLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("text_end")); + // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This + // simplifies debug range entries. + addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. It is always zero when only one // compile unit is emitted in one object file. @@ -1766,7 +1822,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { // Do not create specification DIE if context is either compile unit // or a subprogram. if (DI_GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !GVContext.isSubprogram()) { + !GVContext.isFile() && + !isSubprogramContext(GVContext.getNode())) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, @@ -1791,7 +1848,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { ModuleCU->addGlobal(DI_GV.getName(), VariableDie); DIType GTy = DI_GV.getType(); - if (GTy.isCompositeType() && !GTy.getName().empty()) { + if (GTy.isCompositeType() && !GTy.getName().empty() + && !GTy.isForwardDecl()) { DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode()); assert(Entry && "Missing global type!"); ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry()); @@ -1829,7 +1887,8 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { /// content. Create global DIEs and emit initial debug info sections. /// This is inovked by the target AsmPrinter. 
void DwarfDebug::beginModule(Module *M) { - TimeRegion Timer(DebugTimer); + if (DisableDebugInfoPrinting) + return; DebugInfoFinder DbgFinder; DbgFinder.processModule(*M); @@ -1893,10 +1952,7 @@ void DwarfDebug::beginModule(Module *M) { /// endModule - Emit all Dwarf sections that should come after the content. /// void DwarfDebug::endModule() { - if (!ModuleCU) - return; - - TimeRegion Timer(DebugTimer); + if (!ModuleCU) return; // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), @@ -1905,11 +1961,6 @@ void DwarfDebug::endModule() { addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } - // Insert top level DIEs. - for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(), - TE = TopLevelDIEsVector.end(); TI != TE; ++TI) - ModuleCU->getCUDie()->addChild(*TI); - for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); CI != CE; ++CI) { DIE *SPDie = CI->first; @@ -1918,8 +1969,6 @@ void DwarfDebug::endModule() { DIE *NDie = ModuleCU->getDIE(N); if (!NDie) continue; addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); - // FIXME - This is not the correct approach. - //addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie } // Standard sections final addresses. @@ -2062,11 +2111,13 @@ void DwarfDebug::collectVariableInfo() { if (!MInsn->isDebugValue()) continue; - // FIXME : Lift this restriction. - if (MInsn->getNumOperands() != 3) + // Ignore Undef values. + if (MInsn->getOperand(0).isReg() && !MInsn->getOperand(0).getReg()) continue; - DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands() - - 1).getMetadata())); + + DIVariable DV( + const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1) + .getMetadata())); if (DV.getTag() == dwarf::DW_TAG_arg_variable) { // FIXME Handle inlined subroutine arguments. DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL); @@ -2102,10 +2153,6 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { if (DL.isUnknown()) return; - // Check and update last known location info. - if (DL == PrevInstLoc) - return; - MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); // FIXME: Should only verify each scope once! @@ -2117,78 +2164,174 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { DenseMap<const MachineInstr *, DbgVariable *>::iterator DI = DbgValueStartMap.find(MI); if (DI != DbgValueStartMap.end()) { - MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; + MCSymbol *Label = NULL; + if (DL == PrevInstLoc) + Label = PrevLabel; + else { + Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); + PrevInstLoc = DL; + PrevLabel = Label; + } DI->second->setDbgValueLabel(Label); } return; } // Emit a label to indicate location change. This is used for line - // table even if this instruction does start a new scope. - MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - - // update DbgScope if this instruction starts a new scope. - InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); - if (I == DbgScopeBeginMap.end()) - return; + // table even if this instruction does not start a new scope. 
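`beginScope` above now keeps a one-entry cache in `PrevInstLoc`/`PrevLabel`: consecutive instructions carrying the same `DebugLoc` share one line-table label instead of each minting its own. The shape of that cache, with hypothetical stand-in types (`Loc`/`Symbol` reduce `DebugLoc` and `MCSymbol`, and `recordLine` stands in for `recordSourceLine`):

```cpp
struct Loc {
  unsigned Line, Col;
  Loc() : Line(0), Col(0) {}
  bool operator==(const Loc &O) const {
    return Line == O.Line && Col == O.Col;
  }
};
struct Symbol {};

class LineLabelCache {
  Loc PrevInstLoc;
  Symbol *PrevLabel;
public:
  LineLabelCache() : PrevLabel(0) {}

  Symbol *labelFor(const Loc &DL) {
    if (PrevLabel && DL == PrevInstLoc)
      return PrevLabel;           // unchanged location: reuse the label
    PrevLabel = recordLine(DL);   // new location: emit a fresh line entry
    PrevInstLoc = DL;
    return PrevLabel;
  }

private:
  Symbol *recordLine(const Loc &DL); // stand-in, defined elsewhere
};
```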
+ MCSymbol *Label = NULL; + if (DL == PrevInstLoc) + Label = PrevLabel; + else { + Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); + PrevInstLoc = DL; + PrevLabel = Label; + } - ScopeVector &SD = I->second; - for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) - (*SDI)->setStartLabel(Label); + // If this instruction begins a scope then note down corresponding label. + if (InsnsBeginScopeSet.count(MI) != 0) + LabelsBeforeInsn[MI] = Label; } /// endScope - Process end of a scope. void DwarfDebug::endScope(const MachineInstr *MI) { - // Ignore DBG_VALUE instruction. - if (MI->isDebugValue()) - return; - - // Check location. - DebugLoc DL = MI->getDebugLoc(); - if (DL.isUnknown()) - return; - - // Emit a label and update DbgScope if this instruction ends a scope. - InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); - if (I == DbgScopeEndMap.end()) - return; - - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - - SmallVector<DbgScope*, 2> &SD = I->second; - for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) - (*SDI)->setEndLabel(Label); - return; + if (InsnsEndScopeSet.count(MI) != 0) { + // Emit a label if this instruction ends a scope. + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(Label); + LabelsAfterInsn[MI] = Label; + } } -/// createDbgScope - Create DbgScope for the scope. -void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { +/// getOrCreateDbgScope - Create DbgScope for the scope. +DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) - return; + return WScope; WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); DbgScopeMap.insert(std::make_pair(Scope, WScope)); - if (DIDescriptor(Scope).isLexicalBlock()) - createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); - return; + if (DIDescriptor(Scope).isLexicalBlock()) { + DbgScope *Parent = + getOrCreateDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); + WScope->setParent(Parent); + Parent->addScope(WScope); + } + + if (!WScope->getParent()) { + StringRef SPName = DISubprogram(Scope).getLinkageName(); + if (SPName == Asm->MF->getFunction()->getName()) + CurrentFnDbgScope = WScope; + } + + return WScope; } DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); if (WScope) - return; + return WScope; WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt); DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); DILocation DL(InlinedAt); - createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); + DbgScope *Parent = + getOrCreateDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); + WScope->setParent(Parent); + Parent->addScope(WScope); + + ConcreteScopes[InlinedAt] = WScope; + getOrCreateAbstractScope(Scope); + + return WScope; } +/// hasValidLocation - Return true if debug location entry attached with +/// machine instruction encodes valid location info. +static bool hasValidLocation(LLVMContext &Ctx, + const MachineInstr *MInsn, + MDNode *&Scope, MDNode *&InlinedAt) { + if (MInsn->isDebugValue()) + return false; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) return false; + + MDNode *S = DL.getScope(Ctx); + + // There is no need to create another DIE for compile unit. For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. 
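`getOrCreateDbgScope` above replaces the old create-then-update pair with the usual memoized constructor: look up, create on a miss, register the new scope before walking to the parent (mirroring the original's insert-then-recurse order), then link child and parent exactly once. The shape, stripped of LLVM types; integer keys are hypothetical stand-ins for the `MDNode` scope and inlined-at keys:

```cpp
#include <map>
#include <vector>

struct Scope {
  Scope *Parent;
  std::vector<Scope *> Children;
  Scope() : Parent(0) {}
};

// ParentOf maps a scope key to its parent's key; an absent entry
// models a function-level scope with no parent.
Scope *getOrCreate(std::map<int, Scope *> &Cache,
                   const std::map<int, int> &ParentOf, int Key) {
  std::map<int, Scope *>::iterator It = Cache.find(Key);
  if (It != Cache.end())
    return It->second;            // hit: already created and linked

  Scope *S = new Scope();
  Cache[Key] = S;                 // register before recursing, as the
                                  // original does via DbgScopeMap.insert
  std::map<int, int>::const_iterator P = ParentOf.find(Key);
  if (P != ParentOf.end()) {
    Scope *Parent = getOrCreate(Cache, ParentOf, P->second);
    S->Parent = Parent;
    Parent->Children.push_back(S);
  }
  return S;
}
```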
+  if (DIScope(S).isCompileUnit()) return false;
+
+  Scope = S;
+  InlinedAt = DL.getInlinedAt(Ctx);
+  return true;
+}
+
+/// calculateDominanceGraph - Calculate dominance graph for DbgScope
+/// hierarchy.
+static void calculateDominanceGraph(DbgScope *Scope) {
+  assert (Scope && "Unable to calculate scope dominance graph!");
+  SmallVector<DbgScope *, 4> WorkStack;
+  WorkStack.push_back(Scope);
+  unsigned Counter = 0;
+  while (!WorkStack.empty()) {
+    DbgScope *WS = WorkStack.back();
+    const SmallVector<DbgScope *, 4> &Children = WS->getScopes();
+    bool visitedChildren = false;
+    for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+           SE = Children.end(); SI != SE; ++SI) {
+      DbgScope *ChildScope = *SI;
+      if (!ChildScope->getDFSOut()) {
+        WorkStack.push_back(ChildScope);
+        visitedChildren = true;
+        ChildScope->setDFSIn(++Counter);
+        break;
+      }
+    }
+    if (!visitedChildren) {
+      WorkStack.pop_back();
+      WS->setDFSOut(++Counter);
+    }
+  }
+}
+
+/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
+static
+void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+                       DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
+{
+#ifndef NDEBUG
+  unsigned PrevDFSIn = 0;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      MDNode *Scope = NULL;
+      MDNode *InlinedAt = NULL;
+
+      // Check if instruction has valid location information.
+      if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+        dbgs() << " [ ";
+        if (InlinedAt)
+          dbgs() << "*";
+        DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+          MI2ScopeMap.find(MInsn);
+        if (DI != MI2ScopeMap.end()) {
+          DbgScope *S = DI->second;
+          dbgs() << S->getDFSIn();
+          PrevDFSIn = S->getDFSIn();
+        } else
+          dbgs() << PrevDFSIn;
+      } else
+        dbgs() << " [ x" << PrevDFSIn;
+      dbgs() << " ]";
+      MInsn->dump();
+    }
+    dbgs() << "\n";
+  }
+#endif
+}

 /// extractScopeInformation - Scan machine instructions in this function
 /// and collect DbgScopes. Return true, if at least one scope was found.
 bool DwarfDebug::extractScopeInformation() {
@@ -2197,71 +2340,100 @@ bool DwarfDebug::extractScopeInformation() {
   if (!DbgScopeMap.empty())
     return false;

-  DenseMap<const MachineInstr *, unsigned> MIIndexMap;
-  unsigned MIIndex = 0;
-  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
-
   // Scan each instruction and create scopes. First build working set of scopes.
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  SmallVector<DbgRange, 4> MIRanges;
+  DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
+  MDNode *PrevScope = NULL;
+  MDNode *PrevInlinedAt = NULL;
+  const MachineInstr *RangeBeginMI = NULL;
+  const MachineInstr *PrevMI = NULL;
   for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      // FIXME : Remove DBG_VALUE check.
-      if (MInsn->isDebugValue()) continue;
-      MIIndexMap[MInsn] = MIIndex++;
-
-      DebugLoc DL = MInsn->getDebugLoc();
-      if (DL.isUnknown()) continue;
-
-      MDNode *Scope = DL.getScope(Ctx);
-
-      // There is no need to create another DIE for compile unit. For all
-      // other scopes, create one DbgScope now. This will be translated
-      // into a scope DIE at the end.
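`calculateDominanceGraph` above is an iterative DFS that stamps each scope with entry/exit counters; with those stamps, an ancestor query such as the `dominates()` call used later in `extractScopeInformation` can be answered by an interval test (assumed here; the accessor's body is not shown in this patch). A standalone sketch with a plain tree node in place of `DbgScope`:

```cpp
#include <vector>

struct Scope {
  std::vector<Scope *> Children;
  unsigned DFSIn, DFSOut;          // 0 means "not yet numbered"
  Scope() : DFSIn(0), DFSOut(0) {}

  // One natural interval-based ancestor test over the DFS numbers.
  bool dominates(const Scope *S) const {
    return DFSIn <= S->DFSIn && S->DFSOut <= DFSOut;
  }
};

void numberScopes(Scope *Root) {
  std::vector<Scope *> WorkStack;
  WorkStack.push_back(Root);
  unsigned Counter = 0;
  while (!WorkStack.empty()) {
    Scope *WS = WorkStack.back();
    bool visitedChildren = false;
    for (size_t i = 0, e = WS->Children.size(); i != e; ++i) {
      Scope *Child = WS->Children[i];
      if (!Child->DFSOut) {        // unfinished child: descend into it
        Child->DFSIn = ++Counter;
        WorkStack.push_back(Child);
        visitedChildren = true;
        break;
      }
    }
    if (!visitedChildren) {        // every child finished: close WS
      WorkStack.pop_back();
      WS->DFSOut = ++Counter;
    }
  }
}
```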
-      if (DIScope(Scope).isCompileUnit()) continue;
-      createDbgScope(Scope, DL.getInlinedAt(Ctx));
-    }
-  }
+      MDNode *Scope = NULL;
+      MDNode *InlinedAt = NULL;
+      // Check if instruction has valid location information.
+      if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+        PrevMI = MInsn;
+        continue;
+      }
+
+      // If scope has not changed then skip this instruction.
+      if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+        PrevMI = MInsn;
+        continue;
+      }

-  // Build scope hierarchy using working set of scopes.
-  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
-       I != E; ++I) {
-    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
-         II != IE; ++II) {
-      const MachineInstr *MInsn = II;
-      // FIXME : Remove DBG_VALUE check.
-      if (MInsn->isDebugValue()) continue;
-      DebugLoc DL = MInsn->getDebugLoc();
-      if (DL.isUnknown()) continue;
+      if (RangeBeginMI) {
+        // If we have already seen the beginning of an instruction range and
+        // the current instruction's scope does not match the scope of the
+        // first instruction in this range, then create a new instruction range.
+        DbgRange R(RangeBeginMI, PrevMI);
+        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+        MIRanges.push_back(R);
+      }

-      MDNode *Scope = DL.getScope(Ctx);
-      if (Scope == 0) continue;
+      // This is the beginning of a new instruction range.
+      RangeBeginMI = MInsn;

-      // There is no need to create another DIE for compile unit. For all
-      // other scopes, create one DbgScope now. This will be translated
-      // into a scope DIE at the end.
-      if (DIScope(Scope).isCompileUnit()) continue;
-      DbgScope *DScope = getUpdatedDbgScope(Scope, MInsn, DL.getInlinedAt(Ctx));
-      DScope->setLastInsn(MInsn);
+      // Reset previous markers.
+      PrevMI = MInsn;
+      PrevScope = Scope;
+      PrevInlinedAt = InlinedAt;
     }
   }

+  // Create last instruction range.
+  if (RangeBeginMI && PrevMI && PrevScope) {
+    DbgRange R(RangeBeginMI, PrevMI);
+    MIRanges.push_back(R);
+    MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+  }
+
   if (!CurrentFnDbgScope)
     return false;

-  CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap);
+  calculateDominanceGraph(CurrentFnDbgScope);
+  if (PrintDbgScope)
+    printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+
+  // Find ranges of instructions covered by each DbgScope.
+  DbgScope *PrevDbgScope = NULL;
+  for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(),
+         RE = MIRanges.end(); RI != RE; ++RI) {
+    const DbgRange &R = *RI;
+    DbgScope *S = MI2ScopeMap.lookup(R.first);
+    assert (S && "Lost DbgScope for a machine instruction!");
+    if (PrevDbgScope && !PrevDbgScope->dominates(S))
+      PrevDbgScope->closeInsnRange(S);
+    S->openInsnRange(R.first);
+    S->extendInsnRange(R.second);
+    PrevDbgScope = S;
+  }

-  // Each scope has first instruction and last instruction to mark beginning
-  // and end of a scope respectively. Create an inverse map that list scopes
-  // starts (and ends) with an instruction. One instruction may start (or end)
-  // multiple scopes. Ignore scopes that are not reachable.
+  if (PrevDbgScope)
+    PrevDbgScope->closeInsnRange();
+
+  identifyScopeMarkers();
+
+  return !DbgScopeMap.empty();
+}
+
+/// identifyScopeMarkers() -
+/// Each DbgScope has first instruction and last instruction to mark beginning
+/// and end of a scope respectively. Create an inverse map that lists the
+/// scopes that start (and end) with an instruction. One instruction may start
+/// (or end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() { SmallVector<DbgScope *, 4> WorkList; WorkList.push_back(CurrentFnDbgScope); while (!WorkList.empty()) { DbgScope *S = WorkList.pop_back_val(); - + const SmallVector<DbgScope *, 4> &Children = S->getScopes(); if (!Children.empty()) for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), @@ -2270,45 +2442,51 @@ bool DwarfDebug::extractScopeInformation() { if (S->isAbstractScope()) continue; - const MachineInstr *MI = S->getFirstInsn(); - assert(MI && "DbgScope does not have first instruction!"); - - InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI); - if (IDI != DbgScopeBeginMap.end()) - IDI->second.push_back(S); - else - DbgScopeBeginMap[MI].push_back(S); - - MI = S->getLastInsn(); - assert(MI && "DbgScope does not have last instruction!"); - IDI = DbgScopeEndMap.find(MI); - if (IDI != DbgScopeEndMap.end()) - IDI->second.push_back(S); - else - DbgScopeEndMap[MI].push_back(S); + + const SmallVector<DbgRange, 4> &Ranges = S->getRanges(); + if (Ranges.empty()) + continue; + for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + assert(RI->first && "DbgRange does not have first instruction!"); + assert(RI->second && "DbgRange does not have second instruction!"); + InsnsBeginScopeSet.insert(RI->first); + InsnsEndScopeSet.insert(RI->second); + } } +} - return !DbgScopeMap.empty(); +/// FindFirstDebugLoc - Find the first debug location in the function. This +/// is intended to be an approximation for the source position of the +/// beginning of the function. +static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) + for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end(); + MBBI != MBBE; ++MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + return DebugLoc(); } /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; - - TimeRegion Timer(DebugTimer); - if (!extractScopeInformation()) - return; + if (!extractScopeInformation()) return; collectVariableInfo(); + FunctionBeginSym = Asm->GetTempSymbol("func_begin", + Asm->getFunctionNumber()); // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); + Asm->OutStreamer.EmitLabel(FunctionBeginSym); // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. - DebugLoc FDL = MF->getDefaultDebugLoc(); + DebugLoc FDL = FindFirstDebugLoc(MF); if (FDL.isUnknown()) return; MDNode *Scope = FDL.getScope(MF->getFunction()->getContext()); @@ -2329,10 +2507,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { /// endFunction - Gather and emit post-function debug information. /// void DwarfDebug::endFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo() || - DbgScopeMap.empty()) return; - - TimeRegion Timer(DebugTimer); + if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return; if (CurrentFnDbgScope) { // Define end label for subprogram. 
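`extractScopeInformation` above replaces the old per-instruction index bookkeeping with a single walk that chops the stream into maximal runs sharing one (scope, inlined-at) pair; instructions without location info silently extend the current run. A toy model, with an integer key as a hypothetical stand-in for the `MDNode` pair (a negative key models "no valid location"):

```cpp
#include <utility>
#include <vector>

struct Instr { int ScopeKey; };   // stand-in: a MachineInstr plus its
                                  // (Scope, InlinedAt) pair; a negative
                                  // key models "no valid location".
typedef std::pair<const Instr *, const Instr *> Range;

std::vector<Range> buildRanges(const std::vector<Instr> &MIs) {
  std::vector<Range> Ranges;
  const Instr *RangeBegin = 0, *Prev = 0;
  int PrevKey = -1;
  for (size_t i = 0, e = MIs.size(); i != e; ++i) {
    const Instr *MI = &MIs[i];
    if (MI->ScopeKey < 0) { Prev = MI; continue; }   // extend silently
    if (RangeBegin && MI->ScopeKey == PrevKey) { Prev = MI; continue; }
    if (RangeBegin)                     // scope changed: close the run
      Ranges.push_back(Range(RangeBegin, Prev));
    RangeBegin = MI;                    // a new run starts here
    Prev = MI;
    PrevKey = MI->ScopeKey;
  }
  if (RangeBegin && Prev)               // close the final open run
    Ranges.push_back(Range(RangeBegin, Prev));
  return Ranges;
}
```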
@@ -2355,8 +2530,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { AE = AbstractScopesList.end(); AI != AE; ++AI) constructScopeDIE(*AI); - constructScopeDIE(CurrentFnDbgScope); + DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); + if (!DisableFramePointerElim(*MF)) + addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); + + DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), MMI->getFrameMoves())); } @@ -2364,22 +2544,23 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; DeleteContainerSeconds(DbgScopeMap); - DbgScopeBeginMap.clear(); - DbgScopeEndMap.clear(); + InsnsBeginScopeSet.clear(); + InsnsEndScopeSet.clear(); DbgValueStartMap.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); AbstractVariables.clear(); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); Lines.clear(); + PrevLabel = NULL; } /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { - TimeRegion Timer(DebugTimer); - StringRef Dir; StringRef Fn; @@ -2407,17 +2588,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { return Label; } -/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be -/// timed. Look up the source id with the given directory and source file -/// names. If none currently exists, create a new id and insert it in the -/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as -/// well. -unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, - const std::string &FileName) { - TimeRegion Timer(DebugTimer); - return GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -2481,7 +2651,6 @@ void DwarfDebug::computeSizeAndOffsets() { sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true); - CompileUnitOffsets[ModuleCU] = 0; } /// EmitSectionSym - Switch to the specified MCSection and emit an assembler @@ -2522,7 +2691,8 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); - EmitSectionSym(Asm, TLOF.getDwarfRangesSection()); + DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(), + "debug_range"); TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); EmitSectionSym(Asm, TLOF.getDataSection()); @@ -2566,6 +2736,15 @@ void DwarfDebug::emitDIE(DIE *Die) { Asm->EmitInt32(Addr); break; } + case dwarf::DW_AT_ranges: { + // DW_AT_range Value encodes offset in debug_range section. + DIEInteger *V = cast<DIEInteger>(Values[i]); + Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, + V->getValue(), + DwarfDebugRangeSectionSym, + 4); + break; + } default: // Emit an attribute using the defined form. 
Values[i]->EmitValue(Asm, Form); @@ -2900,7 +3079,7 @@ void DwarfDebug::emitCommonDebugFrame() { Asm->EmitFrameMoves(Moves, 0, false); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end")); } @@ -2942,7 +3121,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); Asm->OutStreamer.EmitLabel(DebugFrameEnd); } @@ -3087,7 +3266,16 @@ void DwarfDebug::EmitDebugARanges() { void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->getObjFileLowering().getDwarfRangesSection()); + unsigned char Size = Asm->getTargetData().getPointerSize(); + for (SmallVector<const MCSymbol *, 8>::iterator + I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); + I != E; ++I) { + if (*I) + Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0); + else + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + } } /// emitDebugMacInfo - Emit visible names into a debug macinfo section. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index c7baf5f5d38d..b964b23fe6f8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -32,8 +32,8 @@ class DbgVariable; class MachineFrameInfo; class MachineLocation; class MachineModuleInfo; +class MachineOperand; class MCAsmInfo; -class Timer; class DIEAbbrev; class DIE; class DIEBlock; @@ -174,24 +174,14 @@ class DwarfDebug { /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that + /// need DW_AT_containing_type attribute. This attribute points to a DIE that + /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, MDNode *> ContainingTypeMap; - /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs. - SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs; - - /// TopLevelDIEs - Collection of top level DIEs. - SmallPtrSet<DIE *, 4> TopLevelDIEs; - SmallVector<DIE *, 4> TopLevelDIEsVector; - typedef SmallVector<DbgScope *, 2> ScopeVector; - typedef DenseMap<const MachineInstr *, ScopeVector> - InsnToDbgScopeMapTy; - - /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts. - InsnToDbgScopeMapTy DbgScopeBeginMap; - - /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends. - InsnToDbgScopeMapTy DbgScopeEndMap; + SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet; + SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet; /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. @@ -199,18 +189,21 @@ class DwarfDebug { DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo; SmallVector<MDNode *, 4> InlinedSPNodes; - /// CompileUnitOffsets - A vector of the offsets of the compile units. This is - /// used when calculating the "origin" of a concrete instance of an inlined - /// function. - DenseMap<CompileUnit *, unsigned> CompileUnitOffsets; + /// LabelsBeforeInsn - Maps instruction with label emitted before + /// instruction. + DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; + + /// LabelsAfterInsn - Maps instruction with label emitted after + /// instruction. 
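`DebugRangeSymbols` collects a begin/end label pair per scope range, plus two `NULL` entries per scope as the end-of-list marker; `emitDebugRanges` above then walks the vector once. The same loop isolated, using the `MCStreamer` calls from the hunk:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"

// Emit a .debug_ranges payload: each label becomes an address-sized
// value, and each NULL becomes one half of the 0,0 list terminator.
void emitRangeSymbols(llvm::MCStreamer &OS,
                      const llvm::SmallVectorImpl<const llvm::MCSymbol *> &Syms,
                      unsigned AddrSize) {
  for (llvm::SmallVectorImpl<const llvm::MCSymbol *>::const_iterator
         I = Syms.begin(), E = Syms.end(); I != E; ++I) {
    if (*I)
      OS.EmitSymbolValue(const_cast<llvm::MCSymbol *>(*I), AddrSize,
                         /*AddrSpace=*/0);
    else
      OS.EmitIntValue(0, AddrSize, /*AddrSpace=*/0);
  }
}
```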
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+  SmallVector<const MCSymbol *, 8> DebugRangeSymbols;

   /// Previous instruction's location information. This is used to determine
   /// label location to indicate scope boundaries in dwarf debug info.
   DebugLoc PrevInstLoc;
+  MCSymbol *PrevLabel;

-  /// DebugTimer - Timer for the Dwarf debug writer.
-  Timer *DebugTimer;
-
   struct FunctionDebugFrameInfo {
     unsigned Number;
     std::vector<MachineMove> Moves;
@@ -225,8 +218,9 @@ class DwarfDebug {
   // the beginning of each supported dwarf section. These are used to form
   // section offsets and are created by EmitSectionLabels.
   MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
-  MCSymbol *DwarfStrSectionSym, *TextSectionSym;
-
+  MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+
+  MCSymbol *FunctionBeginSym;
 private:

   /// getSourceDirectoryAndFileIds - Return the directory and file ids that
@@ -311,6 +305,15 @@ private:
   void addAddress(DIE *Die, unsigned Attribute,
                   const MachineLocation &Location);

+  /// addRegisterAddress - Add register location entry in variable DIE.
+  bool addRegisterAddress(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+
+  /// addConstantValue - Add constant value entry in variable DIE.
+  bool addConstantValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+
+  /// addConstantFPValue - Add constant value entry in variable DIE.
+  bool addConstantFPValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+
   /// addComplexAddress - Start with the address based on the location provided,
   /// and generate the DWARF information necessary to find the actual variable
   /// (navigating the extra location information encoded in the type) based on
   /// the starting location. Add the DWARF information to the die.
@@ -376,13 +379,8 @@ private:
   /// createSubprogramDIE - Create new DIE using SP.
   DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);

-  /// getUpdatedDbgScope - Find or create DbgScope assicated with
-  /// the instruction. Initialize scope and update scope hierarchy.
-  DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
-                               MDNode *InlinedAt);
-
-  /// createDbgScope - Create DbgScope for the scope.
-  void createDbgScope(MDNode *Scope, MDNode *InlinedAt);
+  /// getOrCreateDbgScope - Create DbgScope for the scope.
+  DbgScope *getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt);

   DbgScope *getOrCreateAbstractScope(MDNode *N);
@@ -531,14 +529,10 @@ private:
     return Lines.size();
   }

-  /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be
-  /// timed. Look up the source id with the given directory and source file
-  /// names. If none currently exists, create a new id and insert it in the
-  /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as
-  /// well.
-  unsigned getOrCreateSourceID(const std::string &DirName,
-                               const std::string &FileName);
-
+  /// identifyScopeMarkers() - Identify instructions that mark the
+  /// beginning or end of a scope.
+  void identifyScopeMarkers();
+
   /// extractScopeInformation - Scan machine instructions in this function
   /// and collect DbgScopes. Return true, if at least one scope was found.
bool extractScopeInformation(); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 72c97a43085c..0ff1036e89e2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -33,7 +33,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Timer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -41,15 +40,9 @@ using namespace llvm; DwarfException::DwarfException(AsmPrinter *A) : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false), - shouldEmitTableModule(false), shouldEmitMovesModule(false), - ExceptionTimer(0) { - if (TimePassesIsEnabled) - ExceptionTimer = new Timer("DWARF Exception Writer"); -} + shouldEmitTableModule(false), shouldEmitMovesModule(false) {} -DwarfException::~DwarfException() { - delete ExceptionTimer; -} +DwarfException::~DwarfException() {} /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that /// is shared among many Frame Description Entries. There is at least one CIE @@ -159,8 +152,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // On Darwin the linker honors the alignment of eh_frame, which means it must // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get // holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); } @@ -262,8 +254,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // On Darwin the linker honors the alignment of eh_frame, which means it // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you // get holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number)); @@ -432,7 +423,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { if (!MO.isGlobal()) continue; - Function *F = dyn_cast<Function>(MO.getGlobal()); + const Function *F = dyn_cast<Function>(MO.getGlobal()); if (F == 0) continue; if (SawFunc) { @@ -586,7 +577,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. void DwarfException::EmitExceptionTable() { - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); @@ -692,7 +683,7 @@ void DwarfException::EmitExceptionTable() { // Begin the exception table. Asm->OutStreamer.SwitchSection(LSDASection); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); // Emit the LSDA. 
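A note on the `EmitAlignment` calls in these exception-writer hunks: the first argument is a log2 boundary, so 2 requests 4-byte and 3 requests 8-byte alignment, matching the pointer size; the dropped trailing arguments were presumably default-equivalent, which would make the shorter calls behavior-preserving. Spelled out:

```cpp
// Log2 boundary selection as used with EmitAlignment above:
// returns 2 (align 4) on 32-bit targets and 3 (align 8) on 64-bit.
unsigned ehFrameAlignLog2(unsigned PointerSizeInBytes) {
  return PointerSizeInBytes == 4 ? 2 : 3;
}
```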
MCSymbol *GCCETSym = @@ -868,7 +859,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment("-- Catch TypeInfos --"); Asm->OutStreamer.AddBlankLine(); } - for (std::vector<GlobalVariable *>::const_reverse_iterator + for (std::vector<const GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; @@ -891,7 +882,7 @@ void DwarfException::EmitExceptionTable() { Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0); } - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); } /// EndModule - Emit all exception information that should come after the @@ -903,9 +894,7 @@ void DwarfException::EndModule() { if (!shouldEmitMovesModule && !shouldEmitTableModule) return; - TimeRegion Timer(ExceptionTimer); - - const std::vector<Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function *> Personalities = MMI->getPersonalities(); for (unsigned I = 0, E = Personalities.size(); I < E; ++I) EmitCIE(Personalities[I], I); @@ -918,7 +907,6 @@ void DwarfException::EndModule() { /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfException::BeginFunction(const MachineFunction *MF) { - TimeRegion Timer(ExceptionTimer); shouldEmitTable = shouldEmitMoves = false; // If any landing pads survive, we need an EH table. @@ -942,7 +930,6 @@ void DwarfException::BeginFunction(const MachineFunction *MF) { void DwarfException::EndFunction() { if (!shouldEmitMoves && !shouldEmitTable) return; - TimeRegion Timer(ExceptionTimer); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f35c0b616c1f..5839f8c0d4aa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -28,7 +28,6 @@ class MachineFunction; class MCAsmInfo; class MCExpr; class MCSymbol; -class Timer; class Function; class AsmPrinter; @@ -82,9 +81,6 @@ class DwarfException { /// should be emitted. bool shouldEmitMovesModule; - /// ExceptionTimer - Timer for the Dwarf exception writer. - Timer *ExceptionTimer; - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information /// that is shared among many Frame Description Entries. There is at least /// one CIE in every non-empty .debug_frame section. diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 1db178f020ec..a8c3c7b21765 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -109,13 +109,11 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { uint64_t FrameSize = FI.getFrameSize(); if (FrameSize >= 1<<16) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Function '" << FI.getFunction().getName() - << "' is too large for the ocaml GC! " - << "Frame size " << FrameSize << " >= 65536.\n"; - Msg << "(" << uintptr_t(&FI) << ")"; - llvm_report_error(Msg.str()); // Very rude! + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! 
" + "Frame size " + Twine(FrameSize) + ">= 65536.\n" + "(" + Twine(uintptr_t(&FI)) + ")"); } AP.OutStreamer.AddComment("live roots for " + @@ -125,12 +123,10 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); if (LiveCount >= 1<<16) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Function '" << FI.getFunction().getName() - << "' is too large for the ocaml GC! " - << "Live root count " << LiveCount << " >= 65536."; - llvm_report_error(Msg.str()); // Very rude! + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Live root count "+Twine(LiveCount)+" >= 65536."); } AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 8f519407ccd4..9dec22ec78a3 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -264,14 +264,8 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { return Hash; } -/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks -/// with no successors, we hash two instructions, because cross-jumping -/// only saves code when at least two instructions are removed (since a -/// branch must be inserted). For blocks with a successor, one of the -/// two blocks to be tail-merged will end with a branch already, so -/// it gains to cross-jump even for one instruction. -static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, - unsigned minCommonTailLength) { +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { MachineBasicBlock::const_iterator I = MBB->end(); if (I == MBB->begin()) return 0; // Empty MBB. @@ -283,20 +277,8 @@ static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, return 0; // MBB empty except for debug info. --I; } - unsigned Hash = HashMachineInstr(I); - if (I == MBB->begin() || minCommonTailLength == 1) - return Hash; // Single instr MBB. - - --I; - while (I->isDebugValue()) { - if (I==MBB->begin()) - return Hash; // MBB with single non-debug instr. - --I; - } - // Hash in the second-to-last instruction. - Hash ^= HashMachineInstr(I) << 2; - return Hash; + return HashMachineInstr(I); } /// ComputeCommonTailLength - Given two machine basic blocks, compute the number @@ -811,7 +793,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MergePotentials.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } // See if we can do any tail merging on those. @@ -897,8 +879,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // reinsert conditional branch only, for now TII->InsertBranch(*PBB, (TBB == IBB) ? 
FBB : TBB, 0, NewCond); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U), - *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } if (MergePotentials.size() >= 2) diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 62d18836c93e..3e38872a36d6 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMCodeGen + Analysis.cpp AggressiveAntiDepBreaker.cpp BranchFolding.cpp CalcSpillWeights.cpp @@ -49,6 +50,7 @@ add_llvm_library(LLVMCodeGen ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + RegAllocFast.cpp RegAllocLinearScan.cpp RegAllocLocal.cpp RegAllocPBQP.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 7d3de89ada2a..759fbaaef30a 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -143,13 +143,13 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. -static SDep *CriticalPathStep(SUnit *SU) { - SDep *Next = 0; +static const SDep *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); + const SUnit *PredSU = P->getSUnit(); unsigned PredLatency = P->getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over @@ -326,18 +326,18 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, } unsigned CriticalAntiDepBreaker:: -BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, +BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; // Find the node at the bottom of the critical path. - SUnit *Max = 0; + const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; + const SUnit *SU = &SUnits[i]; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } @@ -357,7 +357,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // Track progress along the critical path through the SUnit graph as we walk // the instructions. - SUnit *CriticalPathSU = Max; + const SUnit *CriticalPathSU = Max; MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); // Consider this pattern: @@ -429,8 +429,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; if (MI == CriticalPathMI) { - if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { - SUnit *NextSU = Edge->getSUnit(); + if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { + const SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. 
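The const-correctness changes above leave `CriticalPathStep`'s logic intact: from the current node, follow the predecessor edge with the greatest depth-plus-latency; on a tie, prefer an anti-dependence edge, since that is the only kind the breaker can remove by renaming. A standalone sketch with stand-in node/edge types; the tie-break expression is one plausible encoding of the original's comment, as the full condition is not shown here:

```cpp
#include <vector>

enum DepKind { Data, Anti, Output };

struct Node;
struct Edge {
  Node *Pred;
  unsigned Latency;
  DepKind Kind;
};
struct Node {
  std::vector<Edge> Preds;
  unsigned Depth; // longest-path distance from the DAG roots, in cycles
};

// Pick the predecessor edge continuing the bottom-up critical path.
const Edge *criticalPathStep(const Node *N) {
  const Edge *Next = 0;
  unsigned NextDepth = 0;
  for (size_t i = 0, e = N->Preds.size(); i != e; ++i) {
    const Edge &E = N->Preds[i];
    unsigned Total = E.Pred->Depth + E.Latency;
    // Strictly deeper wins; on a tie an anti edge displaces a non-anti
    // one, because breaking it can actually shorten the path.
    if (Total > NextDepth ||
        (Next && Total == NextDepth && E.Kind == Anti &&
         Next->Kind != Anti)) {
      NextDepth = Total;
      Next = &E;
    }
  }
  return Next;
}
```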
if (Edge->getKind() == SDep::Anti) { @@ -452,7 +452,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), PE = CriticalPathSU->Preds.end(); P != PE; ++P) if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 9e8db022621a..cc42dd2b8e32 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -72,9 +72,9 @@ namespace llvm { /// path /// of the ScheduleDAG and break them by renaming registers. /// - unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex); /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 7dbfd7d168bd..f6739f434044 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -34,6 +34,7 @@ STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced"); namespace { class DwarfEHPrepare : public FunctionPass { + const TargetMachine *TM; const TargetLowering *TLI; bool CompileFast; @@ -84,7 +85,7 @@ namespace { } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the ".llvm.eh.catch.all.value" call need to convert to using it's + /// use the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. bool CleanupSelectors(); @@ -154,8 +155,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetLowering *tli, bool fast) : - FunctionPass(&ID), TLI(tli), CompileFast(fast), + DwarfEHPrepare(const TargetMachine *tm, bool fast) : + FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()), + CompileFast(fast), ExceptionValueIntrinsic(0), SelectorIntrinsic(0), URoR(0), EHCatchAllValue(0), RewindFunction(0) {} @@ -180,8 +182,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { - return new DwarfEHPrepare(tli, fast); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { + return new DwarfEHPrepare(tm, fast); } /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. @@ -218,7 +220,7 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the ".llvm.eh.catch.all.value" call need to convert to using it's +/// the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. 
bool DwarfEHPrepare::CleanupSelectors() { if (!EHCatchAllValue) return false; @@ -421,7 +423,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { bool DwarfEHPrepare::NormalizeLandingPads() { bool Changed = false; - const MCAsmInfo *MAI = TLI->getTargetMachine().getMCAsmInfo(); + const MCAsmInfo *MAI = TM->getMCAsmInfo(); bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index eda167cb54a5..b644ebeb4be5 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -208,7 +208,7 @@ ELFSection &ELFWriter::getDtorSection() { } // getTextSection - Get the text section for the specified function -ELFSection &ELFWriter::getTextSection(Function *F) { +ELFSection &ELFWriter::getTextSection(const Function *F) { const MCSectionELF *Text = (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); @@ -507,7 +507,7 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { std::string msg; raw_string_ostream ErrorMsg(msg); ErrorMsg << "Constant unimp for type: " << *CV->getType(); - llvm_report_error(ErrorMsg.str()); + report_fatal_error(ErrorMsg.str()); } // ResolveConstantExpr - Resolve the constant expression until it stop @@ -572,10 +572,8 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { } } - std::string msg(CE->getOpcodeName()); - raw_string_ostream ErrorMsg(msg); - ErrorMsg << ": Unsupported ConstantExpr type"; - llvm_report_error(ErrorMsg.str()); + report_fatal_error(CE->getOpcodeName() + + StringRef(": Unsupported ConstantExpr type")); return std::make_pair(CV, 0); // silence warning } diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index b61b4848b654..db66ecc6dd83 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -191,7 +191,7 @@ namespace llvm { ELFSection &getDtorSection(); ELFSection &getJumpTableSection(); ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); - ELFSection &getTextSection(Function *F); + ELFSection &getTextSection(const Function *F); ELFSection &getRelocSection(ELFSection &S); // Helpers for obtaining ELF specific info. diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 61959bba0e23..af5f2892c2f0 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -29,7 +29,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. 
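The error-reporting hunks above (OcamlGCPrinter earlier, ELFWriter here) lean on `Twine`: `report_fatal_error` takes a lazily concatenated message tree, so the `raw_string_ostream` scaffolding disappears. Note the `StringRef` wrapper in the ELFWriter hunk: a bare `const char* + const char*` would be pointer arithmetic, not concatenation. A minimal sketch:

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"

// The message is built as a Twine tree and rendered to a string only
// once, inside report_fatal_error itself.
void failUnsupportedExpr(const char *OpcodeName) {
  llvm::report_fatal_error(
      OpcodeName + llvm::StringRef(": Unsupported ConstantExpr type"));
}
```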
- ScoreboardDepth = 1; + unsigned ScoreboardDepth = 1; if (!ItinData.isEmpty()) { for (unsigned idx = 0; ; ++idx) { if (ItinData.isEndMarker(idx)) @@ -45,35 +45,27 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : } } - Scoreboard = new unsigned[ScoreboardDepth]; - ScoreboardHead = 0; + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " << ScoreboardDepth << '\n'); } -ExactHazardRecognizer::~ExactHazardRecognizer() { - delete [] Scoreboard; -} - void ExactHazardRecognizer::Reset() { - memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned)); - ScoreboardHead = 0; + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); } -unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) { - return (ScoreboardHead + offset) % ScoreboardDepth; -} - -void ExactHazardRecognizer::dumpScoreboard() { +void ExactHazardRecognizer::ScoreBoard::dump() const { dbgs() << "Scoreboard:\n"; - - unsigned last = ScoreboardDepth - 1; - while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0)) + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) last--; for (unsigned i = 0; i <= last; i++) { - unsigned FUs = Scoreboard[getFutureIndex(i)]; + unsigned FUs = (*this)[i]; dbgs() << "\t"; for (int j = 31; j >= 0; j--) dbgs() << ((FUs & (1 << j)) ? '1' : '0'); @@ -96,11 +88,23 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < ScoreboardDepth) && + assert(((cycle + i) < RequiredScoreboard.getDepth()) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle + i); - unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); @@ -108,14 +112,14 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU return Hazard; } } - + // Advance the cycle to the next stage. cycle += IS->getNextCycles(); } return NoHazard; } - + void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { if (ItinData.isEmpty()) return; @@ -125,37 +129,52 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), + for (const InstrStage *IS = ItinData.beginStage(idx), *E = ItinData.endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the // stage is occupied. FIXME it would be more accurate to reserve // the same unit free in all the cycles. 
for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < ScoreboardDepth) && + assert(((cycle + i) < RequiredScoreboard.getDepth()) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle + i); - unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; - + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + // reduce to a single unit unsigned freeUnit = 0; do { freeUnit = freeUnits; freeUnits = freeUnit & (freeUnit - 1); } while (freeUnits); - + assert(freeUnit && "No function unit available!"); - Scoreboard[index] |= freeUnit; + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; } - + // Advance the cycle to the next stage. cycle += IS->getNextCycles(); } - - DEBUG(dumpScoreboard()); + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); } - + void ExactHazardRecognizer::AdvanceCycle() { - Scoreboard[ScoreboardHead] = 0; - ScoreboardHead = getFutureIndex(1); + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h index 71ac979e6cd8..91c81a970fa5 100644 --- a/lib/CodeGen/ExactHazardRecognizer.h +++ b/lib/CodeGen/ExactHazardRecognizer.h @@ -22,35 +22,60 @@ namespace llvm { class ExactHazardRecognizer : public ScheduleHazardRecognizer { - // Itinerary data for the target. - const InstrItineraryData &ItinData; - - // Scoreboard to track function unit usage. Scoreboard[0] is a + // ScoreBoard to track function unit usage. ScoreBoard[0] is a // mask of the FUs in use in the cycle currently being - // schedule. Scoreboard[1] is a mask for the next cycle. The - // Scoreboard is used as a circular buffer with the current cycle - // indicated by ScoreboardHead. - unsigned *Scoreboard; + // schedule. ScoreBoard[1] is a mask for the next cycle. The + // ScoreBoard is used as a circular buffer with the current cycle + // indicated by Head. + class ScoreBoard { + unsigned *Data; + + // The maximum number of cycles monitored by the Scoreboard. This + // value is determined based on the target itineraries to ensure + // that all hazards can be tracked. + size_t Depth; + // Indices into the Scoreboard that represent the current cycle. + size_t Head; + public: + ScoreBoard():Data(NULL), Depth(0), Head(0) { } + ~ScoreBoard() { + delete[] Data; + } + + size_t getDepth() const { return Depth; } + unsigned& operator[](size_t idx) const { + assert(Depth && "ScoreBoard was not initialized properly!"); + + return Data[(Head + idx) % Depth]; + } - // The maximum number of cycles monitored by the Scoreboard. This - // value is determined based on the target itineraries to ensure - // that all hazards can be tracked. - unsigned ScoreboardDepth; + void reset(size_t d = 1) { + if (Data == NULL) { + Depth = d; + Data = new unsigned[Depth]; + } - // Indices into the Scoreboard that represent the current cycle. 
- unsigned ScoreboardHead; + memset(Data, 0, Depth * sizeof(Data[0])); + Head = 0; + } - // Return the scoreboard index to use for 'offset' cycles in the - // future. 'offset' of 0 returns ScoreboardHead. - unsigned getFutureIndex(unsigned offset); + void advance() { + Head = (Head + 1) % Depth; + } - // Print the scoreboard. - void dumpScoreboard(); + // Print the scoreboard. + void dump() const; + }; + + // Itinerary data for the target. + const InstrItineraryData &ItinData; + + ScoreBoard ReservedScoreboard; + ScoreBoard RequiredScoreboard; public: ExactHazardRecognizer(const InstrItineraryData &ItinData); - ~ExactHazardRecognizer(); - + virtual HazardType getHazardType(SUnit *SU); virtual void Reset(); virtual void EmitInstruction(SUnit *SU); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 6d7cc51547d2..790cb2164897 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -181,9 +181,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) if (!InitedRoots.count(*I)) { - new StoreInst(ConstantPointerNull::get(cast<PointerType>( - cast<PointerType>((*I)->getType())->getElementType())), - *I, IP); + StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( + cast<PointerType>((*I)->getType())->getElementType())), + *I); + SI->insertAfter(*I); MadeChange = true; } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 87ab7ef52e54..e1c52f72ac42 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -331,15 +331,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { IRBuilder<> Builder(CI->getParent(), CI); LLVMContext &Context = CI->getContext(); - Function *Callee = CI->getCalledFunction(); + const Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: - llvm_report_error("Cannot lower a call to a non-intrinsic function '"+ + report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ Callee->getName() + "'!"); default: - llvm_report_error("Code generator does not support intrinsic function '"+ + report_fatal_error("Code generator does not support intrinsic function '"+ Callee->getName()+"'!"); // The setjmp/longjmp intrinsics should only exist in the code if it was diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index ed57f4cb101f..331dc7d4af6e 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -50,6 +50,9 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, cl::desc("Disable Machine Sinking")); static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, @@ -245,10 +248,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. 
PM.add(createSjLjEHPass(getTargetLowering())); - PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); break; case ExceptionHandling::Dwarf: - PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); break; case ExceptionHandling::None: PM.add(createLowerInvokePass(getTargetLowering())); @@ -337,12 +340,18 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createRegisterAllocator()); printAndVerify(PM, "After Register Allocation"); - // Perform stack slot coloring. - if (OptLevel != CodeGenOpt::None && !DisableSSC) { + // Perform stack slot coloring and post-ra machine LICM. + if (OptLevel != CodeGenOpt::None) { // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. - PM.add(createStackSlotColoringPass(false)); - printAndVerify(PM, "After StackSlotColoring"); + if (!DisableSSC) + PM.add(createStackSlotColoringPass(false)); + + // Run post-ra machine LICM to hoist reloads / remats. + if (!DisablePostRAMachineLICM) + PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); } // Run post-ra passes. diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 23cff0786a5e..26a7190110f9 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -665,10 +665,11 @@ void LiveIntervals::computeIntervals() { // Track the index of the current machine instr. SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(dbgs() << MBB->getName() << ":\n"); + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << ":\t\t# derived from " << MBB->getName() << "\n"); // Create intervals for live-ins to this BB first. - for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); // Multiple live-ins can alias the same register. @@ -1296,9 +1297,26 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineOperand &O = ri.getOperand(); ++ri; if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); + // Modify DBG_VALUE now that the value is in a spill slot. + if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + int FI = isLoadSS ? 
LdSlot : (int)Slot; + if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + ReplaceMachineInstrInMaps(MI, NewDV); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + continue; + } + } + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + RemoveMachineInstrFromMaps(MI); + vrm.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); continue; } assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); @@ -2085,7 +2103,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, << "constraints:\n"; MI->print(Msg, tm_); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) { if (!hasInterval(*AS)) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index ca8ecff8e61d..079684eea079 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -531,7 +531,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Mark live-in registers as live-in. SmallVector<unsigned, 4> Defs; - for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index bd0ccb40f53b..eaaa1f85b563 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -191,7 +191,7 @@ void MachineBasicBlock::print(raw_ostream &OS) const { const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (!livein_empty()) { OS << " Live Ins:"; - for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) OutputReg(OS, *I, TRI); OS << '\n'; } @@ -218,13 +218,14 @@ void MachineBasicBlock::print(raw_ostream &OS) const { } void MachineBasicBlock::removeLiveIn(unsigned Reg) { - livein_iterator I = std::find(livein_begin(), livein_end(), Reg); - assert(I != livein_end() && "Not a live in!"); + std::vector<unsigned>::iterator I = + std::find(LiveIns.begin(), LiveIns.end(), Reg); + assert(I != LiveIns.end() && "Not a live in!"); LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { - const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg); + livein_iterator I = std::find(livein_begin(), livein_end(), Reg); return I != livein_end(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 597d51d4f370..84c3d717f7f9 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" @@ -51,9 +52,12 @@ namespace { } private: - unsigned CurrVN; + typedef ScopedHashTableScope<MachineInstr*, unsigned, + MachineInstrExpressionTrait> ScopeType; + DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; SmallVector<MachineInstr*, 64> Exps; + unsigned CurrVN; bool PerformTrivialCoalescing(MachineInstr 
                                 *MI, MachineBasicBlock *MBB);
     bool isPhysDefTriviallyDead(unsigned Reg,
@@ -63,7 +67,13 @@ namespace {
     bool isCSECandidate(MachineInstr *MI);
     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
                            MachineInstr *CSMI, MachineInstr *MI);
-    bool ProcessBlock(MachineDomTreeNode *Node);
+    void EnterScope(MachineBasicBlock *MBB);
+    void ExitScope(MachineBasicBlock *MBB);
+    bool ProcessBlock(MachineBasicBlock *MBB);
+    void ExitScopeIfDone(MachineDomTreeNode *Node,
+                 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+    bool PerformCSE(MachineDomTreeNode *Node);
   };
 } // end anonymous namespace
 
@@ -277,13 +287,24 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
   return CSBBs.count(MI->getParent());
 }
 
-bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+  ScopeType *Scope = new ScopeType(VNT);
+  ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+  assert(SI != ScopeMap.end());
+  delete SI->second;
+  ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
   bool Changed = false;
 
   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
-  ScopedHashTableScope<MachineInstr*, unsigned,
-    MachineInstrExpressionTrait> VNTS(VNT);
-  MachineBasicBlock *MBB = Node->getBlock();
   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
     MachineInstr *MI = &*I;
     ++I;
@@ -356,10 +377,63 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
     CSEPairs.clear();
   }
 
-  // Recursively call ProcessBlock with children.
-  const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
-  for (unsigned i = 0, e = Children.size(); i != e; ++i)
-    Changed |= ProcessBlock(Children[i]);
+  return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+  if (OpenChildren[Node])
+    return;
+
+  // Pop scope.
+  ExitScope(Node->getBlock());
+
+  // Now traverse upwards to pop ancestors whose children are all done.
+  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+    unsigned Left = --OpenChildren[Parent];
+    if (Left != 0)
+      break;
+    ExitScope(Parent->getBlock());
+    Node = Parent;
+  }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+  SmallVector<MachineDomTreeNode*, 32> Scopes;
+  SmallVector<MachineDomTreeNode*, 8> WorkList;
+  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+  // Perform a DFS walk to determine the order of visit.
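An aside before the walk continues below: together with EnterScope and ExitScopeIfDone above, this is a general recipe for turning a recursive dominator-tree traversal into an iterative one. Record the DFS visit order and per-node child counts first, then pop scopes as whole subtrees finish. A stand-alone sketch of the recipe (editor's illustration; Node, enterScope and exitScope are stand-ins for the LLVM types):

#include <cstdio>
#include <map>
#include <vector>

struct Node {
  int Id;
  std::vector<Node*> Children;
};

static void enterScope(Node *N) { std::printf("enter %d\n", N->Id); }
static void exitScope(Node *N)  { std::printf("exit %d\n", N->Id); }

void performDFS(Node *Root) {
  std::vector<Node*> Scopes, WorkList;
  std::map<Node*, Node*> Parent;
  std::map<Node*, unsigned> OpenChildren;

  // First pass: record the visit order and per-node child counts.
  WorkList.push_back(Root);
  while (!WorkList.empty()) {
    Node *N = WorkList.back();
    WorkList.pop_back();
    Scopes.push_back(N);
    OpenChildren[N] = N->Children.size();
    for (size_t i = 0; i != N->Children.size(); ++i) {
      Parent[N->Children[i]] = N;
      WorkList.push_back(N->Children[i]);
    }
  }

  // Second pass: process each node in visit order; close its scope as soon
  // as the node and all of its children are done, then walk up through any
  // ancestors that just became finished.
  for (size_t i = 0; i != Scopes.size(); ++i) {
    Node *N = Scopes[i];
    enterScope(N);
    // ... per-block work (value numbering, CSE) would go here ...
    if (OpenChildren[N])
      continue;                    // children still pending
    exitScope(N);
    while (Node *P = Parent[N]) {  // Parent[Root] is null, ending the climb
      if (--OpenChildren[P] != 0)
        break;
      exitScope(P);
      N = P;
    }
  }
}

int main() {
  Node C = {2, {}}, B = {1, {&C}}, A = {0, {&B}};
  performDFS(&A); // prints enter 0, 1, 2, then exit 2, 1, 0 as scopes close
  return 0;
}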
+ WorkList.push_back(Node); + do { + Node = WorkList.pop_back_val(); + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + OpenChildren[Node] = NumChildren; + for (unsigned i = 0; i != NumChildren; ++i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + // Now perform CSE. + bool Changed = false; + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); + EnterScope(MBB); + Changed |= ProcessBlock(MBB); + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); + } return Changed; } @@ -370,5 +444,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); - return ProcessBlock(DT->getRootNode()); + return PerformCSE(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index e4ed7dbac4e7..3cf10b3ac65d 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -51,7 +51,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, +MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) { if (TM.getRegisterInfo()) @@ -630,7 +630,7 @@ MachineConstantPool::~MachineConstantPool() { /// CanShareConstantPoolEntry - Test whether the given two constants /// can be allocated the same constant pool entry. -static bool CanShareConstantPoolEntry(Constant *A, Constant *B, +static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, const TargetData *TD) { // Handle the trivial case quickly. if (A == B) return true; @@ -645,17 +645,17 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B, // If a floating-point value and an integer value have the same encoding, // they can share a constant-pool entry. - if (ConstantFP *AFP = dyn_cast<ConstantFP>(A)) - if (ConstantInt *BI = dyn_cast<ConstantInt>(B)) + if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A)) + if (const ConstantInt *BI = dyn_cast<ConstantInt>(B)) return AFP->getValueAPF().bitcastToAPInt() == BI->getValue(); - if (ConstantFP *BFP = dyn_cast<ConstantFP>(B)) - if (ConstantInt *AI = dyn_cast<ConstantInt>(A)) + if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B)) + if (const ConstantInt *AI = dyn_cast<ConstantInt>(A)) return BFP->getValueAPF().bitcastToAPInt() == AI->getValue(); // Two vectors can share an entry if each pair of corresponding // elements could. - if (ConstantVector *AV = dyn_cast<ConstantVector>(A)) - if (ConstantVector *BV = dyn_cast<ConstantVector>(B)) { + if (const ConstantVector *AV = dyn_cast<ConstantVector>(A)) + if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) { if (AV->getType()->getNumElements() != BV->getType()->getNumElements()) return false; for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i) @@ -674,7 +674,7 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B, /// an existing one. User must specify the log2 of the minimum required /// alignment for the object. 
/// -unsigned MachineConstantPool::getConstantPoolIndex(Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 3b2eb6d388b4..07a0f45c0f48 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -44,7 +44,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) { MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); assert(MMI && "MMI not around yet??"); MMI->setModule(&M); - NextFnNum = 1; return false; + NextFnNum = 0; + return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 39b7fb507f8b..99b5beb13656 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -192,6 +192,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getBlockAddress() == Other.getBlockAddress(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); } } @@ -409,19 +411,14 @@ void MachineInstr::addImplicitDefUseOperands() { addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } -/// MachineInstr ctor - This constructor create a MachineInstr and add the -/// implicit operands. It reserves space for number of operands specified by -/// TargetInstrDesc or the numOperands if it is not zero. (for -/// instructions with variable number of operands). +/// MachineInstr ctor - This constructor creates a MachineInstr and adds the +/// implicit operands. It reserves space for the number of operands specified by +/// the TargetInstrDesc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { - if (!NoImp && TID->getImplicitDefs()) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (!NoImp && TID->getImplicitUses()) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); @@ -434,12 +431,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { - if (!NoImp && TID->getImplicitDefs()) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (!NoImp && TID->getImplicitUses()) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); @@ -450,17 +443,11 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// MachineInstr ctor - Work exactly the same as the ctor two above, except /// that the MachineInstr is created and added to the end of the specified /// basic block. 
-/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -475,12 +462,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -1123,6 +1105,19 @@ unsigned MachineInstr::isConstantValuePHI() const { return Reg; } +/// allDefsAreDead - Return true if all the defs of this instruction are dead. +/// +bool MachineInstr::allDefsAreDead() const { + for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.isUse()) + continue; + if (!MO.isDead()) + return false; + } + return true; +} + void MachineInstr::dump() const { dbgs() << " " << *this; } @@ -1192,7 +1187,15 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (TOI.isOptionalDef()) OS << "opt:"; } - MO.print(OS, TM); + if (isDebugValue() && MO.isMetadata()) { + // Pretty print DBG_VALUE instructions. + const MDNode *MD = MO.getMetadata(); + if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2))) + OS << "!\"" << MDS->getString() << '\"'; + else + MO.print(OS, TM); + } else + MO.print(OS, TM); } // Briefly indicate whether any call clobbers were omitted. 
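Editorial aside on the MachineInstr constructor cleanups above: every one of them replaces a hand-rolled walk over the null-terminated implicit-register arrays with the TargetInstrDesc accessors that compute the same counts. A minimal before/after model (editor's sketch; the data and accessors here are stand-ins, not the LLVM API):

#include <cassert>

// 0-terminated register lists, as the instruction descriptor stores them.
static const unsigned ImpDefs[] = { 10, 11, 0 };
static const unsigned ImpUses[] = { 20, 0 };

// Before: count implicit operands by walking each array to its terminator.
static unsigned countByWalking() {
  unsigned N = 0;
  for (const unsigned *P = ImpDefs; *P; ++P) ++N;
  for (const unsigned *P = ImpUses; *P; ++P) ++N;
  return N;
}

// After: the descriptor's accessors produce the counts, so each constructor
// only has to add them together.
static unsigned getNumImplicitDefs() { return 2; }
static unsigned getNumImplicitUses() { return 1; }

int main() {
  unsigned NumImplicitOps = getNumImplicitDefs() + getNumImplicitUses();
  assert(NumImplicitOps == countByWalking());
  return 0;
}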
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 0361694d6636..b2e757d8d65d 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,8 +22,8 @@
 #define DEBUG_TYPE "machine-licm"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -33,6 +33,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -41,32 +42,41 @@ using namespace llvm;
 
 STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
 STATISTIC(NumCSEed,   "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+          "Number of machine instructions hoisted out of loops post regalloc");
 
 namespace {
   class MachineLICM : public MachineFunctionPass {
-    MachineConstantPool *MCP;
+    bool PreRegAlloc;
+
+    const TargetMachine   *TM;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
-    BitVector AllocatableSet;
+    const MachineFrameInfo *MFI;
+    MachineRegisterInfo *RegInfo;
 
     // Various analyses that we use...
     AliasAnalysis        *AA;      // Alias analysis info.
-    MachineLoopInfo      *LI;      // Current MachineLoopInfo
+    MachineLoopInfo      *MLI;     // Current MachineLoopInfo
    MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
-    MachineRegisterInfo  *RegInfo; // Machine register information
 
     // State that is updated as we process loops
     bool         Changed;          // True if a loop is changed.
-    bool         FirstInLoop;      // True if it's the first LICM in the loop.
     MachineLoop *CurLoop;          // The current loop we are working on.
     MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
 
+    BitVector AllocatableSet;
+
+    // For each opcode, keep a list of potential CSE instructions.
     DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
   public:
     static char ID; // Pass identification, replacement for typeid
-    MachineLICM() : MachineFunctionPass(&ID) {}
+    MachineLICM() :
+      MachineFunctionPass(&ID), PreRegAlloc(true) {}
+
+    explicit MachineLICM(bool PreRA) :
+      MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -88,6 +98,39 @@ namespace {
     }
 
   private:
+    /// CandidateInfo - Keep track of information about hoisting candidates.
+    struct CandidateInfo {
+      MachineInstr *MI;
+      unsigned      Def;
+      int           FI;
+      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+        : MI(mi), Def(def), FI(fi) {}
+    };
+
+    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+    /// invariants out to the preheader.
+    void HoistRegionPostRA();
+
+    /// HoistPostRA - When an instruction is found to only use loop invariant
+    /// operands that is safe to hoist, this instruction is called to do the
+    /// dirty work.
+    void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+    /// ProcessMI - Examine the instruction for a potential LICM candidate. Also
+    /// gather register def and frame object update information.
+    void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+                   SmallSet<int, 32> &StoredFIs,
+                   SmallVector<CandidateInfo, 32> &Candidates);
+
+    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+    /// current loop.
+ void AddToLiveIns(unsigned Reg); + + /// IsLICMCandidate - Returns true if the instruction may be a suitable + /// candidate for LICM. e.g. If the instruction is a call, then it's obviously + /// not safe to hoist it. + bool IsLICMCandidate(MachineInstr &I); + /// IsLoopInvariantInst - Returns true if the instruction is loop /// invariant. I.e., all virtual register operands are defined outside of /// the loop, physical registers aren't accessed (explicitly or implicitly), @@ -145,7 +188,9 @@ char MachineLICM::ID = 0; static RegisterPass<MachineLICM> X("machinelicm", "Machine Loop Invariant Code Motion"); -FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); } +FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { + return new MachineLICM(PreRegAlloc); +} /// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most /// loop that has a preheader. @@ -156,31 +201,31 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { return true; } -/// Hoist expressions out of the specified loop. Note, alias info for inner loop -/// is not preserved so it is not a good idea to run LICM multiple times on one -/// loop. -/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "******** Machine LICM ********\n"); + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); - Changed = FirstInLoop = false; - MCP = MF.getConstantPool(); + Changed = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); + MFI = MF.getFrameInfo(); RegInfo = &MF.getRegInfo(); AllocatableSet = TRI->getAllocatableSet(MF); // Get our Loop information... - LI = &getAnalysis<MachineLoopInfo>(); - DT = &getAnalysis<MachineDominatorTree>(); - AA = &getAnalysis<AliasAnalysis>(); + MLI = &getAnalysis<MachineLoopInfo>(); + DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); - for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ CurLoop = *I; - // Only visit outer-most preheader-sporting loops. - if (!LoopIsOuterMostWithPreheader(CurLoop)) + // If this is done before regalloc, only visit outer-most preheader-sporting + // loops. + if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) continue; // Determine the block to which to hoist instructions. If we can't find a @@ -193,16 +238,230 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (!CurPreheader) continue; - // CSEMap is initialized for loop header when the first instruction is - // being hoisted. - FirstInLoop = true; - HoistRegion(DT->getNode(CurLoop->getHeader())); - CSEMap.clear(); + if (!PreRegAlloc) + HoistRegionPostRA(); + else { + // CSEMap is initialized for loop header when the first instruction is + // being hoisted. + MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + HoistRegion(N); + CSEMap.clear(); + } } return Changed; } +/// InstructionStoresToFI - Return true if instruction stores to the +/// specified frame. 
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end(); o != oe; ++o) {
+    if (!(*o)->isStore() || !(*o)->getValue())
+      continue;
+    if (const FixedStackPseudoSourceValue *Value =
+        dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+      if (Value->getFrameIndex() == FI)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// ProcessMI - Examine the instruction for a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+                            unsigned *PhysRegDefs,
+                            SmallSet<int, 32> &StoredFIs,
+                            SmallVector<CandidateInfo, 32> &Candidates) {
+  bool RuledOut = false;
+  bool HasNonInvariantUse = false;
+  unsigned Def = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isFI()) {
+      // Remember if the instruction stores to the frame index.
+      int FI = MO.getIndex();
+      if (!StoredFIs.count(FI) &&
+          MFI->isSpillSlotObjectIndex(FI) &&
+          InstructionStoresToFI(MI, FI))
+        StoredFIs.insert(FI);
+      HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+           "Not expecting virtual register!");
+
+    if (!MO.isDef()) {
+      if (Reg && PhysRegDefs[Reg])
+        // If it's using a non-loop-invariant register, then it's obviously not
+        // safe to hoist.
+        HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (MO.isImplicit()) {
+      ++PhysRegDefs[Reg];
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        ++PhysRegDefs[*AS];
+      if (!MO.isDead())
+        // Non-dead implicit def? This cannot be hoisted.
+        RuledOut = true;
+      // No need to check if a dead implicit def is also defined by
+      // another instruction.
+      continue;
+    }
+
+    // FIXME: For now, avoid instructions with multiple defs, unless
+    // it's a dead implicit def.
+    if (Def)
+      RuledOut = true;
+    else
+      Def = Reg;
+
+    // If we have already seen another instruction that defines the same
+    // register, then this is not safe.
+    if (++PhysRegDefs[Reg] > 1)
+      // MI defined register is seen defined by another instruction in
+      // the loop, it cannot be a LICM candidate.
+      RuledOut = true;
+    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+      if (++PhysRegDefs[*AS] > 1)
+        RuledOut = true;
+  }
+
+  // Only consider reloads for now and remats which do not have register
+  // operands. FIXME: Consider unfold load folding instructions.
+  if (Def && !RuledOut) {
+    int FI = INT_MIN;
+    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+      Candidates.push_back(CandidateInfo(MI, Def, FI));
+  }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+  unsigned NumRegs = TRI->getNumRegs();
+  unsigned *PhysRegDefs = new unsigned[NumRegs];
+  std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+
+  SmallVector<CandidateInfo, 32> Candidates;
+  SmallSet<int, 32> StoredFIs;
+
+  // Walk the entire region, count number of defs for each register, and
+  // collect potential LICM candidates.
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    // Conservatively treat live-ins as an external def.
+    // FIXME: That means a reload that's reused in successor block(s) will not
+    // be LICM'ed.
+    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+           E = BB->livein_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      ++PhysRegDefs[Reg];
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        ++PhysRegDefs[*AS];
+    }
+
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+    }
+  }
+
+  // Now evaluate whether the potential candidates qualify.
+  // 1. Check if the candidate defined register is defined by another
+  //    instruction in the loop.
+  // 2. If the candidate is a load from stack slot (always true for now),
+  //    check if the slot is stored anywhere in the loop.
+  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+    if (Candidates[i].FI != INT_MIN &&
+        StoredFIs.count(Candidates[i].FI))
+      continue;
+
+    if (PhysRegDefs[Candidates[i].Def] == 1) {
+      bool Safe = true;
+      MachineInstr *MI = Candidates[i].MI;
+      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+        const MachineOperand &MO = MI->getOperand(j);
+        if (!MO.isReg() || MO.isDef() || !MO.getReg())
+          continue;
+        if (PhysRegDefs[MO.getReg()]) {
+          // If it's using a non-loop-invariant register, then it's obviously
+          // not safe to hoist.
+          Safe = false;
+          break;
+        }
+      }
+      if (Safe)
+        HoistPostRA(MI, Candidates[i].Def);
+    }
+  }
+
+  delete[] PhysRegDefs;
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    if (!BB->isLiveIn(Reg))
+      BB->addLiveIn(Reg);
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+        if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+          MO.setIsKill(false);
+      }
+    }
+  }
+}
+
+/// HoistPostRA - When an instruction is found to only use loop invariant
+/// operands that is safe to hoist, this instruction is called to do the
+/// dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+  // Now move the instruction to the predecessor, inserting it before any
+  // terminator instructions.
+  DEBUG({
+      dbgs() << "Hoisting " << *MI;
+      if (CurPreheader->getBasicBlock())
+        dbgs() << " to MachineBasicBlock "
+               << CurPreheader->getName();
+      if (MI->getParent()->getBasicBlock())
+        dbgs() << " from MachineBasicBlock "
+               << MI->getParent()->getName();
+      dbgs() << "\n";
+    });
+
+  // Splice the instruction to the preheader.
+  MachineBasicBlock *MBB = MI->getParent();
+  CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI);
+
+  // Add the register to the livein lists of all the BBs in the current loop
+  // since a loop invariant must be kept live throughout the whole loop. This
+  // is important to ensure later passes do not scavenge the def register.
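The comment above, together with the AddToLiveIns call that follows, states the key invariant of post-regalloc hoisting: the hoisted def's register must stay live, and never be marked killed, across the entire loop. A stand-alone model of what AddToLiveIns enforces (editor's sketch with simplified types, not the LLVM API):

#include <cassert>
#include <set>
#include <utility>
#include <vector>

struct Block {
  std::set<unsigned> LiveIns;
  std::vector<std::pair<unsigned, bool> > Uses; // (register, isKill)
};

void addToLiveIns(std::vector<Block> &LoopBlocks, unsigned Reg) {
  for (size_t i = 0; i != LoopBlocks.size(); ++i) {
    Block &B = LoopBlocks[i];
    B.LiveIns.insert(Reg);          // keep Reg live into every loop block
    for (size_t j = 0; j != B.Uses.size(); ++j)
      if (B.Uses[j].first == Reg)
        B.Uses[j].second = false;   // no use inside the loop may kill Reg
  }
}

int main() {
  Block B;
  B.Uses.push_back(std::make_pair(5u, true)); // a killing use of register 5
  std::vector<Block> Loop(1, B);
  addToLiveIns(Loop, 5);
  assert(Loop[0].LiveIns.count(5) && !Loop[0].Uses[0].second);
  return 0;
}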
+ AddToLiveIns(Def); + + ++NumPostRAHoisted; + Changed = true; +} + /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions @@ -223,17 +482,17 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { } const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) HoistRegion(Children[I]); } -/// IsLoopInvariantInst - Returns true if the instruction is loop -/// invariant. I.e., all virtual register operands are defined outside of the -/// loop, physical registers aren't accessed explicitly, and there are no side -/// effects that aren't captured by the operands or other flags. -/// -bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { +/// IsLICMCandidate - Returns true if the instruction may be a suitable +/// candidate for LICM. e.g. If the instruction is a call, then it's obviously +/// not safe to hoist it. +bool MachineLICM::IsLICMCandidate(MachineInstr &I) { + if (I.isImplicitDef()) + return false; + const TargetInstrDesc &TID = I.getDesc(); // Ignore stuff that we obviously can't hoist. @@ -251,6 +510,17 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // This is a trivial form of alias analysis. return false; } + return true; +} + +/// IsLoopInvariantInst - Returns true if the instruction is loop +/// invariant. I.e., all virtual register operands are defined outside of the +/// loop, physical registers aren't accessed explicitly, and there are no side +/// effects that aren't captured by the operands or other flags. +/// +bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { + if (!IsLICMCandidate(I)) + return false; // The instruction is loop invariant if all of its operands are. for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { @@ -341,9 +611,6 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { - if (MI.isImplicitDef()) - return false; - // FIXME: For now, only hoist re-materilizable instructions. LICM will // increase register pressure. We want to make sure it doesn't increase // spilling. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index f813a553670b..25284d6f5fcf 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -12,8 +12,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" -#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -315,18 +313,18 @@ void MachineModuleInfo::EndFunction() { /// AnalyzeModule - Scan the module for global debug information. /// -void MachineModuleInfo::AnalyzeModule(Module &M) { +void MachineModuleInfo::AnalyzeModule(const Module &M) { // Insert functions in the llvm.used array (but not llvm.compiler.used) into // UsedFunctions. - GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. 
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); if (InitList == 0) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (Function *F = + if (const Function *F = dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts())) UsedFunctions.insert(F); } @@ -407,7 +405,7 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { /// addPersonality - Provide the personality function for the exception /// information. void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, - Function *Personality) { + const Function *Personality) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.Personality = Personality; @@ -426,7 +424,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. /// void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, - std::vector<GlobalVariable *> &TyInfo) { + std::vector<const GlobalVariable *> &TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); for (unsigned N = TyInfo.size(); N; --N) LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); @@ -435,7 +433,7 @@ void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. /// void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad, - std::vector<GlobalVariable *> &TyInfo) { + std::vector<const GlobalVariable *> &TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); std::vector<unsigned> IdsInFilter(TyInfo.size()); for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) @@ -452,10 +450,12 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. -void MachineModuleInfo::TidyLandingPads() { +void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { for (unsigned i = 0; i != LandingPads.size(); ) { LandingPadInfo &LandingPad = LandingPads[i]; - if (LandingPad.LandingPadLabel && !LandingPad.LandingPadLabel->isDefined()) + if (LandingPad.LandingPadLabel && + !LandingPad.LandingPadLabel->isDefined() && + (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) LandingPad.LandingPadLabel = 0; // Special case: we *should* emit LPs with null LP MBB. This indicates @@ -468,7 +468,10 @@ void MachineModuleInfo::TidyLandingPads() { for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; MCSymbol *EndLabel = LandingPad.EndLabels[j]; - if (BeginLabel->isDefined() && EndLabel->isDefined()) continue; + if ((BeginLabel->isDefined() || + (LPMap && (*LPMap)[BeginLabel] != 0)) && + (EndLabel->isDefined() || + (LPMap && (*LPMap)[EndLabel] != 0))) continue; LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); @@ -492,7 +495,7 @@ void MachineModuleInfo::TidyLandingPads() { /// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. 
-unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { +unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) if (TypeInfos[i] == TI) return i + 1; @@ -532,7 +535,7 @@ try_next:; } /// getPersonality - Return the personality function for the current function. -Function *MachineModuleInfo::getPersonality() const { +const Function *MachineModuleInfo::getPersonality() const { // FIXME: Until PR1414 will be fixed, we're using 1 personality function per // function return !LandingPads.empty() ? LandingPads[0].Personality : NULL; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index d9ab6773a53a..ea5ca0cad8a1 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { @@ -130,6 +133,138 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { return ++UI == use_nodbg_end(); } +bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->first == Reg || I->second == Reg) + return true; + return false; +} + +bool MachineRegisterInfo::isLiveOut(unsigned Reg) const { + for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I) + if (*I == Reg) + return true; + return false; +} + +/// getLiveInPhysReg - If VReg is a live-in virtual register, return the +/// corresponding live-in physical register. +unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->second == VReg) + return I->first; + return 0; +} + +static cl::opt<bool> +SchedLiveInCopies("schedule-livein-copies", cl::Hidden, + cl::desc("Schedule copies of livein registers"), + cl::init(false)); + +/// EmitLiveInCopy - Emit a copy for a live in physical register. If the +/// physical register has only a single copy use, then coalesced the copy +/// if possible. +static void EmitLiveInCopy(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &InsertPos, + unsigned VirtReg, unsigned PhysReg, + const TargetRegisterClass *RC, + DenseMap<MachineInstr*, unsigned> &CopyRegMap, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + unsigned NumUses = 0; + MachineInstr *UseMI = NULL; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), + UE = MRI.use_end(); UI != UE; ++UI) { + UseMI = &*UI; + if (++NumUses > 1) + break; + } + + // If the number of uses is not one, or the use is not a move instruction, + // don't coalesce. Also, only coalesce away a virtual register to virtual + // register copy. + bool Coalesced = false; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (NumUses == 1 && + TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + VirtReg = DstReg; + Coalesced = true; + } + + // Now find an ideal location to insert the copy. 
+ MachineBasicBlock::iterator Pos = InsertPos; + while (Pos != MBB->begin()) { + MachineInstr *PrevMI = prior(Pos); + DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); + // copyRegToReg might emit multiple instructions to do a copy. + unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; + if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) + // This is what the BB looks like right now: + // r1024 = mov r0 + // ... + // r1 = mov r1024 + // + // We want to insert "r1025 = mov r1". Inserting this copy below the + // move to r1024 makes it impossible for that move to be coalesced. + // + // r1025 = mov r1 + // r1024 = mov r0 + // ... + // r1 = mov 1024 + // r2 = mov 1025 + break; // Woot! Found a good location. + --Pos; + } + + bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + if (Coalesced) { + if (&*InsertPos == UseMI) ++InsertPos; + MBB->erase(UseMI); + } +} + +/// EmitLiveInCopies - Emit copies to initialize livein virtual registers +/// into the given entry block. +void +MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + if (SchedLiveInCopies) { + // Emit the copies at a heuristically-determined location in the block. + DenseMap<MachineInstr*, unsigned> CopyRegMap; + MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); + for (MachineRegisterInfo::livein_iterator LI = livein_begin(), + E = livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = getRegClass(LI->second); + EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, + RC, CopyRegMap, *this, TRI, TII); + } + } else { + // Emit the copies into the top of the block. + for (MachineRegisterInfo::livein_iterator LI = livein_begin(), + E = livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = getRegClass(LI->second); + bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + } + } + + // Add function live-ins to entry block live-in set. + for (MachineRegisterInfo::livein_iterator I = livein_begin(), + E = livein_end(); I != E; ++I) + EntryMBB->addLiveIn(I->first); +} + #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index b79cdbb660de..b8996d46f940 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -21,34 +21,50 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; -typedef std::vector<std::pair<MachineBasicBlock*, unsigned> > - IncomingPredInfoTy; +/// BBInfo - Per-basic block information used internally by MachineSSAUpdater. +class MachineSSAUpdater::BBInfo { +public: + MachineBasicBlock *BB; // Back-pointer to the corresponding block. + unsigned AvailableVal; // Value to use in this block. 
+ BBInfo *DefBB; // Block that defines the available value. + int BlkNum; // Postorder number. + BBInfo *IDom; // Immediate dominator. + unsigned NumPreds; // Number of predecessor blocks. + BBInfo **Preds; // Array[NumPreds] of predecessor blocks. + MachineInstr *PHITag; // Marker for existing PHIs that match. + + BBInfo(MachineBasicBlock *ThisBB, unsigned V) + : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0), + NumPreds(0), Preds(0), PHITag(0) { } +}; + +typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy; +typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { - return *static_cast<IncomingPredInfoTy*>(IPI); +static BBMapTy *getBBMap(void *BM) { + return static_cast<BBMapTy*>(BM); } - MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) - : AV(0), IPI(0), InsertedPHIs(NewPHI) { + : AV(0), BM(0), InsertedPHIs(NewPHI) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); } MachineSSAUpdater::~MachineSSAUpdater() { delete &getAvailableVals(AV); - delete &getIncomingPredInfo(IPI); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -59,11 +75,6 @@ void MachineSSAUpdater::Initialize(unsigned V) { else getAvailableVals(AV).clear(); - if (IPI == 0) - IPI = new IncomingPredInfoTy(); - else - getIncomingPredInfo(IPI).clear(); - VR = V; VRC = MRI->getRegClass(VR); } @@ -127,7 +138,7 @@ MachineInstr *InsertNewDef(unsigned Opcode, unsigned NewVR = MRI->createVirtualRegister(RC); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); } - + /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that /// is live in the middle of the specified block. /// @@ -150,7 +161,7 @@ MachineInstr *InsertNewDef(unsigned Opcode, unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. - if (!getAvailableVals(AV).count(BB)) + if (!HasValueForBlock(BB)) return GetValueAtEndOfBlockInternal(BB); // If there are no predecessors, just return undef. @@ -254,141 +265,436 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry /// for the specified BB and if so, return it. If not, construct SSA form by -/// walking predecessors inserting PHI nodes as needed until we get to a block -/// where the value is available. -/// +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (unsigned V = AvailableVals[BB]) + return V; - // Query AvailableVals by doing an insertion of null. - std::pair<AvailableValsTy::iterator, bool> InsertRes = - AvailableVals.insert(std::make_pair(BB, 0)); - - // Handle the case when the insertion fails because we have already seen BB. - if (!InsertRes.second) { - // If the insertion failed, there are two cases. The first case is that the - // value is already available for the specified block. If we get this, just - // return the value. 
- if (InsertRes.first->second != 0) - return InsertRes.first->second; - - // Otherwise, if the value we find is null, then this is the value is not - // known but it is being computed elsewhere in our recursion. This means - // that we have a cycle. Handle this by inserting a PHI node and returning - // it. When we get back to the first instance of the recursion we will fill - // in the PHI node. - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, - VRC, MRI,TII); - unsigned NewVR = NewPHI->getOperand(0).getReg(); - InsertRes.first->second = NewVR; - return NewVR; - } + // Pool allocation used internally by GetValueAtEndOfBlock. + BumpPtrAllocator Allocator; + BBMapTy BBMapObj; + BM = &BBMapObj; - // If there are no predecessors, then we must have found an unreachable block - // just return 'undef'. Since there are no predecessors, InsertRes must not - // be invalidated. - if (BB->pred_empty()) { + SmallVector<BBInfo*, 100> BlockList; + BuildBlockList(BB, &BlockList, &Allocator); + + // Special case: bail out if BB is unreachable. + if (BlockList.size() == 0) { + BM = 0; // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), VRC, MRI, TII); - return InsertRes.first->second = NewDef->getOperand(0).getReg(); + unsigned V = NewDef->getOperand(0).getReg(); + AvailableVals[BB] = V; + return V; } - // Okay, the value isn't in the map and we just inserted a null in the entry - // to indicate that we're processing the block. Since we have no idea what - // value is in this block, we have to recurse through our predecessors. - // - // While we're walking our predecessors, we keep track of them in a vector, - // then insert a PHI node in the end if we actually need one. We could use a - // smallvector here, but that would take a lot of stack space for every level - // of the recursion, just use IncomingPredInfo as an explicit stack. - IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); - unsigned FirstPredInfoEntry = IncomingPredInfo.size(); - - // As we're walking the predecessors, keep track of whether they are all - // producing the same value. If so, this value will capture it, if not, it - // will get reset to null. We distinguish the no-predecessor case explicitly - // below. - unsigned SingularValue = 0; - bool isFirstPred = true; + FindDominators(&BlockList); + FindPHIPlacement(&BlockList); + FindAvailableVals(&BlockList); + + BM = 0; + return BBMapObj[BB]->DefBB->AvailableVal; +} + +/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds +/// vector, set Info->NumPreds, and allocate space in Info->Preds. +static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info, + SmallVectorImpl<MachineBasicBlock*> *Preds, + BumpPtrAllocator *Allocator) { + MachineBasicBlock *BB = Info->BB; for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) { - MachineBasicBlock *PredBB = *PI; - unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + E = BB->pred_end(); PI != E; ++PI) + Preds->push_back(*PI); - // Compute SingularValue. 
- if (isFirstPred) { - SingularValue = PredVal; - isFirstPred = false; - } else if (PredVal != SingularValue) - SingularValue = 0; + Info->NumPreds = Preds->size(); + Info->Preds = static_cast<MachineSSAUpdater::BBInfo**> + (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*), + AlignOf<MachineSSAUpdater::BBInfo*>::Alignment)); +} + +/// BuildBlockList - Starting from the specified basic block, traverse back +/// through its predecessors until reaching blocks with known values. Create +/// BBInfo structures for the blocks and append them to the block list. +void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB, + BlockListTy *BlockList, + BumpPtrAllocator *Allocator) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + BBMapTy *BBMap = getBBMap(BM); + SmallVector<BBInfo*, 10> RootList; + SmallVector<BBInfo*, 64> WorkList; + + BBInfo *Info = new (*Allocator) BBInfo(BB, 0); + (*BBMap)[BB] = Info; + WorkList.push_back(Info); + + // Search backward from BB, creating BBInfos along the way and stopping when + // reaching blocks that define the value. Record those defining blocks on + // the RootList. + SmallVector<MachineBasicBlock*, 10> Preds; + while (!WorkList.empty()) { + Info = WorkList.pop_back_val(); + Preds.clear(); + FindPredecessorBlocks(Info, &Preds, Allocator); + + // Treat an unreachable predecessor as a definition with 'undef'. + if (Info->NumPreds == 0) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, + Info->BB, + Info->BB->getFirstTerminator(), + VRC, MRI, TII); + Info->AvailableVal = NewDef->getOperand(0).getReg(); + Info->DefBB = Info; + RootList.push_back(Info); + continue; + } + + for (unsigned p = 0; p != Info->NumPreds; ++p) { + MachineBasicBlock *Pred = Preds[p]; + // Check if BBMap already has a BBInfo for the predecessor block. + BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); + if (BBMapBucket.second) { + Info->Preds[p] = BBMapBucket.second; + continue; + } + + // Create a new BBInfo for the predecessor. + unsigned PredVal = AvailableVals.lookup(Pred); + BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); + BBMapBucket.second = PredInfo; + Info->Preds[p] = PredInfo; + + if (PredInfo->AvailableVal) { + RootList.push_back(PredInfo); + continue; + } + WorkList.push_back(PredInfo); + } + } + + // Now that we know what blocks are backwards-reachable from the starting + // block, do a forward depth-first traversal to assign postorder numbers + // to those blocks. + BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); + unsigned BlkNum = 1; + + // Initialize the worklist with the roots from the backward traversal. + while (!RootList.empty()) { + Info = RootList.pop_back_val(); + Info->IDom = PseudoEntry; + Info->BlkNum = -1; + WorkList.push_back(Info); + } + + while (!WorkList.empty()) { + Info = WorkList.back(); + + if (Info->BlkNum == -2) { + // All the successors have been handled; assign the postorder number. + Info->BlkNum = BlkNum++; + // If not a root, put it on the BlockList. + if (!Info->AvailableVal) + BlockList->push_back(Info); + WorkList.pop_back(); + continue; + } + + // Leave this entry on the worklist, but set its BlkNum to mark that its + // successors have been put on the worklist. When it returns to the top + // the list, after handling its successors, it will be assigned a number. + Info->BlkNum = -2; + + // Add unvisited successors to the work list. 
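// --- Illustrative sketch (not part of this patch) ---------------------------
// The surrounding loop assigns postorder numbers without recursion by
// overloading BlkNum as a state flag: -1 means "queued", -2 means "successors
// pushed", and a positive value is the final number. The same trick in
// isolation, on a toy successor list:
#include <cassert>
#include <vector>

static void postorderNumber(const std::vector<std::vector<int>> &Succs,
                            int Root, std::vector<int> &BlkNum) {
  int Next = 1;
  std::vector<int> Work{Root};
  BlkNum[Root] = -1;                     // queued
  while (!Work.empty()) {
    int B = Work.back();
    if (BlkNum[B] == -2) {               // successors done: number it
      BlkNum[B] = Next++;
      Work.pop_back();
      continue;
    }
    BlkNum[B] = -2;                      // expanded, but left on the stack
    for (int S : Succs[B])
      if (BlkNum[S] == 0) {              // only unvisited successors
        BlkNum[S] = -1;
        Work.push_back(S);
      }
  }
}

int main() {
  // Diamond: 0 -> {1,2}, 1 -> 3, 2 -> 3.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
  std::vector<int> BlkNum(4, 0);
  postorderNumber(Succs, 0, BlkNum);
  assert(BlkNum[0] == 4);                                 // root numbered last
  assert(BlkNum[3] < BlkNum[1] && BlkNum[3] < BlkNum[2]); // join comes first
  return 0;
}
// -----------------------------------------------------------------------------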
+ for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(), + E = Info->BB->succ_end(); SI != E; ++SI) { + BBInfo *SuccInfo = (*BBMap)[*SI]; + if (!SuccInfo || SuccInfo->BlkNum) + continue; + SuccInfo->BlkNum = -1; + WorkList.push_back(SuccInfo); + } } + PseudoEntry->BlkNum = BlkNum; +} - /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If - /// this block is involved in a loop, a no-entry PHI node will have been - /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted - /// above. - unsigned &InsertedVal = AvailableVals[BB]; - - // If all the predecessor values are the same then we don't need to insert a - // PHI. This is the simple and common case. - if (SingularValue) { - // If a PHI node got inserted, replace it with the singlar value and delete - // it. - if (InsertedVal) { - MachineInstr *OldVal = MRI->getVRegDef(InsertedVal); - // Be careful about dead loops. These RAUW's also update InsertedVal. - assert(InsertedVal != SingularValue && "Dead loop?"); - ReplaceRegWith(InsertedVal, SingularValue); - OldVal->eraseFromParent(); +/// IntersectDominators - This is the dataflow lattice "meet" operation for +/// finding dominators. Given two basic blocks, it walks up the dominator +/// tree until it finds a common dominator of both. It uses the postorder +/// number of the blocks to determine how to do that. +static MachineSSAUpdater::BBInfo * +IntersectDominators(MachineSSAUpdater::BBInfo *Blk1, + MachineSSAUpdater::BBInfo *Blk2) { + while (Blk1 != Blk2) { + while (Blk1->BlkNum < Blk2->BlkNum) { + Blk1 = Blk1->IDom; + if (!Blk1) + return Blk2; } + while (Blk2->BlkNum < Blk1->BlkNum) { + Blk2 = Blk2->IDom; + if (!Blk2) + return Blk1; + } + } + return Blk1; +} - InsertedVal = SingularValue; +/// FindDominators - Calculate the dominator tree for the subset of the CFG +/// corresponding to the basic blocks on the BlockList. This uses the +/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and +/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. +/// Because the CFG subset does not include any edges leading into blocks that +/// define the value, the results are not the usual dominator tree. The CFG +/// subset has a single pseudo-entry node with edges to a set of root nodes +/// for blocks that define the value. The dominators for this subset CFG are +/// not the standard dominators but they are adequate for placing PHIs within +/// the subset CFG. +void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // Start with the first predecessor. + assert(Info->NumPreds > 0 && "unreachable block"); + BBInfo *NewIDom = Info->Preds[0]; + + // Iterate through the block's other predecessors. + for (unsigned p = 1; p != Info->NumPreds; ++p) { + BBInfo *Pred = Info->Preds[p]; + NewIDom = IntersectDominators(NewIDom, Pred); + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - return InsertedVal; + // Check if the IDom value has changed. 
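// --- Illustrative sketch (not part of this patch) ---------------------------
// IntersectDominators above is the two-finger "meet" from the cited Cooper,
// Harvey and Kennedy paper: whichever node has the smaller postorder number
// climbs its IDom chain until the fingers agree. A standalone version using
// the textbook convention that the entry node is its own IDom (the code above
// instead null-checks against a pseudo-entry):
#include <cassert>

static int intersect(int B1, int B2, const int IDom[], const int PO[]) {
  while (B1 != B2) {
    while (PO[B1] < PO[B2]) B1 = IDom[B1];
    while (PO[B2] < PO[B1]) B2 = IDom[B2];
  }
  return B1;
}

int main() {
  // Diamond 0 -> {1,2} -> 3, postorder 3,1,2,0.
  const int PO[4]   = {4, 2, 3, 1};
  const int IDom[4] = {0, 0, 0, 0};      // entry is its own IDom
  // The meet of block 3's two predecessors yields its immediate dominator:
  assert(intersect(1, 2, IDom, PO) == 0);
  return 0;
}
// -----------------------------------------------------------------------------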
+ if (NewIDom != Info->IDom) { + Info->IDom = NewIDom; + Changed = true; + } + } + } while (Changed); +} + +/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for +/// any blocks containing definitions of the value. If one is found, then the +/// successor of Pred is in the dominance frontier for the definition, and +/// this function returns true. +static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred, + const MachineSSAUpdater::BBInfo *IDom) { + for (; Pred != IDom; Pred = Pred->IDom) { + if (Pred->DefBB == Pred) + return true; } + return false; +} +/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of +/// the known definitions. Iteratively add PHIs in the dom frontiers until +/// nothing changes. Along the way, keep track of the nearest dominating +/// definitions for non-PHI blocks. +void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // If this block already needs a PHI, there is nothing to do here. + if (Info->DefBB == Info) + continue; + + // Default to use the same def as the immediate dominator. + BBInfo *NewDefBB = Info->IDom->DefBB; + for (unsigned p = 0; p != Info->NumPreds; ++p) { + if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { + // Need a PHI here. + NewDefBB = Info; + break; + } + } - // Otherwise, we do need a PHI: insert one now if we don't already have one. - MachineInstr *InsertedPHI; - if (InsertedVal == 0) { - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, - VRC, MRI, TII); - InsertedVal = InsertedPHI->getOperand(0).getReg(); - } else { - InsertedPHI = MRI->getVRegDef(InsertedVal); + // Check if anything changed. + if (NewDefBB != Info->DefBB) { + Info->DefBB = NewDefBB; + Changed = true; + } + } + } while (Changed); +} + +/// FindAvailableVal - If this block requires a PHI, first check if an existing +/// PHI matches the PHI placement and reaching definitions computed earlier, +/// and if not, create a new PHI. Visit all the block's predecessors to +/// calculate the available value for each one and fill in the incoming values +/// for a new PHI. +void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + + // Go through the worklist in forward order (i.e., backward through the CFG) + // and check if existing PHIs can be used. If not, create empty PHIs where + // they are needed. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) { + BBInfo *Info = *I; + // Check if there needs to be a PHI in BB. + if (Info->DefBB != Info) + continue; + + // Look for an existing PHI. + FindExistingPHI(Info->BB, BlockList); + if (Info->AvailableVal) + continue; + + MachineBasicBlock::iterator Loc = + Info->BB->empty() ? Info->BB->end() : Info->BB->front(); + MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc, + VRC, MRI, TII); + unsigned PHI = InsertedPHI->getOperand(0).getReg(); + Info->AvailableVal = PHI; + AvailableVals[Info->BB] = PHI; } - // Fill in all the predecessors of the PHI. 
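// --- Illustrative sketch (not part of this patch) ---------------------------
// IsDefInDomFrontier, defined below, walks from a predecessor up the IDom
// chain to the block's own IDom; if any block on that path defines the value,
// the block sits on that definition's dominance frontier and needs a PHI.
// The same test in miniature:
#include <cassert>

static bool defOnPath(int Pred, int StopIDom, const int IDom[],
                      const bool HasDef[]) {
  for (; Pred != StopIDom; Pred = IDom[Pred])
    if (HasDef[Pred]) return true;
  return false;
}

int main() {
  // Diamond 0 -> {1,2} -> 3; IDom(1) = IDom(2) = IDom(3) = 0.
  const int IDom[4]    = {0, 0, 0, 0};
  const bool HasDef[4] = {false, true, false, false}; // def in block 1 only
  assert(defOnPath(1, 0, IDom, HasDef));  // pred 1's path carries a def, so
                                          // block 3 needs a PHI
  assert(!defOnPath(2, 0, IDom, HasDef)); // pred 2 contributes none
  return 0;
}
// -----------------------------------------------------------------------------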
- MachineInstrBuilder MIB(InsertedPHI); - for (IncomingPredInfoTy::iterator I = - IncomingPredInfo.begin()+FirstPredInfoEntry, - E = IncomingPredInfo.end(); I != E; ++I) - MIB.addReg(I->second).addMBB(I->first); + // Now go back through the worklist in reverse order to fill in the arguments + // for any new PHIs added in the forward traversal. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + if (Info->DefBB != Info) { + // Record the available value at join nodes to speed up subsequent + // uses of this SSAUpdater for the same value. + if (Info->NumPreds > 1) + AvailableVals[Info->BB] = Info->DefBB->AvailableVal; + continue; + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); + // Check if this block contains a newly added PHI. + unsigned PHI = Info->AvailableVal; + MachineInstr *InsertedPHI = MRI->getVRegDef(PHI); + if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1) + continue; + + // Iterate through the block's predecessors. + MachineInstrBuilder MIB(InsertedPHI); + for (unsigned p = 0; p != Info->NumPreds; ++p) { + BBInfo *PredInfo = Info->Preds[p]; + MachineBasicBlock *Pred = PredInfo->BB; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + MIB.addReg(PredInfo->AvailableVal).addMBB(Pred); + } - // See if the PHI node can be merged to a single value. This can happen in - // loop cases when we get a PHI of itself and one other value. - if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { - MRI->replaceRegWith(InsertedVal, ConstVal); - InsertedPHI->eraseFromParent(); - InsertedVal = ConstVal; - } else { DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); } +} - return InsertedVal; +/// FindExistingPHI - Look through the PHI nodes in a block to see if any of +/// them match what is needed. +void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB, + BlockListTy *BlockList) { + for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + if (CheckIfPHIMatches(BBI)) { + RecordMatchingPHI(BBI); + break; + } + // Match failed: clear all the PHITag values. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) + (*I)->PHITag = 0; + } +} + +/// CheckIfPHIMatches - Check if a PHI node matches the placement and values +/// in the BBMap. +bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) { + BBMapTy *BBMap = getBBMap(BM); + SmallVector<MachineInstr*, 20> WorkList; + WorkList.push_back(PHI); + + // Mark that the block containing this PHI has been visited. + (*BBMap)[PHI->getParent()]->PHITag = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { + unsigned IncomingVal = PHI->getOperand(i).getReg(); + BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()]; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + + // Check if it matches the expected value. 
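// --- Illustrative sketch (not part of this patch) ---------------------------
// Both the PHI-filling loop and the matching loop nearby walk a machine PHI's
// flat operand list: operand 0 is the result, then (value, predecessor block)
// pairs follow, hence the "i = 1; i += 2" stride. A toy encoding with plain
// ints (positive values as registers, the paired int as a block id); ToyPHI
// and incomingFor are hypothetical:
#include <cassert>
#include <vector>

struct ToyPHI { std::vector<int> Ops; };   // Ops[0] = def, then reg,block,...

static int incomingFor(const ToyPHI &PHI, int Block) {
  for (size_t i = 1; i + 1 < PHI.Ops.size(); i += 2)
    if (PHI.Ops[i + 1] == Block)
      return PHI.Ops[i];
  return 0;                                // no incoming value from Block
}

int main() {
  ToyPHI PHI{{100, 7, 1, 8, 2}};           // %100 = phi [%7, bb1], [%8, bb2]
  assert(incomingFor(PHI, 1) == 7);
  assert(incomingFor(PHI, 2) == 8);
  return 0;
}
// -----------------------------------------------------------------------------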
+ if (PredInfo->AvailableVal) { + if (IncomingVal == PredInfo->AvailableVal) + continue; + return false; + } + + // Check if the value is a PHI in the correct block. + MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); + if (!IncomingPHIVal->isPHI() || + IncomingPHIVal->getParent() != PredInfo->BB) + return false; + + // If this block has already been visited, check if this PHI matches. + if (PredInfo->PHITag) { + if (IncomingPHIVal == PredInfo->PHITag) + continue; + return false; + } + PredInfo->PHITag = IncomingPHIVal; + + WorkList.push_back(IncomingPHIVal); + } + } + return true; +} + +/// RecordMatchingPHI - For a PHI node that matches, record it and its input +/// PHIs in both the BBMap and the AvailableVals mapping. +void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) { + BBMapTy *BBMap = getBBMap(BM); + AvailableValsTy &AvailableVals = getAvailableVals(AV); + SmallVector<MachineInstr*, 20> WorkList; + WorkList.push_back(PHI); + + // Record this PHI. + MachineBasicBlock *BB = PHI->getParent(); + AvailableVals[BB] = PHI->getOperand(0).getReg(); + (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg(); + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { + unsigned IncomingVal = PHI->getOperand(i).getReg(); + MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); + if (!IncomingPHIVal->isPHI()) continue; + BB = IncomingPHIVal->getParent(); + BBInfo *Info = (*BBMap)[BB]; + if (!Info || Info->AvailableVal) + continue; + + // Record the PHI and add it to the worklist. + AvailableVals[BB] = IncomingVal; + Info->AvailableVal = IncomingVal; + WorkList.push_back(IncomingPHIVal); + } + } } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index e65961901578..ef489dc55603 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -37,6 +38,7 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *RegInfo; // Machine register information MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *LI; AliasAnalysis *AA; BitVector AllocatableSet; // Which physregs are allocatable? @@ -51,7 +53,9 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); } private: bool ProcessBlock(MachineBasicBlock &MBB); @@ -102,6 +106,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { TRI = TM.getRegisterInfo(); RegInfo = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); + LI = &getAnalysis<MachineLoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); AllocatableSet = TRI->getAllocatableSet(MF); @@ -276,8 +281,29 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); - return false; + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. 
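// --- Illustrative sketch (not part of this patch) ---------------------------
// The sinking guard below keys on SuccToSinkTo->pred_size() > 1. The textbook
// notion of a critical edge also requires the source block to have more than
// one successor; a self-contained predicate for that full definition (Blk and
// isCriticalEdge are hypothetical names):
#include <cassert>
#include <vector>

struct Blk { std::vector<int> Succs, Preds; };

static bool isCriticalEdge(const std::vector<Blk> &CFG, int From, int To) {
  return CFG[From].Succs.size() > 1 && CFG[To].Preds.size() > 1;
}

int main() {
  // 0 -> {1, 2} and 1 -> 2: the edge 0 -> 2 is critical, 0 -> 1 is not.
  std::vector<Blk> CFG(3);
  CFG[0].Succs = {1, 2};
  CFG[1].Succs = {2};  CFG[1].Preds = {0};
  CFG[2].Preds = {0, 1};
  assert(isCriticalEdge(CFG, 0, 2));
  assert(!isCriticalEdge(CFG, 0, 1));
  return 0;
}
// -----------------------------------------------------------------------------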
+ bool store = true; + if (!MI->isSafeToMove(TII, AA, store)) { + DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); + return false; + } + + // We don't want to sink across a critical edge if we don't dominate the + // successor. We could be introducing calculations to new code paths. + if (!DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); + return false; + } + + // Don't sink instructions into a loop. + if (LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); + return false; + } + + // Otherwise we are OK with sinking along a critical edge. + DEBUG(dbgs() << "Sinking along critical edge.\n"); } // Determine where to insert into. Skip phi nodes. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 434a1e87246e..0b75c559827b 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -279,7 +279,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { if (OutFile) delete OutFile; else if (foundErrors) - llvm_report_error("Found "+Twine(foundErrors)+" machine code errors."); + report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); // Clean up. regsLive.clear(); @@ -351,8 +351,8 @@ void MachineVerifier::visitMachineFunctionBefore() { } // Does iterator point to a and b as the first two elements? -bool matchPair(MachineBasicBlock::const_succ_iterator i, - const MachineBasicBlock *a, const MachineBasicBlock *b) { +static bool matchPair(MachineBasicBlock::const_succ_iterator i, + const MachineBasicBlock *a, const MachineBasicBlock *b) { if (*i == a) return *++i == b; if (*i == b) @@ -470,7 +470,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { if (!TargetRegisterInfo::isPhysicalRegister(*I)) { report("MBB live-in list contains non-physical register", MBB); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 424181c02549..d3e1295df3a3 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -46,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include <map> #include <set> using namespace llvm; @@ -266,6 +265,17 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Initialize register live-range state for scheduling in this block. Scheduler.StartBlock(MBB); + // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA + // scheduler has some sort of problem with DebugValue instructions that + // causes an assertion in LeaksContext.h to fail occasionally. Just + // remove all those instructions for now. + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ) { + MachineInstr *MI = &*I++; + if (MI->isDebugValue()) + MI->eraseFromParent(); + } + // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. 
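// --- Illustrative sketch (not part of this patch) ---------------------------
// The loop below walks the block bottom-up and cuts a scheduling region at
// every boundary instruction; each region between boundaries is then
// scheduled independently. The same partitioning over a toy instruction list
// (Instr and regions are hypothetical stand-ins):
#include <cassert>
#include <utility>
#include <vector>

struct Instr { bool Boundary; };

static std::vector<std::pair<int, int>>          // [begin, end) index ranges
regions(const std::vector<Instr> &Block) {
  std::vector<std::pair<int, int>> Out;
  int End = (int)Block.size();
  for (int I = End; I-- > 0; ) {
    if (!Block[I].Boundary) continue;
    if (I + 1 < End) Out.push_back({I + 1, End}); // region after the boundary
    End = I;                                 // the boundary itself is skipped
  }
  if (End > 0) Out.push_back({0, End});
  return Out;
}

int main() {
  // instr, instr, boundary, instr  ->  regions [3,4) and [0,2)
  std::vector<Instr> Block = {{false}, {false}, {true}, {false}};
  auto R = regions(Block);
  assert(R.size() == 2);
  assert(R[0] == std::make_pair(3, 4) && R[1] == std::make_pair(0, 2));
  return 0;
}
// -----------------------------------------------------------------------------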
MachineBasicBlock::iterator Current = MBB->end(); @@ -274,7 +284,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineInstr *MI = prior(I); if (isSchedulingBoundary(MI, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); - Scheduler.EmitSchedule(0); + Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; Scheduler.Observe(MI, CurrentCount); @@ -286,7 +296,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); - Scheduler.EmitSchedule(0); + Scheduler.EmitSchedule(); // Clean up register live-range state. Scheduler.FinishBlock(); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 27cb566d9512..a454b627afd0 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -131,10 +131,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = FFI->hasCalls(); + bool HasCalls = MFI->hasCalls(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); @@ -162,8 +162,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { HasCalls = true; } - FFI->setHasCalls(HasCalls); - FFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI->setHasCalls(HasCalls); + MFI->setMaxCallFrameSize(MaxCallFrameSize); for (std::vector<MachineBasicBlock::iterator>::iterator i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { @@ -184,7 +184,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); @@ -197,6 +197,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (CSRegs == 0 || CSRegs[0] == 0) return; + // In Naked functions we aren't going to save any registers. + if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + return; + // Figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. const TargetRegisterClass * const *CSRegClasses = @@ -255,19 +259,19 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. 
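// --- Illustrative sketch (not part of this patch) ---------------------------
// The rounding used throughout this file, seen in AdjustStackOffset above as
// "(Offset + Align - 1) / Align * Align", bumps an offset to the next
// multiple of Align: integer division truncates, so adding Align - 1 first
// makes it round up. Worked examples:
#include <cassert>

static long long alignTo(long long Offset, long long Align) {
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  assert(alignTo(13, 8) == 16);   // 13 + 7 = 20; 20 / 8 = 2; 2 * 8 = 16
  assert(alignTo(16, 8) == 16);   // already aligned: unchanged
  assert(alignTo(1, 4) == 4);
  return 0;
}
// -----------------------------------------------------------------------------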
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true, false); } I->setFrameIdx(FrameIdx); } - FFI->setCalleeSavedInfo(CSI); + MFI->setCalleeSavedInfo(CSI); } /// insertCSRSpillsAndRestores - Insert spill and restore code for @@ -275,10 +279,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. - MachineFrameInfo *FFI = Fn.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - FFI->setCalleeSavedInfoValid(true); + MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) @@ -436,14 +440,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) - Offset += FFI->getObjectSize(FrameIdx); + Offset += MFI->getObjectSize(FrameIdx); - unsigned Align = FFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI->getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -453,10 +457,10 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, Offset = (Offset + Align - 1) / Align * Align; if (StackGrowsDown) { - FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { - FFI->setObjectOffset(FrameIdx, Offset); - Offset += FFI->getObjectSize(FrameIdx); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); } } @@ -470,7 +474,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -487,17 +491,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -FFI->getObjectOffset(i); + FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. 
- FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -506,29 +510,29 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { - // If stack grows down, we need to add size of find the lowest + // If the stack grows down, we need to add the size to find the lowest // address of the object. - Offset += FFI->getObjectSize(i); + Offset += MFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, Offset); - Offset += FFI->getObjectSize(i); + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); } } - unsigned MaxAlign = FFI->getMaxAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. @@ -536,28 +540,28 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. - if (FFI->getStackProtectorIndex() >= 0) - AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, + if (MFI->getStackProtectorIndex() >= 0) + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; - if (FFI->isDeadObjectIndex(i)) + if (MFI->isDeadObjectIndex(i)) continue; - if (FFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i) continue; - AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the @@ -565,15 +569,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. 
- if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) - Offset += FFI->getMaxCallFrameSize(); + if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to @@ -581,8 +585,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + if (MFI->hasCalls() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); @@ -594,7 +598,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset - LocalAreaOffset); + MFI->setStackSize(Offset - LocalAreaOffset); } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp new file mode 100644 index 000000000000..2caf1dfa38e7 --- /dev/null +++ b/lib/CodeGen/RegAllocFast.cpp @@ -0,0 +1,932 @@ +//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); + +static RegisterRegAlloc + fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); + +namespace { + class RAFast : public MachineFunctionPass { + public: + static char ID; + RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + + // Virt2PhysRegMap - This map contains entries for each virtual register + // that is currently available in a physical register. 
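// --- Illustrative sketch (not part of this patch) ---------------------------
// Virt2PhysRegMap, declared just below, is a dense table keyed by virtual
// register number in which entry 0 means "not currently in any physical
// register". Modeled here with a plain vector; FirstVirtReg is a made-up
// stand-in for TargetRegisterInfo::FirstVirtualRegister:
#include <cassert>
#include <vector>

static const unsigned FirstVirtReg = 1024;

struct Virt2Phys {
  std::vector<unsigned> Table;
  unsigned &slot(unsigned VirtReg) {
    unsigned Idx = VirtReg - FirstVirtReg;
    if (Idx >= Table.size()) Table.resize(Idx + 1, 0);
    return Table[Idx];
  }
};

int main() {
  Virt2Phys Map;
  assert(Map.slot(1030) == 0);   // %reg1030 not materialized yet
  Map.slot(1030) = 5;            // now held in physreg 5
  assert(Map.slot(1030) == 5);
  Map.slot(1030) = 0;            // spilled: no longer available
  return 0;
}
// -----------------------------------------------------------------------------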
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; + + unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { + return Virt2PhysRegMap[VirtReg]; + } + + // PhysRegsUsed - This array is effectively a map, containing entries for + // each physical register that currently has a value (ie, it is in + // Virt2PhysRegMap). The value mapped to is the virtual register + // corresponding to the physical register (the inverse of the + // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned + // because it is used by a future instruction, and to -2 if it is not + // allocatable. If the entry for a physical register is -1, then the + // physical register is "not in the map". + // + std::vector<int> PhysRegsUsed; + + // UsedInInstr - BitVector of physregs that are used in the current + // instruction, and so cannot be allocated. + BitVector UsedInInstr; + + // Virt2LastUseMap - This maps each virtual register to its last use + // (MachineInstr*, operand index pair). + IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor> + Virt2LastUseMap; + + std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + return Virt2LastUseMap[Reg]; + } + + // VirtRegModified - This bitset contains information about which virtual + // registers need to be spilled back to memory when their registers are + // scavenged. If a virtual register has simply been rematerialized, there + // is no reason to spill it to memory when we need the register back. + // + BitVector VirtRegModified; + + // UsedInMultipleBlocks - Tracks whether a particular register is used in + // more than one block. + BitVector UsedInMultipleBlocks; + + void markVirtRegModified(unsigned Reg, bool Val = true) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + Reg -= TargetRegisterInfo::FirstVirtualRegister; + if (Val) + VirtRegModified.set(Reg); + else + VirtRegModified.reset(Reg); + } + + bool isVirtRegModified(unsigned Reg) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + assert(Reg - TargetRegisterInfo::FirstVirtualRegister < + VirtRegModified.size() && "Illegal virtual register!"); + return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; + } + + public: + virtual const char *getPassName() const { + return "Fast Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LiveVariables>(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + /// AllocateBasicBlock - Register allocate the specified basic block. + void AllocateBasicBlock(MachineBasicBlock &MBB); + + + /// areRegsEqual - This method returns true if the specified registers are + /// related to each other. To do this, it checks to see if they are equal + /// or if the first register is in the alias set of the second register. + /// + bool areRegsEqual(unsigned R1, unsigned R2) const { + if (R1 == R2) return true; + for (const unsigned *AliasSet = TRI->getAliasSet(R2); + *AliasSet; ++AliasSet) { + if (*AliasSet == R1) return true; + } + return false; + } + + /// getStackSpaceFor - This returns the frame index of the specified virtual + /// register on the stack, allocating space if necessary. 
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + + /// removePhysReg - This method marks the specified physical register as no + /// longer being in use. + /// + void removePhysReg(unsigned PhysReg); + + /// spillVirtReg - This method spills the value specified by PhysReg into + /// the virtual register slot specified by VirtReg. It then updates the RA + /// data structures to indicate the fact that PhysReg is now available. + /// + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned VirtReg, unsigned PhysReg); + + /// spillPhysReg - This method spills the specified physical register into + /// the virtual register slot associated with it. If OnlyVirtRegs is set to + /// true, then the request is ignored if the physical register does not + /// contain a virtual register. + /// + void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs = false); + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. + /// + void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); + + /// isPhysRegAvailable - Return true if the specified physical register is + /// free and available for use. This also includes checking to see if + /// aliased registers are all free... + /// + bool isPhysRegAvailable(unsigned PhysReg) const; + + /// isPhysRegSpillable - Can PhysReg be freed by spilling? + bool isPhysRegSpillable(unsigned PhysReg) const; + + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. + /// + unsigned getFreeReg(const TargetRegisterClass *RC); + + /// getReg - Find a physical register to hold the specified virtual + /// register. If all compatible physical registers are used, this method + /// spills the last used virtual register to the stack, and uses that + /// register. If NoFree is true, that means the caller knows there isn't + /// a free register, do not call getFreeReg(). + unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg, bool NoFree = false); + + /// reloadVirtReg - This method transforms the specified virtual + /// register use to refer to a physical register. This method may do this + /// in one of several ways: if the register is available in a physical + /// register already, it uses that physical register. If the value is not + /// in a physical register, and if there are physical registers available, + /// it loads it into a register: PhysReg if that is an available physical + /// register, otherwise any physical register of the right class. + /// If register pressure is high, and it is possible, it tries to fold the + /// load of the virtual register into the instruction itself. It avoids + /// doing this if register pressure is low to improve the chance that + /// subsequent instructions can use the reloaded value. This method + /// returns the modified instruction. + /// + MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum, SmallSet<unsigned, 4> &RRegs, + unsigned PhysReg); + + void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, + unsigned PhysReg); + }; + char RAFast::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual register +/// to be held on the stack. 
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+  // Find the location Reg would belong...
+  int SS = StackSlotForVirtReg[VirtReg];
+  if (SS != -1)
+    return SS;          // Already has space allocated?
+
+  // Allocate a new stack object for this spill location...
+  int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+                                                            RC->getAlignment());
+
+  // Assign the slot.
+  StackSlotForVirtReg[VirtReg] = FrameIdx;
+  return FrameIdx;
+}
+
+
+/// removePhysReg - This method marks the specified physical register as no
+/// longer being in use.
+///
+void RAFast::removePhysReg(unsigned PhysReg) {
+  PhysRegsUsed[PhysReg] = -1;      // PhysReg no longer used
+}
+
+
+/// spillVirtReg - This method spills the value specified by PhysReg into the
+/// virtual register slot specified by VirtReg. It then updates the RA data
+/// structures to indicate the fact that PhysReg is now available.
+///
+void RAFast::spillVirtReg(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator I,
+                          unsigned VirtReg, unsigned PhysReg) {
+  assert(VirtReg && "Spilling a physical register is illegal!"
+         " Must not have appropriate kill for the register or use exists beyond"
+         " the intended one.");
+  DEBUG(dbgs() << "  Spilling register " << TRI->getName(PhysReg)
+               << " containing %reg" << VirtReg);
+
+  if (!isVirtRegModified(VirtReg)) {
+    DEBUG(dbgs() << " which has not been modified, so no store necessary!");
+    std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
+    if (LastUse.first)
+      LastUse.first->getOperand(LastUse.second).setIsKill();
+  } else {
+    // Otherwise, there is a virtual register corresponding to this physical
+    // register. We only need to spill it into its stack slot if it has been
+    // modified.
+    const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+    int FrameIndex = getStackSpaceFor(VirtReg, RC);
+    DEBUG(dbgs() << " to stack slot #" << FrameIndex);
+    // If the instruction reads the register that's spilled, (e.g. this can
+    // happen if it is a move to a physical register), then the spill
+    // instruction is not a kill.
+    bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
+    TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+    ++NumStores;   // Update statistics
+  }
+
+  getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
+
+  DEBUG(dbgs() << '\n');
+  removePhysReg(PhysReg);
+}
+
+
+/// spillPhysReg - This method spills the specified physical register into the
+/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
+/// then the request is ignored if the physical register does not contain a
+/// virtual register.
+///
+void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
+                          unsigned PhysReg, bool OnlyVirtRegs) {
+  if (PhysRegsUsed[PhysReg] != -1) {   // Only spill it if it's used!
+    assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
+    if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
+      spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
+    return;
+  }
+
+  // If the selected register aliases any other registers, we must make
+  // sure that one of the aliases isn't alive.
+  for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+       *AliasSet; ++AliasSet) {
+    if (PhysRegsUsed[*AliasSet] == -1 ||     // Spill aliased register.
+        PhysRegsUsed[*AliasSet] == -2)       // Or not allocatable.
+ continue; + + if (PhysRegsUsed[*AliasSet]) + spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + } +} + + +/// assignVirtToPhysReg - This method updates local state so that we know +/// that PhysReg is the proper container for VirtReg now. The physical +/// register must not be used for anything else when this is called. +/// +void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); + // Update information to note the fact that this register was just used, and + // it holds VirtReg. + PhysRegsUsed[PhysReg] = VirtReg; + getVirt2PhysRegMapSlot(VirtReg) = PhysReg; + UsedInInstr.set(PhysReg); +} + + +/// isPhysRegAvailable - Return true if the specified physical register is free +/// and available for use. This also includes checking to see if aliased +/// registers are all free... +/// +bool RAFast::isPhysRegAvailable(unsigned PhysReg) const { + if (PhysRegsUsed[PhysReg] != -1) return false; + + // If the selected register aliases any other allocated registers, it is + // not free! + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? + return false; // Can't use this reg then. + return true; +} + +/// isPhysRegSpillable - Return true if the specified physical register can be +/// spilled for use in the current instruction. +/// +bool RAFast::isPhysRegSpillable(unsigned PhysReg) const { + // Test that PhysReg and all aliases are either free or assigned to a VirtReg + // that is not used in the instruction. + if (PhysRegsUsed[PhysReg] != -1 && + (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg))) + return false; + + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1 && + (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet))) + return false; + return true; +} + + +/// getFreeReg - Look to see if there is a free register available in the +/// specified register class. If not, return 0. +/// +unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; +} + + +/// getReg - Find a physical register to hold the specified virtual +/// register. If all compatible physical registers are used, this method spills +/// the last used virtual register to the stack, and uses that register. +/// +unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg, bool NoFree) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + + // First check to see if we have a free register of the requested type... + unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); + + if (PhysReg != 0) { + // Assign the register. + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; + } + + // If we didn't find an unused register, scavenge one now! Don't be fancy, + // just grab the first possible register. 
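// --- Illustrative sketch (not part of this patch) ---------------------------
// getFreeReg above and the scavenging loop below both scan the register
// class's preferred allocation order and take the first acceptable entry
// (the real checks also consult alias sets). The pattern in miniature;
// firstFree is a hypothetical name:
#include <cassert>
#include <vector>

static unsigned firstFree(const std::vector<unsigned> &Order,
                          const std::vector<bool> &InUse) {
  for (unsigned R : Order)
    if (!InUse[R])
      return R;
  return 0;                      // none free: the caller must spill
}

int main() {
  std::vector<unsigned> Order = {3, 1, 2};   // preferred allocation order
  std::vector<bool> InUse(4, false);
  InUse[3] = true;
  assert(firstFree(Order, InUse) == 1);      // 3 is taken, 1 is next
  InUse[1] = InUse[2] = true;
  assert(firstFree(Order, InUse) == 0);      // all taken: spill path
  return 0;
}
// -----------------------------------------------------------------------------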
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegSpillable(*RI)) { + PhysReg = *RI; + break; + } + + assert(PhysReg && "Physical register not assigned!?!?"); + spillPhysReg(MBB, I, PhysReg); + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; +} + + +/// reloadVirtReg - This method transforms the specified virtual +/// register use to refer to a physical register. This method may do this in +/// one of several ways: if the register is available in a physical register +/// already, it uses that physical register. If the value is not in a physical +/// register, and if there are physical registers available, it loads it into a +/// register: PhysReg if that is an available physical register, otherwise any +/// register. If register pressure is high, and it is possible, it tries to +/// fold the load of the virtual register into the instruction itself. It +/// avoids doing this if register pressure is low to improve the chance that +/// subsequent instructions can use the reloaded value. This method returns +/// the modified instruction. +/// +MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum, + SmallSet<unsigned, 4> &ReloadedRegs, + unsigned PhysReg) { + unsigned VirtReg = MI->getOperand(OpNum).getReg(); + + // If the virtual register is already available, just update the instruction + // and return. + if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + MI->getOperand(OpNum).setReg(PR); // Assign the input register + if (!MI->isDebugValue()) { + // Do not do these for DBG_VALUE as they can affect codegen. + UsedInInstr.set(PR); + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + } + return MI; + } + + // Otherwise, we need to fold it into the current instruction, or reload it. + // If we have registers available to hold the value, use them. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + // If we already have a PhysReg (this happens when the instruction is a + // reg-to-reg copy with a PhysReg destination) use that. + if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || + !isPhysRegAvailable(PhysReg)) + PhysReg = getFreeReg(RC); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + + if (PhysReg) { // Register is available, allocate it! + assignVirtToPhysReg(VirtReg, PhysReg); + } else { // No registers available. + // Force some poor hapless value out of the register file to + // make room for the new register, and reload it. 
+ PhysReg = getReg(MBB, MI, VirtReg, true); + } + + markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded + + DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " + << TRI->getName(PhysReg) << "\n"); + + // Add move instruction(s) + TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + ++NumLoads; // Update statistics + + MF->getRegInfo().setPhysRegUsed(PhysReg); + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + + if (!ReloadedRegs.insert(PhysReg)) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for invalid " + << "constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); + } + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (ReloadedRegs.insert(*SubRegs)) continue; + + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for invalid " + << "constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); + } + + return MI; +} + +/// isReadModWriteImplicitKill - True if this is an implicit kill for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + MO.isDef() && !MO.isDead()) + return true; + } + return false; +} + +/// isReadModWriteImplicitDef - True if this is an implicit def for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + !MO.isDef() && MO.isKill()) + return true; + } + return false; +} + +void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + + DEBUG({ + const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) + dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + }); + + // Add live-in registers as active. + for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), + E = MBB.livein_end(); I != E; ++I) { + unsigned Reg = *I; + MF->getRegInfo().setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + + // Otherwise, sequentially allocate each instruction in the MBB. + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG({ + dbgs() << "\nStarting RegAlloc of: " << *MI; + dbgs() << " Regs have values: "; + for (unsigned i = 0; i != TRI->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + dbgs() << "[" << TRI->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + dbgs() << '\n'; + }); + + // Track registers used by instruction. 
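// --- Illustrative sketch (not part of this patch) ---------------------------
// UsedInInstr is one bit per physical register, cleared below before the
// use-processing phase (and again later before the def phase), so no register
// is handed out twice within a single instruction. Its lifecycle, in
// miniature with std::bitset:
#include <bitset>
#include <cassert>

int main() {
  std::bitset<64> UsedInInstr;
  UsedInInstr.reset();            // new instruction: nothing claimed yet
  UsedInInstr.set(5);             // an operand was allocated physreg 5
  assert(UsedInInstr.test(5));    // 5 is off-limits for later operands
  assert(!UsedInInstr.test(6));   // 6 is still a candidate
  UsedInInstr.reset();            // def phase starts from a clean slate
  return 0;
}
// -----------------------------------------------------------------------------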
+ UsedInInstr.reset(); + + // Determine whether this is a copy instruction. The cases where the + // source or destination are phys regs are handled specially. + unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; + unsigned SrcCopyPhysReg = 0U; + bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg); + if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) + SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); + + // Loop over the implicit uses, making sure they don't get reallocated. + if (TID.ImplicitUses) { + for (const unsigned *ImplicitUses = TID.ImplicitUses; + *ImplicitUses; ++ImplicitUses) + UsedInInstr.set(*ImplicitUses); + } + + SmallVector<unsigned, 8> Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); + } + + // If any physical regs are earlyclobber, spill any value they might + // have in them, then mark them unallocatable. + // If any virtual regs are earlyclobber, allocate them now (before + // freeing inputs that are killed). + if (MI->isInlineAsm()) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || + !MO.getReg()) + continue; + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = getReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = + std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the earlyclobber register + } else { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + } + } + } + } + + // If a DBG_VALUE says something is located in a spilled register, + // change the DBG_VALUE to be undef, which prevents the register + // from being reloaded here. Doing that would change the generated + // code, unless another use immediately follows this instruction. 
+ if (MI->isDebugValue() && + MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { + unsigned VirtReg = MI->getOperand(0).getReg(); + if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && + !getVirt2PhysRegMapSlot(VirtReg)) + MI->getOperand(0).setReg(0U); + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. + // + SmallSet<unsigned, 4> ReloadedRegs; + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, + isCopy ? DstCopyReg : 0); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else { + assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && + "Silently clearing a virtual register?"); + } + + if (!PhysReg) continue; + + DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + DEBUG(dbgs() << " Last use of " + << TRI->getName(*SubRegs) << "[%reg" << VirtReg + <<"], removing it from live set\n"); + removePhysReg(*SubRegs); + } + } + } + + // Track registers defined by instruction. + UsedInInstr.reset(); + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. 
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + } + } + + // Loop over the implicit defs, spilling them as well. + if (TID.ImplicitDefs) { + for (const unsigned *ImplicitDefs = TID.ImplicitDefs; + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + if (PhysRegsUsed[Reg] != -2) { + spillPhysReg(MBB, MI, Reg, true); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->getRegInfo().setPhysRegUsed(Reg); + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + } + + SmallVector<unsigned, 8> DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { + // If this is a copy try to reuse the input as the output; + // that will make the copy go away. + // If this is a copy, the source reg is a phys reg, and + // that reg is available, use that phys reg for DestPhysReg. + // If this is a copy, the source reg is a virtual reg, and + // the phys reg that was assigned to that virtual reg is now + // available, use that phys reg for DestPhysReg. (If it's now + // available that means this was the last use of the source.) + if (isCopy && + TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && + isPhysRegAvailable(SrcCopyReg)) { + DestPhysReg = SrcCopyReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else if (isCopy && + TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && + SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && + MF->getRegInfo().getRegClass(DestVirtReg)-> + contains(SrcCopyPhysReg)) { + DestPhysReg = SrcCopyPhysReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else + DestPhysReg = getReg(MBB, MI, DestVirtReg); + } + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the output register + UsedInInstr.set(DestPhysReg); + } + + // If this instruction defines any registers that are immediately dead, + // kill them now. 
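// (DeadDefs was gathered before the def loop above, so even a dead def
// is first assigned a register; it is only released below, once the
// instruction has been fully rewritten.)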
+ // + for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { + unsigned VirtReg = DeadDefs[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + assert(PhysReg != 0); + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else if (!PhysReg) + continue; + + DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"); + removePhysReg(*AliasSet); + } + } + } + + // Finally, if this is a noop copy instruction, zap it. (Except that if + // the copy is dead, it must be kept to avoid messing up liveness info for + // the register scavenger. See pr4100.) + if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg) && + SrcCopyReg == DstCopyReg && DeadDefs.empty()) + MBB.erase(MI); + } + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + + // Spill all physical registers holding virtual registers now. + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (unsigned VirtReg = PhysRegsUsed[i]) + spillVirtReg(MBB, MI, VirtReg, i); + else + removePhysReg(i); + } +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RAFast::runOnMachineFunction(MachineFunction &Fn) { + DEBUG(dbgs() << "Machine Function\n"); + MF = &Fn; + TM = &Fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + + PhysRegsUsed.assign(TRI->getNumRegs(), -1); + UsedInInstr.resize(TRI->getNumRegs()); + + // At various places we want to efficiently check to see whether a register + // is allocatable. To handle this, we mark all unallocatable registers as + // being pinned down, permanently. + { + BitVector Allocable = TRI->getAllocatableSet(Fn); + for (unsigned i = 0, e = Allocable.size(); i != e; ++i) + if (!Allocable[i]) + PhysRegsUsed[i] = -2; // Mark the reg unallocable. 
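// (Entries pinned at -2 are skipped by every allocation and spill loop
// above, so reserved registers such as the stack pointer are never
// touched.)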
+ } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); + StackSlotForVirtReg.grow(LastVirtReg); + Virt2PhysRegMap.grow(LastVirtReg); + Virt2LastUseMap.grow(LastVirtReg); + VirtRegModified.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); + UsedInMultipleBlocks.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) + AllocateBasicBlock(*MBB); + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + UsedInMultipleBlocks.clear(); + Virt2PhysRegMap.clear(); + Virt2LastUseMap.clear(); + return true; +} + +FunctionPass *llvm::createFastRegisterAllocator() { + return new RAFast(); +} diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 5c5a394cc018..6c8fc0c2a322 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -1177,7 +1177,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { assignRegOrStackSlotAtInterval(cur); } else { assert(false && "Ran out of registers during register allocation!"); - llvm_report_error("Ran out of registers during register allocation!"); + report_fatal_error("Ran out of registers during register allocation!"); } return; } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 0ef041e76b20..94456d143855 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -189,6 +189,9 @@ namespace { /// void removePhysReg(unsigned PhysReg); + void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg, bool isKill); + /// spillVirtReg - This method spills the value specified by PhysReg into /// the virtual register slot specified by VirtReg. It then updates the RA /// data structures to indicate the fact that PhysReg is now available. @@ -286,6 +289,17 @@ void RALocal::removePhysReg(unsigned PhysReg) { PhysRegsUseOrder.erase(It); } +/// storeVirtReg - Store a virtual register to its assigned stack slot. +void RALocal::storeVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg, + bool isKill) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << " to stack slot #" << FrameIndex); + TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); + ++NumStores; // Update statistics +} /// spillVirtReg - This method spills the value specified by PhysReg into the /// virtual register slot specified by VirtReg. It then updates the RA data @@ -309,15 +323,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // Otherwise, there is a virtual register corresponding to this physical // register. We only need to spill it into its stack slot if it has been // modified. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. 
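// (A hypothetical case: the store is inserted just before a move that
// copies PhysReg into another register, so PhysReg is still read at I
// and the spill must not be marked as a kill.)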
bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); - ++NumStores; // Update statistics + storeVirtReg(MBB, I, VirtReg, PhysReg, isKill); } getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available @@ -549,7 +559,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, << "constraints:\n"; MI->print(Msg, TM); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { @@ -563,7 +573,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, << "constraints:\n"; MI->print(Msg, TM); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } return MI; @@ -633,7 +643,10 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // uses regs before it defs them. if (!MO.isReg() || !MO.getReg() || !MO.isUse()) continue; - + + // Ignore helpful kill flags from earlier passes. + MO.setIsKill(false); + LastUseDef[MO.getReg()] = std::make_pair(I, i); if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; @@ -801,9 +814,12 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { dbgs() << "\nStarting RegAlloc of: " << *MI; dbgs() << " Regs have values: "; for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i])) + dbgs() << "*"; dbgs() << "[" << TRI->getName(i) << ",%reg" << PhysRegsUsed[i] << "] "; + } dbgs() << '\n'; }); @@ -1092,6 +1108,20 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } } + // If this instruction is a call, make sure there are no dirty registers. The + // call might throw an exception, and the landing pad expects to find all + // registers in stack slots. + if (TID.isCall()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if (PhysRegsUsed[i] <= 0) continue; + unsigned VirtReg = PhysRegsUsed[i]; + if (!isVirtRegModified(VirtReg)) continue; + DEBUG(dbgs() << " Storing dirty %reg" << VirtReg); + storeVirtReg(MBB, MI, VirtReg, i, false); + markVirtRegModified(VirtReg, false); + DEBUG(dbgs() << " because the call might throw\n"); + } + // Finally, if this is a noop copy instruction, zap it. (Except that if // the copy is dead, it must be kept to avoid messing up liveness info for // the register scavenger. See pr4100.) diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 67bf209c7325..179984f2a998 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -64,7 +64,7 @@ void RegScavenger::initRegState() { return; // Live-in registers are in use. - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) setUsed(*I); @@ -136,6 +136,9 @@ void RegScavenger::forward() { ScavengeRestore = NULL; } + if (MI->isDebugValue()) + return; + // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. 
BitVector EarlyClobberRegs(NumPhysRegs); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1f3e295fe979..587f001cc7bf 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -29,7 +29,6 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) TRI(TM.getRegisterInfo()), TLI(TM.getTargetLowering()), MF(mf), MRI(mf.getRegInfo()), - ConstPool(MF.getConstantPool()), EntrySU(), ExitSU() { } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ecc49e2fc1b6..ca235c3179ac 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -272,8 +272,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // perform its own adjustments. const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); } @@ -285,8 +285,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); } @@ -572,8 +572,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { } // EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs:: -EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { // For MachineInstr-based scheduling, we're rescheduling the instructions in // the block, so start by removing them from the block. while (Begin != InsertPos) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c9b44de85ec4..d70608f30498 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -20,7 +20,6 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include <map> @@ -171,8 +170,7 @@ namespace llvm { virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const; - virtual MachineBasicBlock* - EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*); + virtual MachineBasicBlock *EmitSchedule(); /// StartBlock - Prepare to perform scheduling in the given block. 
/// diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 80c7d7c9eb9c..0cfd5e1d7e21 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGISel.cpp SelectionDAGPrinter.cpp TargetLowering.cpp + TargetSelectionDAGInfo.cpp ) target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a336e0a01b56..3639f8047460 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,6 +129,14 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the @@ -254,24 +262,28 @@ namespace { /// looking for a better chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// getShiftAmountTy - Returns a type large enough to hold any valid - /// shift amount - before type legalization these can be huge. - EVT getShiftAmountTy() { - return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); - } - -public: + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), - TLI(D.getTargetLoweringInfo()), - Level(Unrestricted), - OptLevel(OL), - LegalOperations(false), - LegalTypes(false), - AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + EVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + + /// isTypeLegal - This method returns true if we are running before type + /// legalization or if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } }; } @@ -577,9 +589,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, return SDValue(N, 0); } -void -DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & - TLO) { +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorkListRemover DeadNodes(*this); @@ -609,7 +620,7 @@ DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. 
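/// (The TargetLoweringOpt constructed below now carries the combiner's
/// LegalTypes and LegalOperations flags, presumably so that target
/// hooks can tell which legalization phase the combine runs in.)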
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { - TargetLowering::TargetLoweringOpt TLO(DAG); + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; @@ -629,6 +640,274 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return true; } +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + DebugLoc dl = Load->getDebugLoc(); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), + &DeadNodes); + removeFromWorkList(Load); + DAG.DeleteNode(Load); + AddToWorkList(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + DebugLoc dl = Op.getDebugLoc(); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// PromoteIntBinOp - Promote the specified integer binary operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. 
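/// A sketch of the rewrite, for a hypothetical undesirable i16 add:
///   (add i16 x, y) -> (trunc (add i32 (any_ext x), (any_ext y)))
/// with any load feeding x or y widened in place through
/// ReplaceLoadWithPromotedLoad so the extension folds into the load.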
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (NN0.getNode() == 0) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + + AddToWorkList(NN0.getNode()); + AddToWorkList(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// PromoteIntShiftOp - Promote the specified integer shift operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (N0.getNode() == 0) + return SDValue(); + + AddToWorkList(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. 
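// (PVT is passed by reference; when the hook answers true it is
// expected to have been overwritten with the preferred type, which is
// what the assert just below checks.)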
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + DebugLoc dl = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + + DEBUG(dbgs() << "\nPromoting "; + N->dump(&DAG); + dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + AddToWorkList(Result.getNode()); + return true; + } + return false; +} + + //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// @@ -732,7 +1011,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } SDValue DAGCombiner::visit(SDNode *N) { - switch(N->getOpcode()) { + switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); @@ -817,6 +1096,35 @@ SDValue DAGCombiner::combine(SDNode *N) { } } + // If nothing happened still, try promoting the operation. + if (RV.getNode() == 0) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. if (RV.getNode() == 0 && @@ -1720,8 +2028,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // into a vsetcc. 
EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && @@ -2579,7 +2889,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSHL.getNode()) + return NewSHL; + } + + return SDValue(); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -2693,7 +3009,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSRA.getNode()) + return NewSRA; + } + + return SDValue(); } SDValue DAGCombiner::visitSRL(SDNode *N) { @@ -2731,6 +3053,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } + + // fold (srl (shl x, c), c) -> (and x, cst2) + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + N0.getValueSizeInBits() <= 64) { + uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -2739,10 +3070,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); - AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } } // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign @@ -3205,24 +3538,40 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. - if (VT.isVector() && + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && - - // Only do this before legalize for now. 
- !LegalOperations) { - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + else { + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } - + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), NegOne, DAG.getConstant(0, VT), @@ -3624,7 +3973,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && - cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && + cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -3694,7 +4043,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. 
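// (The width test below now admits equal widths but, in exchange,
// requires SIGN_EXTEND on VT to be legal once operations have been
// legalized.)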
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3779,7 +4129,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x - if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + if (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend @@ -3805,7 +4156,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) + return ReduceLoadWidth(N); + return SDValue(); } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { @@ -3949,7 +4302,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); - if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { + if (isTypeLegal(IntXVT)) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); @@ -4075,8 +4428,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). - zextOrTrunc(SrcBitSize).zext(DstBitSize)); + NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + zextOrTrunc(SrcBitSize).zext(DstBitSize); } if (EltIsUndef) @@ -4464,7 +4817,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) { + if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); } @@ -4676,7 +5029,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (Tmp.getNode() && Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); dbgs() << "\nWith: "; @@ -5145,6 +5498,136 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return SDValue(); } +/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the +/// load is having specific bytes cleared out. If so, return the byte size +/// being masked out and the shift amount. +static std::pair<unsigned, unsigned> +CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { + std::pair<unsigned, unsigned> Result(0, 0); + + // Check for the structure we're looking for. 
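// A worked (hypothetical) i32 instance: (and (load p), 0xFFFF00FF)
// clears exactly byte 1, so the checks below yield Result = {1, 1},
// i.e. one masked byte starting one byte above the load address.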
+ if (V->getOpcode() != ISD::AND || + !isa<ConstantSDNode>(V->getOperand(1)) || + !ISD::isNormalLoad(V->getOperand(0).getNode())) + return Result; + + // Check the chain and pointer. + LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); + if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. + + // The store should be chained directly to the load or be an operand of a + // tokenfactor. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() != ISD::TokenFactor) + return Result; // Fail. + else { + bool isOk = false; + for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) + if (Chain->getOperand(i).getNode() == LD) { + isOk = true; + break; + } + if (!isOk) return Result; + } + + // This only handles simple types. + if (V.getValueType() != MVT::i16 && + V.getValueType() != MVT::i32 && + V.getValueType() != MVT::i64) + return Result; + + // Check the constant mask. Invert it so that the bits being masked out are + // 0 and the bits being kept are 1. Use getSExtValue so that leading bits + // follow the sign bit for uniformity. + uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); + unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. + unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. + if (NotMaskLZ == 64) return Result; // All zero mask. + + // See if we have a continuous run of bits. If so, we have 0*1+0* + if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + return Result; + + // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. + if (V.getValueType() != MVT::i64 && NotMaskLZ) + NotMaskLZ -= 64-V.getValueSizeInBits(); + + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; + switch (MaskedBytes) { + case 1: + case 2: + case 4: break; + default: return Result; // All one mask, or 5-byte mask. + } + + // Verify that the first bit starts at a multiple of mask so that the access + // is aligned the same as the access width. + if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + + Result.first = MaskedBytes; + Result.second = NotMaskTZ/8; + return Result; +} + + +/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that +/// provides a value as specified by MaskInfo. If so, replace the specified +/// store with a narrower store of truncated IVal. +static SDNode * +ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, + SDValue IVal, StoreSDNode *St, + DAGCombiner *DC) { + unsigned NumBytes = MaskInfo.first; + unsigned ByteShift = MaskInfo.second; + SelectionDAG &DAG = DC->getDAG(); + + // Check to see if IVal is all zeros in the part being masked in by the 'or' + // that uses this. If not, this is not a replacement. + APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), + ByteShift*8, (ByteShift+NumBytes)*8); + if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + + // Check that it is legal on the target to do this. It is legal if the new + // VT we're shrinking to (i8/i16/i32) is legal or we're still before type + // legalization. + MVT VT = MVT::getIntegerVT(NumBytes*8); + if (!DC->isTypeLegal(VT)) + return 0; + + // Okay, we can do this! Replace the 'St' store with a store of IVal that is + // shifted by ByteShift and truncated down to NumBytes. 
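// Continuing the hypothetical {NumBytes = 1, ByteShift = 1} case on a
// little-endian target: IVal is shifted right by 8, truncated to i8,
// and stored at Ptr + 1 with the alignment reduced to match.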
+ if (ByteShift) + IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DC->getShiftAmountTy())); + + // Figure out the offset for the store and the alignment of the access. + unsigned StOffset; + unsigned NewAlign = St->getAlignment(); + + if (DAG.getTargetLoweringInfo().isLittleEndian()) + StOffset = ByteShift; + else + StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; + + SDValue Ptr = St->getBasePtr(); + if (StOffset) { + Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + NewAlign = MinAlign(NewAlign, StOffset); + } + + // Truncate down to the new size. + IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + + ++OpsNarrowed; + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getSrcValue(), St->getSrcValueOffset()+StOffset, + false, false, NewAlign).getNode(); +} + /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some @@ -5164,6 +5647,28 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); unsigned Opc = Value.getOpcode(); + + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst + // is a byte mask indicating a consecutive number of bytes, check to see if + // Y is known to provide just those bytes. If so, we try to replace the + // load + replace + store sequence with a single (narrower) store, which makes + // the load dead. + if (Opc == ISD::OR) { + std::pair<unsigned, unsigned> MaskedLoad; + MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(1), ST,this)) + return SDValue(NewST, 0); + + // Or is commutative, so try swapping X and Y. + MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(0), ST,this)) + return SDValue(NewST, 0); + } + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -5211,8 +5716,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); - if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) + const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5282,8 +5787,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { case MVT::ppcf128: break; case MVT::f32: - if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && - !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
bitcastToAPInt().getZExtValue(), MVT::i32); @@ -5294,7 +5798,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } break; case MVT::f64: - if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -5551,7 +6055,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { InVec = InVec.getOperand(0); if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); - Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } @@ -5659,7 +6163,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // Add count and size info. - if (!TLI.isTypeLegal(VT) && LegalTypes) + if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. @@ -6287,7 +6791,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { /// FindBaseOffset - Return true if base is a frame index, which is known not // to alias with anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -6335,7 +6839,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - GlobalValue *GV1, *GV2; + const GlobalValue *GV1, *GV2; void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 4bf41f28c250..b4c38332691c 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1,4 +1,4 @@ -///===-- FastISel.cpp - Implementation of the FastISel class --------------===// +//===-- FastISel.cpp - Implementation of the FastISel class ---------------===// // // The LLVM Compiler Infrastructure // @@ -52,10 +52,11 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "FunctionLoweringInfo.h" using namespace llvm; -unsigned FastISel::getRegForValue(Value *V) { +unsigned FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) @@ -83,7 +84,16 @@ unsigned FastISel::getRegForValue(Value *V) { if (Reg != 0) return Reg; - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + return materializeRegForValue(V, VT); +} + +/// materializeRegForValue - Helper for getRegForVale. This function is +/// called when the value isn't already available in a register and must +/// be materialized with new instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().getActiveBits() <= 64) Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); } else if (isa<AllocaInst>(V)) { @@ -93,10 +103,12 @@ unsigned FastISel::getRegForValue(Value *V) { // local-CSE'd with actual integer zeros. 
Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + // Try to emit the constant directly. Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { + // Try to emit the constant by using an integer constant with a cast. const APFloat &Flt = CF->getValueAPF(); EVT IntVT = TLI.getPointerTy(); @@ -114,9 +126,9 @@ unsigned FastISel::getRegForValue(Value *V) { Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); } } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (!SelectOperator(CE, CE->getOpcode())) return 0; - Reg = LocalValueMap[CE]; + } else if (const Operator *Op = dyn_cast<Operator>(V)) { + if (!SelectOperator(Op, Op->getOpcode())) return 0; + Reg = LocalValueMap[Op]; } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); @@ -134,11 +146,11 @@ unsigned FastISel::getRegForValue(Value *V) { return Reg; } -unsigned FastISel::lookUpRegForValue(Value *V) { +unsigned FastISel::lookUpRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. We // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA - // def-dominatess-use requirement enforced. + // def-dominates-use requirement enforced. if (ValueMap.count(V)) return ValueMap[V]; return LocalValueMap[V]; @@ -150,7 +162,7 @@ unsigned FastISel::lookUpRegForValue(Value *V) { /// NOTE: This is only necessary because we might select a block that uses /// a value before we select the block that defines the value. It might be /// possible to fix this by selecting blocks in reverse postorder. -unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { +unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; return Reg; @@ -167,7 +179,7 @@ unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { return AssignedReg; } -unsigned FastISel::getRegForGEPIndex(Value *Idx) { +unsigned FastISel::getRegForGEPIndex(const Value *Idx) { unsigned IdxN = getRegForValue(Idx); if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. @@ -186,7 +198,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// -bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) { +bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -252,7 +264,7 @@ bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) { return true; } -bool FastISel::SelectGetElementPtr(User *I) { +bool FastISel::SelectGetElementPtr(const User *I) { unsigned N = getRegForValue(I->getOperand(0)); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
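// (A hypothetical instance of the GEP lowering in the next hunk,
// assuming 4-byte i32: a struct field access adds its struct-layout
// offset to N, while an array index is scaled by getTypeAllocSize and
// added through a register.)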
@@ -260,9 +272,9 @@ bool FastISel::SelectGetElementPtr(User *I) { const Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); - for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); - OI != E; ++OI) { - Value *Idx = *OI; + for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, + E = I->op_end(); OI != E; ++OI) { + const Value *Idx = *OI; if (const StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { @@ -280,7 +292,7 @@ bool FastISel::SelectGetElementPtr(User *I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); @@ -316,51 +328,56 @@ bool FastISel::SelectGetElementPtr(User *I) { return true; } -bool FastISel::SelectCall(User *I) { - Function *F = cast<CallInst>(I)->getCalledFunction(); +bool FastISel::SelectCall(const User *I) { + const Function *F = cast<CallInst>(I)->getCalledFunction(); if (!F) return false; + // Handle selected intrinsic function calls. unsigned IID = F->getIntrinsicID(); switch (IID) { default: break; case Intrinsic::dbg_declare: { - DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) || !MF.getMMI().hasDebugInfo()) return true; - Value *Address = DI->getAddress(); + const Value *Address = DI->getAddress(); if (!Address) return true; - AllocaInst *AI = dyn_cast<AllocaInst>(Address); + if (isa<UndefValue>(Address)) + return true; + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) break; - DenseMap<const AllocaInst*, int>::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. - int FI = SI->second; - if (!DI->getDebugLoc().isUnknown()) - MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); - - // Building the map above is target independent. Generating DBG_VALUE - // inline is target dependent; do this now. - (void)TargetSelectInstruction(cast<Instruction>(I)); + // Note that if we have a byval struct argument, fast ISel is turned off; + // those are handled in SelectionDAGBuilder. + if (AI) { + DenseMap<const AllocaInst*, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + if (!DI->getDebugLoc().isUnknown()) + MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + } else + // Building the map above is target independent. Generating DBG_VALUE + // inline is target dependent; do this now. + (void)TargetSelectInstruction(cast<Instruction>(I)); return true; } case Intrinsic::dbg_value: { - // This requires target support, but right now X86 is the only Fast target. - DbgValueInst *DI = cast<DbgValueInst>(I); + // This form of DBG_VALUE is target-independent. + const DbgValueInst *DI = cast<DbgValueInst>(I); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); - Value *V = DI->getValue(); + const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). 
addMetadata(DI->getVariable()); - } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). addMetadata(DI->getVariable()); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -438,10 +455,12 @@ bool FastISel::SelectCall(User *I) { break; } } + + // An arbitrary call. Bail. return false; } -bool FastISel::SelectCast(User *I, unsigned Opcode) { +bool FastISel::SelectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); @@ -493,7 +512,7 @@ bool FastISel::SelectCast(User *I, unsigned Opcode) { return true; } -bool FastISel::SelectBitCast(User *I) { +bool FastISel::SelectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { unsigned Reg = getRegForValue(I->getOperand(0)); @@ -544,15 +563,28 @@ bool FastISel::SelectBitCast(User *I) { } bool -FastISel::SelectInstruction(Instruction *I) { +FastISel::SelectInstruction(const Instruction *I) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(I)) + if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + return false; + + DL = I->getDebugLoc(); + // First, try doing target-independent selection. - if (SelectOperator(I, I->getOpcode())) + if (SelectOperator(I, I->getOpcode())) { + DL = DebugLoc(); return true; + } // Next, try calling the target to attempt to handle the instruction. - if (TargetSelectInstruction(I)) + if (TargetSelectInstruction(I)) { + DL = DebugLoc(); return true; + } + DL = DebugLoc(); return false; } @@ -573,7 +605,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { /// SelectFNeg - Emit an FNeg operation. /// bool -FastISel::SelectFNeg(User *I) { +FastISel::SelectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); if (OpReg == 0) return false; @@ -614,7 +646,7 @@ FastISel::SelectFNeg(User *I) { } bool -FastISel::SelectOperator(User *I, unsigned Opcode) { +FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); @@ -660,10 +692,10 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { return SelectGetElementPtr(I); case Instruction::Br: { - BranchInst *BI = cast<BranchInst>(I); + const BranchInst *BI = cast<BranchInst>(I); if (BI->isUnconditional()) { - BasicBlock *LLVMSucc = BI->getSuccessor(0); + const BasicBlock *LLVMSucc = BI->getSuccessor(0); MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; FastEmitBranch(MSucc); return true; @@ -678,10 +710,6 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { // Nothing to emit. return true; - case Instruction::PHI: - // PHI nodes are already emitted. - return true; - case Instruction::Alloca: // FunctionLowering has the static-sized case covered. if (StaticAllocaMap.count(cast<AllocaInst>(I))) @@ -721,6 +749,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { return true; } + case Instruction::PHI: + llvm_unreachable("FastISel shouldn't visit PHI nodes!"); + default: // Unhandled instruction. Halt "fast" selection and bail. 
return false; @@ -730,15 +761,17 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { FastISel::FastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) : MBB(0), ValueMap(vm), MBBMap(bm), StaticAllocaMap(am), + PHINodesToUpdate(pn), #ifndef NDEBUG CatchInfoLost(cil), #endif @@ -775,7 +808,7 @@ unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { } unsigned FastISel::FastEmit_f(MVT, MVT, - unsigned, ConstantFP * /*FPImm*/) { + unsigned, const ConstantFP * /*FPImm*/) { return 0; } @@ -787,7 +820,7 @@ unsigned FastISel::FastEmit_ri(MVT, MVT, unsigned FastISel::FastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/, - ConstantFP * /*FPImm*/) { + const ConstantFP * /*FPImm*/) { return 0; } @@ -820,7 +853,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and tries FastEmit_rr instead. unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, ConstantFP *FPImm, + unsigned Op0, const ConstantFP *FPImm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); @@ -930,7 +963,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, ConstantFP *FPImm) { + unsigned Op0, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); @@ -1006,3 +1039,67 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { return FastEmit_ri(VT, VT, ISD::AND, Op, 1); } + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBBs for one BB. As such, the start of the +/// BB might correspond to a different MBB than the end. +bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. 
+ for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + // Ignore dead PHIs. + if (PN->use_empty()) continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary because FastISel doesn't + // use CreateRegForValue to create registers, so it always creates + // exactly one register for each non-void instruction. + EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Promote MVT::i1. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); + else { + PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + } + + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + unsigned Reg = getRegForValue(PHIOp); + if (Reg == 0) { + PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + } + } + + return true; +} diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 4fb2aa299cbe..65c36c1289db 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -14,19 +14,18 @@ #define DEBUG_TYPE "function-lowering-info" #include "FunctionLoweringInfo.h" -#include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" @@ -34,99 +33,21 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; -/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// -unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, - const unsigned *Indices, - const unsigned *IndicesEnd, - unsigned CurIndex) { - // Base case: We're done. - if (Indices && Indices == IndicesEnd) - return CurIndex; - - // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) { - if (Indices && *Indices == unsigned(EI - EB)) - return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); - } - return CurIndex; - } - // Given an array type, recursively traverse the elements. 
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { - if (Indices && *Indices == i) - return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); - } - return CurIndex; - } - // We haven't found the type we're looking for, so keep searching. - return CurIndex + 1; -} - -/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// EVTs that represent all the individual underlying -/// non-aggregate types that comprise it. -/// -/// If Offsets is non-null, it points to a vector to be filled in -/// with the in-memory offsets of each of the individual values. -/// -void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, - SmallVectorImpl<uint64_t> *Offsets, - uint64_t StartingOffset) { - // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) - ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, - StartingOffset + SL->getElementOffset(EI - EB)); - return; - } - // Given an array type, recursively traverse the elements. - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, - StartingOffset + i * EltSize); - return; - } - // Interpret void as zero return values. - if (Ty->isVoidTy()) - return; - // Base case: we can get an EVT for this LLVM IR type. - ValueVTs.push_back(TLI.getValueType(Ty)); - if (Offsets) - Offsets->push_back(StartingOffset); -} - /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a /// switch or atomic instruction, which may expand to multiple basic blocks. -static bool isUsedOutsideOfDefiningBlock(Instruction *I) { +static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { + if (I->use_empty()) return false; if (isa<PHINode>(I)) return true; - BasicBlock *BB = I->getParent(); - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + const BasicBlock *BB = I->getParent(); + for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) return true; return false; @@ -135,26 +56,25 @@ static bool isUsedOutsideOfDefiningBlock(Instruction *I) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - // Don't force virtual registers for byval arguments though, because - // fast-isel can't handle those in all cases. 
- if (EnableFastISel && !A->hasByValAttr()) + if (EnableFastISel) return A->use_empty(); - BasicBlock *Entry = A->getParent()->begin(); - for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) return false; // Use not in entry block. return true; } -FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) +FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } -void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, bool EnableFastISel) { Fn = &fn; MF = &mf; @@ -162,7 +82,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. - for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); AI != E; ++AI) if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) InitializeRegForValue(AI); @@ -170,10 +90,10 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. - Function::iterator BB = Fn->begin(), EB = Fn->end(); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + Function::const_iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { const Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = @@ -187,8 +107,8 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } for (; BB != EB; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isUsedOutsideOfDefiningBlock(I)) if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) InitializeRegForValue(I); @@ -196,7 +116,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. - for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + for (BB = Fn->begin(); BB != EB; ++BB) { MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); MBBMap[BB] = MBB; MF->push_back(MBB); @@ -209,14 +129,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. 
- PHINode *PN; - DebugLoc DL; - for (BasicBlock::iterator - I = BB->begin(), E = BB->end(); I != E; ++I) { - - PN = dyn_cast<PHINode>(I); - if (!PN || PN->use_empty()) continue; + for (BasicBlock::const_iterator I = BB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (PN->use_empty()) continue; + DebugLoc DL = PN->getDebugLoc(); unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); @@ -232,12 +149,20 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } } } + + // Mark landing pad blocks. + for (BB = Fn->begin(); BB != EB; ++BB) + if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); } /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void FunctionLoweringInfo::clear() { + assert(CatchInfoFound.size() == CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); + MBBMap.clear(); ValueMap.clear(); StaticAllocaMap.clear(); @@ -246,6 +171,7 @@ void FunctionLoweringInfo::clear() { CatchInfoFound.clear(); #endif LiveOutRegInfo.clear(); + ArgDbgValues.clear(); } unsigned FunctionLoweringInfo::MakeReg(EVT VT) { @@ -277,30 +203,12 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { return FirstReg; } -/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -GlobalVariable *llvm::ExtractTypeInfo(Value *V) { - V = V->stripPointerCasts(); - GlobalVariable *GV = dyn_cast<GlobalVariable>(V); - - if (GV && GV->getName() == ".llvm.eh.catch.all.value") { - assert(GV->hasInitializer() && - "The EH catch-all value must have an initializer"); - Value *Init = GV->getInitializer(); - GV = dyn_cast<GlobalVariable>(Init); - if (!GV) V = cast<ConstantPointerNull>(Init); - } - - assert((GV || isa<ConstantPointerNull>(V)) && - "TypeInfo must be a global variable or NULL"); - return GV; -} - /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. -void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, +void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. - ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); assert(CE->getOpcode() == Instruction::BitCast && isa<Function>(CE->getOperand(0)) && "Personality should be a function"); @@ -308,11 +216,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. 
- std::vector<GlobalVariable *> TyInfo; + std::vector<const GlobalVariable *> TyInfo; unsigned N = I.getNumOperands(); for (unsigned i = N - 1; i > 2; --i) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; assert (FirstCatch <= N && "Invalid filter length"); @@ -349,10 +257,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, +void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) - if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end(); + I != E; ++I) + if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { // Apply the catch info to DestBB. AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); #ifndef NDEBUG diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h index d851e6429c0c..4067a5b33044 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h @@ -15,12 +15,17 @@ #ifndef FUNCTIONLOWERINGINFO_H #define FUNCTIONLOWERINGINFO_H +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #ifndef NDEBUG #include "llvm/ADT/SmallSet.h" #endif #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/Support/CallSite.h" #include <vector> namespace llvm { @@ -31,6 +36,7 @@ class CallInst; class Function; class GlobalVariable; class Instruction; +class MachineInstr; class MachineBasicBlock; class MachineFunction; class MachineModuleInfo; @@ -44,8 +50,8 @@ class Value; /// class FunctionLoweringInfo { public: - TargetLowering &TLI; - Function *Fn; + const TargetLowering &TLI; + const Function *Fn; MachineFunction *MF; MachineRegisterInfo *RegInfo; @@ -57,13 +63,6 @@ public: /// allocated to hold a pointer to the hidden sret parameter. unsigned DemoteRegister; - explicit FunctionLoweringInfo(TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(Function &Fn, MachineFunction &MF, bool EnableFastISel); - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; @@ -77,27 +76,15 @@ public: /// anywhere in the function. DenseMap<const AllocaInst*, int> StaticAllocaMap; + /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for + /// function arguments that are inserted after scheduling is completed. + SmallVector<MachineInstr*, 8> ArgDbgValues; + #ifndef NDEBUG - SmallSet<Instruction*, 8> CatchInfoLost; - SmallSet<Instruction*, 8> CatchInfoFound; + SmallSet<const Instruction *, 8> CatchInfoLost; + SmallSet<const Instruction *, 8> CatchInfoFound; #endif - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. 
- bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } - struct LiveOutInfo { unsigned NumSignBits; APInt KnownOne, KnownZero; @@ -108,42 +95,48 @@ public: /// register number offset by 'FirstVirtualRegister'. std::vector<LiveOutInfo> LiveOutRegInfo; + /// PHINodesToUpdate - A list of phi instructions whose operand list will + /// be updated after processing the current basic block. + /// TODO: This isn't per-function state, it's per-basic-block state. But + /// there's no other convenient place for it to live right now. + std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + + explicit FunctionLoweringInfo(const TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel); + /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void clear(); -}; -/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// -unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, - const unsigned *Indices, - const unsigned *IndicesEnd, - unsigned CurIndex = 0); - -/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// EVTs that represent all the individual underlying -/// non-aggregate types that comprise it. -/// -/// If Offsets is non-null, it points to a vector to be filled in -/// with the in-memory offsets of each of the individual values. -/// -void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, - SmallVectorImpl<uint64_t> *Offsets = 0, - uint64_t StartingOffset = 0); + unsigned MakeReg(EVT VT); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } -/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -GlobalVariable *ExtractTypeInfo(Value *V); + unsigned CreateRegForValue(const Value *V); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegForValue(V); + } +}; /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. -void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB); +void AddCatchInfo(const CallInst &I, + MachineModuleInfo *MMI, MachineBasicBlock *MBB); /// CopyCatchInfo - Copy catch information from SrcBB to DestBB. 
-void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, +void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 28ba343e7fe6..c5dae826fff7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -296,8 +296,19 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, } } + // If this value has only one use, that use is a kill. This is a + // conservative approximation. Tied operands are never killed, so we need + // to check that. And that means we need to determine the index of the + // operand. + unsigned Idx = MI->getNumOperands(); + while (Idx > 0 && + MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + bool isKill = Op.hasOneUse() && !isTied && !IsDebug; + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, - false/*isImp*/, false/*isKill*/, + false/*isImp*/, isKill, false/*isDead*/, false/*isUndef*/, false/*isEarlyClobber*/, 0/*SubReg*/, IsDebug)); @@ -440,8 +451,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = - getSuperRegisterRegClass(TRC, SubIdx, - Node->getValueType(0)); + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); // Figure out the register class to create for the destreg. // Note that if we're going to directly use an existing register, @@ -504,41 +514,83 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } +/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. +/// +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned NewVReg = MRI->createVirtualRegister(RC); + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); + unsigned NumOps = Node->getNumOperands(); + assert((NumOps & 1) == 0 && + "REG_SEQUENCE must have an even number of operands!"); + const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Op = Node->getOperand(i); +#ifndef NDEBUG + if (i & 1) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); + assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE"); + } +#endif + AddOperand(MI, Op, i+1, &II, VRBaseMap); + } + + MBB->insert(InsertPos, MI); + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + /// EmitDbgValue - Generate machine instruction for a dbg_value node. 
/// -MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD, - MachineBasicBlock *InsertBB, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineInstr * +InstrEmitter::EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap) { uint64_t Offset = SD->getOffset(); MDNode* MDPtr = SD->getMDPtr(); DebugLoc DL = SD->getDebugLoc(); + if (SD->getKind() == SDDbgValue::FRAMEIX) { + // Stack address; this needs to be lowered in target-dependent fashion. + // EmitTargetCodeForFrameDebugValue is responsible for allocation. + unsigned FrameIx = SD->getFrameIx(); + return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + } + // Otherwise, we're going to create an instruction here. const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { - AddOperand(&*MIB, SDValue(SD->getSDNode(), SD->getResNo()), - (*MIB).getNumOperands(), &II, VRBaseMap, true /*IsDebug*/); + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + true /*IsDebug*/); } else if (SD->getKind() == SDDbgValue::CONST) { - Value *V = SD->getConst(); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + const Value *V = SD->getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { MIB.addImm(CI->getSExtValue()); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { MIB.addFPImm(CF); } else { // Could be an Undef. In any case insert an Undef so we can see what we // dropped. MIB.addReg(0U); } - } else if (SD->getKind() == SDDbgValue::FRAMEIX) { - unsigned FrameIx = SD->getFrameIx(); - // Stack address; this needs to be lowered in target-dependent fashion. - // FIXME test that the target supports this somehow; if not emit Undef. - // Create a pseudo for EmitInstrWithCustomInserter's consumption. - MIB.addImm(FrameIx).addImm(Offset).addMetadata(MDPtr); - abort(); - TLI->EmitInstrWithCustomInserter(&*MIB, InsertBB, EM); - return 0; } else { // Insert an Undef so we can see what we dropped. MIB.addReg(0U); @@ -553,8 +605,7 @@ MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD, /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -571,6 +622,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, return; } + // Handle REG_SEQUENCE specially. + if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap); + return; + } + if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. 
return; @@ -615,7 +672,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (II.usesCustomInsertionHook()) { // Insert this instruction into the basic block using a target // specific inserter which may return a new basic block. - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); + MBB = TLI->EmitInstrWithCustomInserter(MI, MBB); InsertPos = MBB->end(); return; } @@ -646,7 +703,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, i != e; ++i) MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); } - return; } /// EmitSpecialNode - Generate machine code for a target-independent node and @@ -720,12 +776,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. - const char *AsmStr = - cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); // Add all of the operand registers to the instruction. - for (unsigned i = 2; i != NumOps;) { + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); @@ -733,24 +789,24 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. - switch (Flags & 7) { + switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); - case 2: // Def of register. + case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true)); } break; - case 6: // Def of earlyclobber register. + case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, false, false, true)); } break; - case 1: // Use of register. - case 3: // Immediate. - case 4: // Addressing mode. + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (; NumVals; --NumVals, ++i) @@ -758,6 +814,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); + if (MD) + MI->addOperand(MachineOperand::CreateMetadata(MD)); + MBB->insert(InsertPos, MI); break; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index baabb7554b60..c7e7c71268a2 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -88,6 +88,9 @@ class InstrEmitter { void EmitCopyToRegClassNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); + /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
+ /// + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -103,17 +106,14 @@ public: /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, - MachineBasicBlock *InsertBB, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + DenseMap<SDValue, unsigned> &VRBaseMap); /// EmitNode - Generate machine code for a node and needed dependencies. /// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + DenseMap<SDValue, unsigned> &VRBaseMap) { if (Node->isMachineOpcode()) - EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap, EM); + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); } @@ -130,8 +130,7 @@ public: private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + DenseMap<SDValue, unsigned> &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d35f0da393e4..bedfa57b9fc9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -32,13 +32,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include <map> using namespace llvm; //===----------------------------------------------------------------------===// @@ -55,7 +53,8 @@ using namespace llvm; /// namespace { class SelectionDAGLegalize { - TargetLowering &TLI; + const TargetMachine &TM; + const TargetLowering &TLI; SelectionDAG &DAG; CodeGenOpt::Level OptLevel; @@ -213,7 +212,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, CodeGenOpt::Level ol) - : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol), + : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), OptLevel(ol), ValueTypeActions(TLI.getValueTypeActions()) { assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && "Too many value types for ValueTypeActions to hold!"); @@ -409,7 +409,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // to the final destination using (unaligned) integer loads and stores. EVT StoredVT = ST->getMemoryVT(); EVT RegVT = - TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); unsigned StoredBytes = StoredVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; @@ -445,7 +447,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // The last store may be partial. Do a truncating store. 
On big-endian // machines this requires an extending load from the stack slot to ensure // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, @@ -548,7 +551,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // The last copy may be partial. Do an extending load. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), @@ -968,11 +972,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->dump( &DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to legalize this operator!"); + assert(0 && "Do not know how to legalize this operator!"); case ISD::BUILD_VECTOR: switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Custom: Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { @@ -1089,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1259,7 +1263,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1357,7 +1361,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1394,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, NVT, isVolatile, isNonTemporal, @@ -1459,7 +1464,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
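Several hunks above touch ExpandUnalignedStore and ExpandUnalignedLoad, whose strategy is easy to lose in the diff noise: bounce the value through a stack slot, then move it to the real address in register-sized integer pieces, finishing with one narrower piece when the size is not a multiple of the register width (the "last store may be partial" case). A rough byte-level analogy in plain C++ follows; it mirrors the chunking logic only, not the actual DAG-node construction:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Move StoredBytes from Src to Dst the way the expanded store walks memory:
// full register-width pieces first, then one partial piece for the tail
// (the truncating-store case in the real code).
static void copyInRegisterChunks(void *Dst, const void *Src,
                                 std::size_t StoredBytes) {
  const std::size_t RegBytes = sizeof(std::uint64_t);  // stand-in for RegVT
  std::size_t Offset = 0;
  while (StoredBytes - Offset >= RegBytes) {           // full-width pieces
    std::uint64_t Piece;
    std::memcpy(&Piece, static_cast<const char *>(Src) + Offset, RegBytes);
    std::memcpy(static_cast<char *>(Dst) + Offset, &Piece, RegBytes);
    Offset += RegBytes;
  }
  if (std::size_t Tail = StoredBytes - Offset) {       // partial last piece
    std::uint64_t Piece = 0;
    std::memcpy(&Piece, static_cast<const char *>(Src) + Offset, Tail);
    std::memcpy(static_cast<char *>(Dst) + Offset, &Piece, Tail);
  }
}

On a big-endian target the real code additionally re-loads the tail with an extending load so the significant bits land in the right place; the byte-for-byte copy above sidesteps that concern by construction.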
@@ -1658,8 +1663,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = - TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); if (Align > StackAlign) SP = DAG.getNode(ISD::AND, dl, VT, SP, DAG.getConstant(-(uint64_t)Align, VT)); @@ -1683,7 +1687,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: llvm_unreachable("Unknown condition code action!"); + default: assert(0 && "Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1691,7 +1695,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: llvm_unreachable("Don't know how to expand this condition!"); + default: assert(0 && "Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -1738,8 +1742,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); - unsigned DestAlign = - TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext())); + const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); + unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1930,7 +1934,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + default: assert(0 && "Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1947,7 +1951,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + default: assert(0 && "Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2062,7 +2066,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. 
uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported integer type!"); + default: assert(0 && "Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2182,7 +2186,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unhandled Expand type in BSWAP!"); + default: assert(0 && "Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2227,7 +2231,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: llvm_unreachable("Cannot expand this yet!"); + default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { static const uint64_t mask[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, @@ -2333,10 +2337,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, VT)); - else if (VT.isFloatingPoint()) + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); Results.push_back(DAG.getConstantFP(0, VT)); - else - llvm_unreachable("Unknown value type!"); + } break; } case ISD::TRAP: { @@ -2431,7 +2435,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> - getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, @@ -2842,7 +2846,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { + } else { + // FIXME: We should be able to fall back to a libcall with an illegal + // type in some cases. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2)) && + "Don't know how to expand this operation yet!"); EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); @@ -2851,12 +2862,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1)); - } else { - // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. 
- llvm_unreachable("Don't know how to expand this operation yet!"); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -2916,7 +2921,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, PseudoSourceValue::getJumpTable(), 0, MemVT, false, false, 0); Addr = LD; - if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. @@ -3078,11 +3083,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, if (OVT.isVector()) { ExtOp = ISD::BIT_CONVERT; TruncOp = ISD::BIT_CONVERT; - } else if (OVT.isInteger()) { + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; - } else { - llvm_report_error("Cannot promote logic operation"); } // Promote each of the values to the new type. Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 665b21f5a6bc..e3eb949567a3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -109,14 +109,16 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), BitConvertToInteger(N->getOperand(1))); } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -338,7 +340,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); - return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, N->getDebugLoc()); + return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -489,7 +492,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { @@ -531,7 +535,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl); + return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + &Op, 1, false, dl); } @@ -1403,7 +1408,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + ST->getValue().getValueType()); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 48f64c34d6d2..548454c633a5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -204,7 +204,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, - EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); } @@ -464,7 +465,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); } @@ -555,7 +556,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { @@ -1383,7 +1385,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. @@ -1403,7 +1406,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. 
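The AssertSext/AssertZext expansions just above encode the basic contract of integer expansion: a value of an illegal wide type lives on as a Lo/Hi pair of the half-width legal type, with Hi either replicating Lo's sign bit or pinned to zero. A minimal sketch of that invariant for a 64-bit value split into 32-bit halves, in plain C++ rather than SDValue plumbing:

#include <cstdint>
#include <utility>

// Sign-extension case: "the high part replicates the sign bit of Lo".
// The arithmetic right shift by width-1 smears the sign bit across Hi.
static std::pair<std::uint32_t, std::uint32_t> expandSExt(std::int32_t Lo) {
  std::uint32_t Hi = static_cast<std::uint32_t>(Lo >> 31);  // 0 or ~0
  return {static_cast<std::uint32_t>(Lo), Hi};
}

// Zero-extension case: "the high part must be zero, make it explicit".
static std::pair<std::uint32_t, std::uint32_t> expandZExt(std::uint32_t Lo) {
  return {Lo, 0u};
}

For example, expandSExt(-1) yields {0xffffffff, 0xffffffff} while expandZExt(0xffffffff) yields {0xffffffff, 0}, which is exactly the distinction the two asserts preserve.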
@@ -1846,7 +1850,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); } } @@ -1968,7 +1973,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SplitInteger(Res, Lo, Hi); unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); - Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); } } @@ -2269,7 +2275,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned EBytes = ExtVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; - EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits); + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), + ExtVT.getSizeInBits() - ExcessBits); if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index f3e7ca4fa38f..17f131b21e4a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -721,7 +721,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for promoted integer"); AnalyzeNewValue(Result); @@ -731,7 +732,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); @@ -762,7 +764,8 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi) { - assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && Hi.getValueType() == Lo.getValueType() && "Invalid type for expanded integer"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. @@ -788,7 +791,8 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi) { - assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && Hi.getValueType() == Lo.getValueType() && "Invalid type for expanded float"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. 
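The Set/GetExpandedInteger bookkeeping whose asserts are reflowed here, and the JoinIntegers/SplitInteger helpers in the hunks that follow, all revolve around one representation: a double-width integer carried as a (Lo, Hi) pair of half-width values. Join zero-extends Lo, extends Hi, shifts Hi into the top half, and ORs the two; split is the inverse. Specialized to concrete widths, the round trip looks like this (illustrative C++, assuming 32-bit halves):

#include <cassert>
#include <cstdint>
#include <utility>

// JoinIntegers, specialized: the joined type has LVT's plus HVT's bits
// (32 + 32 here), with Hi occupying the top half.
static std::uint64_t joinHalves(std::uint32_t Lo, std::uint32_t Hi) {
  return (static_cast<std::uint64_t>(Hi) << 32) | Lo;
}

// SplitInteger, specialized: Lo is the truncation, Hi the shifted-down rest.
static std::pair<std::uint32_t, std::uint32_t> splitHalves(std::uint64_t V) {
  return {static_cast<std::uint32_t>(V), static_cast<std::uint32_t>(V >> 32)};
}

int main() {
  std::uint64_t V = 0x0123456789abcdefULL;
  auto [Lo, Hi] = splitHalves(V);
  assert(joinHalves(Lo, Hi) == V);  // the round trip the legalizer relies on
}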
@@ -832,7 +836,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for widened vector");
AnalyzeNewValue(Result);
@@ -940,7 +945,8 @@ void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
} else {
unsigned NumElements = InVT.getVectorNumElements();
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2);
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
}
}
@@ -980,7 +986,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
DebugLoc dlLo = Lo.getDebugLoc();
EVT LVT = Lo.getValueType();
EVT HVT = Hi.getValueType();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
@@ -1082,7 +1089,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
/// type half the size of Op's.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2);
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 9dd9796d0122..d60ad60017db 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -33,7 +33,7 @@ namespace llvm {
/// into small values.
///
class VISIBILITY_HIDDEN DAGTypeLegalizer {
- TargetLowering &TLI;
+ const TargetLowering &TLI;
SelectionDAG &DAG;
public:
// NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 5e83b4ba33d6..88e1e624ae32 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -173,8 +173,9 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
- EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts),
- OldVec);
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
// Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
SDValue Idx = N->getOperand(1);
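The ExpandRes_EXTRACT_VECTOR_ELT hunk above reinterprets a vector of illegal (expanded) elements as a vector of twice as many half-width elements, so element i becomes the pair at indices 2*i and 2*i+1. A self-contained sketch of the same index arithmetic, using plain arrays in place of SDValues:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Vec[2] = {0x1111222233334444ULL, 0x5555666677778888ULL};
  uint32_t Halves[4];
  std::memcpy(Halves, Vec, sizeof(Vec));  // the BIT_CONVERT step

  unsigned Idx = 1;
  uint32_t Lo = Halves[2 * Idx];      // low half of Vec[1]
  uint32_t Hi = Halves[2 * Idx + 1];  // high half of Vec[1]

  // On a little-endian host the pair reassembles the original element.
  uint64_t Joined = ((uint64_t)Hi << 32) | Lo;
  assert(Joined == Vec[Idx]);
  return 0;
}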
@@ -268,7 +269,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
// is no point, and it might create expansion loops). For example, on
// x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
EVT OVT = N->getOperand(0).getValueType();
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ 2);
if (isTypeLegal(NVT)) {
SDValue Parts[2];
@@ -312,8 +315,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
}
SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
- EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()),
- &NewElts[0], NewElts.size());
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, NewElts.size()),
+ &NewElts[0], NewElts.size());
// Convert the new vector to the old vector type.
return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
@@ -380,7 +384,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *St = cast<StoreSDNode>(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ St->getValue().getValueType());
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
int SVOffset = St->getSrcValueOffset();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b5f84c0abb23..0e2bd0233712 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
namespace {
class VectorLegalizer {
SelectionDAG& DAG;
- TargetLowering& TLI;
+ const TargetLowering &TLI;
bool Changed; // Keep track of whether anything changed
/// LegalizedNodes - For nodes that are of legal width, and that have more
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ed5f24c1cc95..7efeea1ddaf9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -705,8 +705,9 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext()));
+ TLI.getTargetData()->getPrefTypeAlignment(VecType);
Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
false, false, 0);
@@ -1419,7 +1420,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
ShOp = GetWidenedVector(ShOp);
ShVT = ShOp.getValueType();
}
- EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(),
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
WidenVT.getVectorNumElements());
if (ShVT != ShWidenVT)
ShOp = ModifyToType(ShOp, ShWidenVT);
@@ -1493,7 +1495,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
- NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits());
+ NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
} else {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
@@ -1617,7 +1620,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue RndOp = N->getOperand(3);
SDValue SatOp = N->getOperand(4);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
@@ -1791,7 +1795,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
EVT CondEltVT = CondVT.getVectorElementType();
- EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts);
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
if (getTypeAction(CondVT) == WidenVector)
Cond1 = GetWidenedVector(Cond1);
@@ -1859,7 +1864,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "can not widen non vector type");
- EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts);
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
@@ -2124,7 +2130,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// The routines chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widen vector
// type.
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
unsigned WidenWidth = WidenVT.getSizeInBits();
EVT LdVT = LD->getMemoryVT();
DebugLoc dl = LD->getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 9d1568f01f40..ac2d33884b26 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -41,7 +41,7 @@ private:
SDNode *Node; // valid for expressions
unsigned ResNo; // valid for expressions
} s;
- Value *Const; // valid for constants
+ const Value *Const; // valid for constants
unsigned FrameIx; // valid for stack objects
} u;
MDNode *mdPtr;
@@ -60,7 +60,8 @@ public:
}
// Constructor for constants.
- SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) :
+ SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
+ unsigned O) :
mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
kind = CONST;
u.Const = C;
@@ -86,7 +87,7 @@ public:
unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; }
// Returns the Value* for a constant
- Value *getConst() { assert (kind==CONST); return u.Const; }
+ const Value *getConst() { assert (kind==CONST); return u.Const; }
// Returns the FrameIx for a stack object
unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3f1766d9e9f2..da028500bd0a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -17,18 +17,19 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
@@ -647,13 +648,14 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
--NumOps; // Ignore the flag operand.
- for (unsigned i = 2; i != NumOps;) {
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = (Flags & 0xffff) >> 3;
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
++i; // Skip the ID value.
- if ((Flags & 7) == 2 || (Flags & 7) == 6) {
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags)) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
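The DelayForLiveRegsBottomUp hunk above replaces inline-asm magic numbers with named helpers. As the old masks suggest, the per-operand flag word packs an operand kind into the low three bits and a register count above them. A self-contained sketch of that packing, with illustrative names rather than the llvm::InlineAsm API itself:

#include <cassert>

enum OperandKind { Kind_RegUse = 1, Kind_RegDef = 2,
                   Kind_RegDefEarlyClobber = 6 };

unsigned makeFlags(OperandKind Kind, unsigned NumRegs) {
  return Kind | (NumRegs << 3);          // kind in bits 0-2, count above
}
OperandKind getKind(unsigned Flags) { return OperandKind(Flags & 7); }
unsigned getNumOperandRegisters(unsigned Flags) {
  return (Flags & 0xffff) >> 3;          // the old "(Flags & 0xffff) >> 3"
}

int main() {
  unsigned Flags = makeFlags(Kind_RegDef, 2);
  assert(getKind(Flags) == Kind_RegDef);        // the old "(Flags & 7) == 2"
  assert(getNumOperandRegisters(Flags) == 2);
  return 0;
}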
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index e7ab2f003963..76e47718f501 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -353,8 +353,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpSU->Latency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpSU, SU, (SDep &)dep);
- ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+ ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
SU->addPred(dep);
@@ -422,7 +422,6 @@ namespace {
// instructions in the right order.
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
InstrEmitter &Emitter,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM,
DenseMap<SDValue, unsigned> &VRBaseMap,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
SmallSet<unsigned, 8> &Seen) {
@@ -449,9 +448,11 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
continue;
unsigned DVOrder = DVs[i]->getOrder();
if (DVOrder == ++Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], BB, VRBaseMap, EM);
- Orders.push_back(std::make_pair(DVOrder, DbgMI));
- BB->insert(InsertPos, DbgMI);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
DVs[i]->setIsInvalidated();
}
}
@@ -459,8 +460,7 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::
-EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -468,6 +468,17 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
SmallSet<unsigned, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(BB->end(), DbgMI);
+ }
+ }
+
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
SUnit *SU = Sequence[i];
if (!SU) {
@@ -491,21 +502,21 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
while (!FlaggedNodes.empty()) {
SDNode *N = FlaggedNodes.back();
Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap, EM);
- // Remember the the source order of the inserted instruction.
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen);
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
FlaggedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap, EM);
- // Remember the the source order of the inserted instruction.
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders,
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
Seen);
}
- // Insert all the dbg_value which have not already been inserted in source
+ // Insert all the dbg_values which have not already been inserted in source
// order sequence.
if (HasDbg) {
MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
@@ -540,13 +551,15 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
#endif
if ((*DI)->isInvalidated())
continue;
- MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, MIBB, VRBaseMap, EM);
- if (!LastOrder)
- // Insert to start of the BB (after PHIs).
- BB->insert(BBBegin, DbgMI);
- else {
- MachineBasicBlock::iterator Pos = MI;
- MIBB->insert(llvm::next(Pos), DbgMI);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ MachineBasicBlock::iterator Pos = MI;
+ MIBB->insert(llvm::next(Pos), DbgMI);
+ }
}
}
LastOrder = Order;
@@ -558,8 +571,9 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
MachineBasicBlock *InsertBB = Emitter.getBlock();
MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, InsertBB, VRBaseMap, EM);
- InsertBB->insert(Pos, DbgMI);
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI)
+ InsertBB->insert(Pos, DbgMI);
}
++DI;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 6b829b61066b..7ae8ec236fbf 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -59,7 +59,8 @@ namespace llvm {
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
if (isa<BlockAddressSDNode>(Node)) return true;
- if (Node->getOpcode() == ISD::EntryToken) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
return false;
}
@@ -93,8 +94,7 @@ namespace llvm {
///
virtual void ComputeLatency(SUnit *SU);
- virtual MachineBasicBlock *
- EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+ virtual MachineBasicBlock *EmitSchedule();
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8c0554d186d9..e6df742bc336 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -304,10 +304,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
return Result;
}
-const TargetMachine &SelectionDAG::getTarget() const {
- return MF->getTarget();
-}
-
//===----------------------------------------------------------------------===//
// SDNode Profile Support
//===----------------------------------------------------------------------===//
@@ -792,8 +788,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
- : TLI(tli), FLI(fli),
+SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
+ : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli),
EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
@@ -1048,7 +1044,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
unsigned char TargetFlags) {
@@ -1319,7 +1315,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
}
-SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
bool isTarget,
unsigned char TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
@@ -1356,6 +1352,23 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
return SDValue(N, 0);
}
+/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+ ID.AddPointer(MD);
+
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) {
@@ -1904,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
- APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes());
+ APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+ BitWidth - Mask.countLeadingZeros());
ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
@@ -2253,7 +2267,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
if (!GA) return false;
if (GA->getOffset() != 0) return false;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
if (!GV) return false;
return MF->getMMI().hasDebugInfo();
}
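The ComputeMaskedBits hunk above corrects how many low bits of an ADD operand are examined: what matters is the position of the highest demanded bit, and Mask.countTrailingOnes() undercounts whenever the demanded mask has a gap in its low bits. A small 8-bit model of the difference (hand-rolled bit counting, standing in for APInt):

#include <cassert>
#include <cstdint>

unsigned countLeadingZeros8(uint8_t x) {
  unsigned n = 0;
  for (int b = 7; b >= 0 && !((x >> b) & 1); --b) ++n;
  return n;
}
unsigned countTrailingOnes8(uint8_t x) {
  unsigned n = 0;
  while ((x >> n) & 1) ++n;
  return n;
}

int main() {
  const unsigned BitWidth = 8;
  uint8_t Mask = 0x14;  // demanded bits 2 and 4 -- no trailing ones at all
  // Old computation: zero low bits examined, losing known-zero facts.
  assert(countTrailingOnes8(Mask) == 0);
  // New computation: examine everything up to the highest demanded bit.
  unsigned LowBits = BitWidth - countLeadingZeros8(Mask);
  assert(LowBits == 5);
  return 0;
}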
@@ -2778,14 +2792,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// If the indices are the same, return the inserted element else
// if the indices are known different, extract the element from
// the original vector.
- if (N1.getOperand(2) == N2) {
- if (VT == N1.getOperand(1).getValueType())
- return N1.getOperand(1);
- else
- return getSExtOrTrunc(N1.getOperand(1), DL, VT);
- } else if (isa<ConstantSDNode>(N1.getOperand(2)) &&
- isa<ConstantSDNode>(N2))
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
}
break;
case ISD::EXTRACT_ELEMENT:
@@ -3178,7 +3197,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
if (!G)
return false;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
return true;
@@ -3193,6 +3212,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -3201,9 +3221,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// the value, i.e. memset or memcpy from constant string. Otherwise, it's
// the inferred alignment of the source. 'DstAlign', on the other hand, is the
// specified alignment of the memory operation. If it is zero, that means
- // it's possible to change the alignment of the destination.
+ // it's possible to change the alignment of the destination. 'MemcpyStrSrc'
+ // indicates whether the memcpy source is constant so it does not need to be
+ // loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- NonScalarIntSafe, DAG);
+ NonScalarIntSafe, MemcpyStrSrc,
+ DAG.getMachineFunction());
if (VT == MVT::Other) {
if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
@@ -3269,9 +3292,6 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
- uint64_t Limit = -1ULL;
- if (!AlwaysInline)
- Limit = TLI.getMaxStoresPerMemcpy();
bool DstAlignCanChange = false;
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
@@ -3283,9 +3303,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
+ uint64_t Limit = -1ULL;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemcpy();
+
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
- (isZeroStr ? 0 : SrcAlign), true, DAG, TLI))
+ (isZeroStr ? 0 : SrcAlign),
+ true, CopyFromStr, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
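The EXTRACT_VECTOR_ELT change at the top of this file's hunks now folds through an INSERT_VECTOR_ELT only when both indices are compile-time constants: comparing the index operands as SDValues could not prove two unknown indices different, so treating a mismatch as "different slot" was unsound. A scalar model of the corrected fold (names and the fixed vector size are illustrative):

#include <cassert>

int extractAfterInsert(const int *Vec, bool InsIdxKnown, int InsIdx,
                       int InsVal, int ExtIdx) {
  if (InsIdxKnown) {
    if (InsIdx == ExtIdx)
      return InsVal;      // same slot: take the inserted value
    return Vec[ExtIdx];   // provably different slot: bypass the insert
  }
  // Insert index unknown at compile time: be conservative and
  // actually materialize the inserted vector before extracting.
  int Tmp[4] = {Vec[0], Vec[1], Vec[2], Vec[3]};
  Tmp[InsIdx] = InsVal;
  return Tmp[ExtIdx];
}

int main() {
  int V[4] = {10, 11, 12, 13};
  assert(extractAfterInsert(V, true, 2, 99, 2) == 99);
  assert(extractAfterInsert(V, true, 2, 99, 1) == 11);
  return 0;
}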
@@ -3373,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
- SrcAlign, true, DAG, TLI))
+ SrcAlign, true, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3445,7 +3469,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), Size,
(DstAlignCanChange ? 0 : Align), 0,
- NonScalarIntSafe, DAG, TLI))
+ NonScalarIntSafe, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3571,8 +3595,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
if (Result.getNode())
return Result;
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
// Emit a library call.
- assert(!isVol && "library memmove does not support volatile");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
@@ -3620,8 +3646,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
if (Result.getNode())
return Result;
- // Emit a library call.
- assert(!isVol && "library memset does not support volatile");
+ // Emit a library call.
const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -4913,7 +4938,7 @@ SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
}
SDDbgValue *
-SelectionDAG::getDbgValue(MDNode *MDPtr, Value *C, uint64_t Off,
+SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
DebugLoc DL, unsigned O) {
return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
}
@@ -5321,8 +5346,8 @@ unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
-void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD) {
- DbgInfo->add(DB, SD);
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ DbgInfo->add(DB, SD, isParameter);
if (SD)
SD->setHasDebugValue(true);
}
@@ -5338,7 +5363,7 @@ HandleSDNode::~HandleSDNode() {
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
EVT VT, int64_t o, unsigned char TF)
: SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) {
- TheGlobal = const_cast<GlobalValue*>(GA);
+ TheGlobal = GA;
}
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
@@ -5558,6 +5583,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
case ISD::EntryToken: return "EntryToken";
case ISD::TokenFactor: return "TokenFactor";
case ISD::AssertSext: return "AssertSext";
@@ -5926,6 +5952,11 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << "<" << M->getValue() << ">";
else
OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
} else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
OS << ":" << N->getVT().getEVTString();
}
@@ -6063,7 +6094,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
unsigned i;
for (i= 0; i != NE; ++i) {
- for (unsigned j = 0; j != N->getNumOperands(); ++j) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
SDValue Operand = N->getOperand(j);
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
@@ -6141,8 +6172,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
return true;
}
- GlobalValue *GV1 = NULL;
- GlobalValue *GV2 = NULL;
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
@@ -6157,14 +6188,14 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
/// it cannot be inferred.
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
- GlobalValue *GV;
+ const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
// If GV has specified alignment, then use it. Otherwise, use the preferred
// alignment.
unsigned Align = GV->getAlignment();
if (!Align) {
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
if (GVar->hasInitializer()) {
const TargetData *TD = TLI.getTargetData();
Align = TD->getPreferredAlignment(GVar);
@@ -6326,8 +6357,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
if (OpVal.getOpcode() == ISD::UNDEF)
SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
- zextOrTrunc(sz) << BitPos);
+ SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos;
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
else
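In the isConstantSplat hunk above, each constant element is widened or truncated to the full vector width and OR'd in at its bit position; the value is a splat when every element contributed the same pattern. A compact sketch with four 16-bit elements packed into 64 bits (the widths are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned EltBits = 16, NumElts = 4;
  uint16_t Elts[NumElts] = {0xABCD, 0xABCD, 0xABCD, 0xABCD};

  uint64_t Packed = 0;
  for (unsigned i = 0; i != NumElts; ++i)
    Packed |= (uint64_t)Elts[i] << (i * EltBits);  // the "<< BitPos" step

  // Check that the low EltBits replicate across the whole value.
  uint64_t Splat = Packed & 0xFFFF;
  bool IsSplat = true;
  for (unsigned i = 1; i != NumElts; ++i)
    IsSplat &= ((Packed >> (i * EltBits)) & 0xFFFF) == Splat;
  assert(IsSplat);
  return 0;
}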
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4bbb3dea92f4..a38b204b247e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -28,7 +28,9 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -168,7 +170,7 @@ namespace {
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
/// (if applicable), and includes the number of values added into it.
- void AddInlineAsmOperands(unsigned Code,
+ void AddInlineAsmOperands(unsigned Kind,
bool HasMatching, unsigned MatchingIdx,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const;
@@ -533,7 +535,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
TD = DAG.getTarget().getTargetData();
}
-/// clear - Clear out the curret SelectionDAG and the associated
+/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
/// for a new block. This doesn't clear out information about
/// additional blocks that are needed to complete switch lowering
@@ -543,8 +545,6 @@ void SelectionDAGBuilder::clear() {
NodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
- EdgeMapping.clear();
- DAG.clear();
CurDebugLoc = DebugLoc();
HasTailCall = false;
}
@@ -612,11 +612,26 @@ void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
AssignOrderingToNode(Node->getOperand(I).getNode());
}
-void SelectionDAGBuilder::visit(Instruction &I) {
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (isa<TerminatorInst>(&I))
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+
+ CurDebugLoc = I.getDebugLoc();
+
+ visit(I.getOpcode(), I);
+
+ if (!isa<TerminatorInst>(&I) && !HasTailCall)
+ CopyToExportRegsIfNeeded(&I);
+
+ CurDebugLoc = DebugLoc();
}
-void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
@@ -638,28 +653,28 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
- if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(V->getType(), true);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return N = DAG.getConstant(*CI, VT);
- if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return N = DAG.getGlobalAddress(GV, VT);
if (isa<ConstantPointerNull>(C))
return N = DAG.getConstant(0, TLI.getPointerTy());
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return N = DAG.getConstantFP(*CFP, VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return N = DAG.getUNDEF(VT);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
SDValue N1 = NodeMap[V];
- assert(N1.getNode() && "visit didn't populate the ValueMap!");
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
return N1;
}
@@ -704,7 +719,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
getCurDebugLoc());
}
- if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
const VectorType *VecTy = cast<VectorType>(V->getType());
@@ -713,7 +728,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
SmallVector<SDValue, 16> Ops;
- if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CP->getOperand(i)));
} else {
@@ -756,7 +771,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
static void getReturnInfo(const Type* ReturnType,
Attributes attr, SmallVectorImpl<EVT> &OutVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
- TargetLowering &TLI,
+ const TargetLowering &TLI,
SmallVectorImpl<uint64_t> *Offsets = 0) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, ReturnType, ValueVTs);
@@ -811,7 +826,7 @@ static void getReturnInfo(const Type* ReturnType,
}
}
-void SelectionDAGBuilder::visitRet(ReturnInst &I) {
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
@@ -916,18 +931,18 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
-void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
- if (!V->use_empty()) {
- DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
- if (VMI != FuncInfo.ValueMap.end())
- CopyValueToVirtualRegister(V, VMI->second);
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
}
}
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyTo/FromReg.
-void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// No need to export constants.
if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
@@ -938,11 +953,11 @@
CopyValueToVirtualRegister(V, Reg);
}
-bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
const BasicBlock *FromBB) {
// The operands of the setcc have to be in this block. We don't know
// how to export them from some other block.
- if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
// Can export from current BB.
if (VI->getParent() == FromBB)
return true;
@@ -971,85 +986,31 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
return true;
}
-/// getFCmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR floating-point condition code. This includes
-/// consideration of global floating-point math flags.
-///
-static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
- ISD::CondCode FPC, FOC;
- switch (Pred) {
- case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
- case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
- case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
- case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
- case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
- case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
- case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
- case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
- case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
- case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
- case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
- case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
- case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
- case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
- case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
- case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
- default:
- llvm_unreachable("Invalid FCmp predicate opcode!");
- FOC = FPC = ISD::SETFALSE;
- break;
- }
- if (FiniteOnlyFPMath())
- return FOC;
- else
- return FPC;
-}
-
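The deleted helpers map LLVM IR comparison predicates onto ISD condition codes; the FP flavor tracks both an ordered-only and a fully precise variant and, under finite-only FP math, returns the cheaper ordered form (the new llvm/CodeGen/Analysis.h include earlier in this file suggests the helpers now live there rather than being removed outright). A trimmed sketch of the same shape, with illustrative enums rather than the LLVM definitions:

#include <cassert>

enum FPred { FCMP_OEQ, FCMP_UEQ, FCMP_OLT, FCMP_ULT };
enum CondCode { SETEQ, SETOEQ, SETUEQ, SETLT, SETOLT, SETULT };

CondCode getFCmpCondCode(FPred Pred, bool FiniteOnlyFPMath) {
  CondCode FOC = SETEQ, FPC = SETOEQ;  // ordered-only vs. precise flavor
  switch (Pred) {
  case FCMP_OEQ: FOC = SETEQ; FPC = SETOEQ; break;
  case FCMP_UEQ: FOC = SETEQ; FPC = SETUEQ; break;
  case FCMP_OLT: FOC = SETLT; FPC = SETOLT; break;
  case FCMP_ULT: FOC = SETLT; FPC = SETULT; break;
  }
  // With no NaNs in the picture the cheaper ordered form is enough.
  return FiniteOnlyFPMath ? FOC : FPC;
}

int main() {
  assert(getFCmpCondCode(FCMP_ULT, false) == SETULT);
  assert(getFCmpCondCode(FCMP_ULT, true) == SETLT);
  return 0;
}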
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
-static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
- switch (Pred) {
- case ICmpInst::ICMP_EQ: return ISD::SETEQ;
- case ICmpInst::ICMP_NE: return ISD::SETNE;
- case ICmpInst::ICMP_SLE: return ISD::SETLE;
- case ICmpInst::ICMP_ULE: return ISD::SETULE;
- case ICmpInst::ICMP_SGE: return ISD::SETGE;
- case ICmpInst::ICMP_UGE: return ISD::SETUGE;
- case ICmpInst::ICMP_SLT: return ISD::SETLT;
- case ICmpInst::ICMP_ULT: return ISD::SETULT;
- case ICmpInst::ICMP_SGT: return ISD::SETGT;
- case ICmpInst::ICMP_UGT: return ISD::SETUGT;
- default:
- llvm_unreachable("Invalid ICmp predicate opcode!");
- return ISD::SETNE;
- }
-}
-
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
///
void
-SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- MachineBasicBlock *CurBB) {
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
// the caseblock.
- if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
// The operands of the cmp have to be in this block. We don't know
// how to export them from some other block. If this is the first block
// of the sequence, no exporting is needed.
- if (CurBB == CurMBB ||
+ if (CurBB == SwitchBB ||
(isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
- if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
Condition = getICmpCondCode(IC->getPredicate());
- } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+ } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
} else {
Condition = ISD::SETEQ; // silence warning.
@@ -1070,19 +1031,20 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
}
/// FindMergedConditions - If Cond is an expression like
-void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
unsigned Opc) {
// If this node is not part of the or/and tree, emit it as a branch.
- Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
(unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
- EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
return;
}
@@ -1102,10 +1064,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
//
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1118,10 +1080,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
// This requires creation of TmpBB after CurBB.
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
}
}
@@ -1156,19 +1118,21 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
return true;
}
-void SelectionDAGBuilder::visitBr(BranchInst &I) {
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()];
+
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = BrMBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
if (I.isUnconditional()) {
// Update machine-CFG edges.
- CurMBB->addSuccessor(Succ0MBB);
+ BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch, emit the branch.
if (Succ0MBB != NextBlock)
@@ -1181,7 +1145,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
// If this condition is one of the special cases we handle, do special stuff
// now.
- Value *CondVal = I.getCondition();
+ const Value *CondVal = I.getCondition();
MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
// If this is a series of conditions that are or'd or and'd together, emit
@@ -1199,15 +1163,16 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
// cmp D, E
// jle foo
//
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
if (BOp->hasOneUse() &&
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
- FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ BOp->getOpcode());
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
- assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+ assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
if (ShouldEmitAsBranches(SwitchCases)) {
@@ -1217,7 +1182,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
}
// Emit the branch for this block.
- visitSwitchCase(SwitchCases[0]);
+ visitSwitchCase(SwitchCases[0], BrMBB);
SwitchCases.erase(SwitchCases.begin());
return;
}
@@ -1233,16 +1198,17 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) {
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
- NULL, Succ0MBB, Succ1MBB, CurMBB);
+ NULL, Succ0MBB, Succ1MBB, BrMBB);
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
- visitSwitchCase(CB);
+ visitSwitchCase(CB, BrMBB);
}
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
-void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
DebugLoc dl = getCurDebugLoc();
@@ -1281,13 +1247,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
}
// Update successor info
- CurMBB->addSuccessor(CB.TrueBB);
- CurMBB->addSuccessor(CB.FalseBB);
+ SwitchBB->addSuccessor(CB.TrueBB);
+ SwitchBB->addSuccessor(CB.FalseBB);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1305,11 +1271,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
// If the branch was constant folded, fix up the CFG.
if (BrCond.getOpcode() == ISD::BR) {
- CurMBB->removeSuccessor(CB.FalseBB);
+ SwitchBB->removeSuccessor(CB.FalseBB);
} else {
// Otherwise, go ahead and insert the false branch.
if (BrCond == getControlRoot())
- CurMBB->removeSuccessor(CB.TrueBB);
+ SwitchBB->removeSuccessor(CB.TrueBB);
if (CB.FalseBB != NextBlock)
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
@@ -1336,7 +1302,8 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
- JumpTableHeader &JTH) {
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
// Subtract the lowest switch case value from the value being switched on and
// conditional branch to default mbb if the result is greater than the
// difference between smallest and largest cases.
@@ -1368,7 +1335,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1386,7 +1353,8 @@
/// visitBitTestHeader - This function emits necessary code to produce value
/// suitable for "bit tests"
-void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
// Subtract the minimum value
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
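visitJumpTableHeader and visitBitTestHeader both rebase the switch value by the lowest case and branch to the default block on a single unsigned comparison: after the subtraction, anything above (high - low) is out of range in either direction, since a too-small input wraps around to a huge unsigned value. A scalar sketch of that guard (the table contents are illustrative):

#include <cassert>
#include <cstdint>
#include <cstring>

const char *dispatch(int32_t X) {
  const int32_t Low = 10, High = 13;
  static const char *const Table[] = {"ten", "eleven", "twelve", "thirteen"};
  uint32_t Idx = (uint32_t)(X - Low);
  if (Idx > (uint32_t)(High - Low))  // one compare covers both ends
    return "default";
  return Table[Idx];
}

int main() {
  assert(std::strcmp(dispatch(12), "twelve") == 0);
  assert(std::strcmp(dispatch(9), "default") == 0);   // wraps, caught
  assert(std::strcmp(dispatch(99), "default") == 0);
  return 0;
}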
@@ -1409,14 +1377,14 @@
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- CurMBB->addSuccessor(B.Default);
- CurMBB->addSuccessor(MBB);
+ SwitchBB->addSuccessor(B.Default);
+ SwitchBB->addSuccessor(MBB);
SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, CopyTo, RangeCmp,
@@ -1432,7 +1400,8 @@
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
unsigned Reg,
- BitTestCase &B) {
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
// Make desired shift
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
TLI.getPointerTy());
@@ -1450,8 +1419,8 @@
AndOp, DAG.getConstant(0, TLI.getPointerTy()),
ISD::SETNE);
- CurMBB->addSuccessor(B.TargetBB);
- CurMBB->addSuccessor(NextMBB);
+ SwitchBB->addSuccessor(B.TargetBB);
+ SwitchBB->addSuccessor(NextMBB);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -1460,7 +1429,7 @@
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
MachineBasicBlock *NextBlock = 0;
- MachineFunction::iterator BBI = CurMBB;
+ MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1471,7 +1440,9 @@
DAG.setRoot(BrAnd);
}
-void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()];
+
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
@@ -1487,8 +1458,8 @@
CopyToExportRegsIfNeeded(&I);
// Update successor info
- CurMBB->addSuccessor(Return);
- CurMBB->addSuccessor(LandingPad);
+ InvokeMBB->addSuccessor(Return);
+ InvokeMBB->addSuccessor(LandingPad);
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1496,15 +1467,16 @@
DAG.getBasicBlock(Return)));
}
-void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
+void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
}
/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
Case& BackCase = *(CR.Range.second-1);
// Size is the number of Cases represented by this range.
@@ -1557,7 +1529,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
FallThrough = Default;
}
- Value *RHS, *LHS, *MHS;
+ const Value *RHS, *LHS, *MHS;
ISD::CondCode CC;
if (I->High == I->Low) {
// This is just small small case range :) containing exactly 1 case
@@ -1573,8 +1545,8 @@
// code into the current block.
Otherwise, push the CaseBlock onto the
// vector to be later processed by SDISel, and insert the node's MBB
// before the next MBB.
- if (CurBlock == CurMBB)
- visitSwitchCase(CB);
+ if (CurBlock == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
else
SwitchCases.push_back(CB);
@@ -1600,8 +1572,9 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) {
/// handleJTSwitchCase - Emit jumptable for current switch case range
bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB) {
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
@@ -1613,7 +1586,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
I!=E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
+ if (!areJTsAllowed(TLI) || TSize.ult(4))
return false;
APInt Range = ComputeRange(First, Last);
@@ -1682,9 +1655,9 @@
// Set the jump table information so that we can codegen it as a second
// MachineBasicBlock
JumpTable JT(-1U, JTI, JumpTableBB, Default);
- JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
- if (CR.CaseBB == CurMBB)
- visitJumpTableHeader(JT, JTH);
+ JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+ if (CR.CaseBB == SwitchBB)
+ visitJumpTableHeader(JT, JTH, SwitchBB);
JTCases.push_back(JumpTableBlock(JTH, JT));
@@ -1695,8 +1668,9 @@
/// 2 subtrees.
bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default) {
+ const Value* SV,
+ MachineBasicBlock *Default,
+ MachineBasicBlock *SwitchBB) {
// Get the MachineFunction which holds the current MBB. This is used when
// inserting any additional MBBs necessary to represent the switch.
MachineFunction *CurMF = FuncInfo.MF;
@@ -1810,8 +1784,8 @@
// Otherwise, branch to LHS.
CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
- if (CR.CaseBB == CurMBB)
- visitSwitchCase(CB);
+ if (CR.CaseBB == SwitchBB)
+ visitSwitchCase(CB, SwitchBB);
else
SwitchCases.push_back(CB);
@@ -1823,8 +1797,9 @@
/// of masks and emit bit tests with these masks.
bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
CaseRecVector& WorkList,
- Value* SV,
- MachineBasicBlock* Default){
+ const Value* SV,
+ MachineBasicBlock* Default,
+ MachineBasicBlock *SwitchBB){
EVT PTy = TLI.getPointerTy();
unsigned IntPtrBits = PTy.getSizeInBits();
@@ -1867,7 +1842,7 @@
<< "Low bound: " << minValue << '\n'
<< "High bound: " << maxValue << '\n');
- if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+ if (cmpRange.uge(IntPtrBits) ||
(!(Dests.size() == 1 && numCmps >= 3) &&
!(Dests.size() == 2 && numCmps >= 5) &&
!(Dests.size() >= 3 && numCmps >= 6)))
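handleBitTestsSwitchCase converts a dense cluster of cases into one bit mask per destination: each rebased case value v sets bit (1 << v), and a destination is then reached with a single shift-and-test instead of one compare per case. A sketch for the clusters {0,2,3} and {5} (the values are illustrative):

#include <cassert>
#include <cstdint>

char dispatch(uint64_t V) {
  const uint64_t MaskA = (1ULL << 0) | (1ULL << 2) | (1ULL << 3);
  const uint64_t MaskB = (1ULL << 5);
  if (V > 5) return 'D';                // range check first
  if ((1ULL << V) & MaskA) return 'A';  // one test covers three cases
  if ((1ULL << V) & MaskB) return 'B';
  return 'D';
}

int main() {
  assert(dispatch(3) == 'A');
  assert(dispatch(5) == 'B');
  assert(dispatch(4) == 'D');
  assert(dispatch(64) == 'D');
  return 0;
}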
@@ -1879,8 +1854,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
// Optimize the case where all the case values fit in a
// word without having to subtract minValue. In this case,
// we can optimize away the subtraction.
- if (minValue.isNonNegative() &&
- maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+ if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
cmpRange = maxValue;
} else {
lowBound = minValue;
@@ -1940,11 +1914,11 @@
}
BitTestBlock BTB(lowBound, cmpRange, SV,
- -1U, (CR.CaseBB == CurMBB),
+ -1U, (CR.CaseBB == SwitchBB),
CR.CaseBB, Default, BTC);
- if (CR.CaseBB == CurMBB)
- visitBitTestHeader(BTB);
+ if (CR.CaseBB == SwitchBB)
+ visitBitTestHeader(BTB, SwitchBB);
BitTestCases.push_back(BTB);
@@ -1994,7 +1968,9 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
return numCmps;
}
-void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()];
+
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -2005,7 +1981,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
- CurMBB->addSuccessor(Default);
+ SwitchMBB->addSuccessor(Default);
if (Default != NextBlock)
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -2026,38 +2002,41 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
// Get the Value to be switched on and default basic blocks, which will be
// inserted into CaseBlock records, representing basic blocks in the binary
// search tree.
- Value *SV = SI.getOperand(0);
+ const Value *SV = SI.getOperand(0);
// Push the initial CaseRec onto the worklist
CaseRecVector WorkList;
- WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+ WorkList.push_back(CaseRec(SwitchMBB,0,0,
+ CaseRange(Cases.begin(),Cases.end())));
while (!WorkList.empty()) {
// Grab a record representing a case range to process off the worklist
CaseRec CR = WorkList.back();
WorkList.pop_back();
- if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+ if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
// If the range has few cases (two or less) emit a series of specific
// tests.
- if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+ if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
continue;
// If the switch has more than 5 blocks, and at least 40% dense, and the
// target supports indirect branches, then emit a jump table rather than
// lowering the switch to a binary tree of conditional branches.
- if (handleJTSwitchCase(CR, WorkList, SV, Default))
+ if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
// Emit binary tree. We need to pick a pivot, and push left and right ranges
// onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
- handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+ handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
}
}
-void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()];
+
// Update machine-CFG edges with unique successors.
SmallVector<BasicBlock*, 32> succs; succs.reserve(I.getNumSuccessors()); @@ -2066,14 +2045,14 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); for (unsigned i = 0, e = succs.size(); i != e; ++i) - CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } -void SelectionDAGBuilder::visitFSub(User &I) { +void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); if (Ty->isVectorTy()) { @@ -2103,14 +2082,14 @@ void SelectionDAGBuilder::visitFSub(User &I) { visitBinary(I, ISD::FSUB); } -void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { +void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), Op1.getValueType(), Op1, Op2)); } -void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); if (!I.getType()->isVectorTy() && @@ -2144,11 +2123,11 @@ void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } -void SelectionDAGBuilder::visitICmp(User &I) { +void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) predicate = IC->getPredicate(); - else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) predicate = ICmpInst::Predicate(IC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2158,11 +2137,11 @@ void SelectionDAGBuilder::visitICmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } -void SelectionDAGBuilder::visitFCmp(User &I) { +void SelectionDAGBuilder::visitFCmp(const User &I) { FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) predicate = FC->getPredicate(); - else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) predicate = FCmpInst::Predicate(FC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2171,7 +2150,7 @@ void SelectionDAGBuilder::visitFCmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } -void SelectionDAGBuilder::visitSelect(User &I) { +void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); @@ -2196,14 +2175,14 @@ void SelectionDAGBuilder::visitSelect(User &I) { &Values[0], NumValues)); } -void SelectionDAGBuilder::visitTrunc(User &I) { +void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
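// visitFSub above special-cases "-0.0 - X" (and its vector form) and emits
// ISD::FNEG instead of a subtraction; -0.0 is the constant that makes the
// fold exact under IEEE semantics, since "0.0 - X" gives +0.0 rather than
// -0.0 when X is +0.0. The scalar test for the magic constant, standalone:
#include <cmath>

static bool isNegativeZero(double C) {
  return C == 0.0 && std::signbit(C);  // -0.0 == 0.0, so check the sign bit
}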
SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitZExt(User &I) { +void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2211,7 +2190,7 @@ void SelectionDAGBuilder::visitZExt(User &I) { setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSExt(User &I) { +void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2219,7 +2198,7 @@ void SelectionDAGBuilder::visitSExt(User &I) { setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPTrunc(User &I) { +void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2227,42 +2206,42 @@ void SelectionDAGBuilder::visitFPTrunc(User &I) { DestVT, N, DAG.getIntPtrConstant(0))); } -void SelectionDAGBuilder::visitFPExt(User &I){ +void SelectionDAGBuilder::visitFPExt(const User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPToUI(User &I) { +void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPToSI(User &I) { +void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitUIToFP(User &I) { +void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSIToFP(User &I){ +void SelectionDAGBuilder::visitSIToFP(const User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitPtrToInt(User &I) { +void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2271,7 +2250,7 @@ void SelectionDAGBuilder::visitPtrToInt(User &I) { setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } -void SelectionDAGBuilder::visitIntToPtr(User &I) { +void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. 
// We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2280,7 +2259,7 @@ void SelectionDAGBuilder::visitIntToPtr(User &I) { setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } -void SelectionDAGBuilder::visitBitCast(User &I) { +void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2293,7 +2272,7 @@ void SelectionDAGBuilder::visitBitCast(User &I) { setValue(&I, N); // noop cast. } -void SelectionDAGBuilder::visitInsertElement(User &I) { +void SelectionDAGBuilder::visitInsertElement(const User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), @@ -2304,7 +2283,7 @@ void SelectionDAGBuilder::visitInsertElement(User &I) { InVec, InVal, InIdx)); } -void SelectionDAGBuilder::visitExtractElement(User &I) { +void SelectionDAGBuilder::visitExtractElement(const User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), @@ -2323,7 +2302,7 @@ static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { return true; } -void SelectionDAGBuilder::visitShuffleVector(User &I) { +void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); @@ -2504,7 +2483,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { VT, &Ops[0], Ops.size())); } -void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); const Type *AggTy = I.getType(); @@ -2545,7 +2524,7 @@ void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { &Values[0], NumAggValues)); } -void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); const Type *AggTy = Op0->getType(); const Type *ValTy = I.getType(); @@ -2573,13 +2552,13 @@ void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { &Values[0], NumValValues)); } -void SelectionDAGBuilder::visitGetElementPtr(User &I) { +void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); const Type *Ty = I.getOperand(0)->getType(); - for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); + for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { - Value *Idx = *OI; + const Value *Idx = *OI; if (const StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { @@ -2599,7 +2578,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. 
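// visitGetElementPtr folds struct-field indices and constant array
// subscripts (the case handled just below) into a plain byte offset; only a
// variable subscript needs a runtime multiply. The arithmetic for the
// constant case, standalone, with the TargetData query results passed in as
// parameters (FieldOffset from getStructLayout, ElemSize from
// getTypeAllocSize):
#include <cstdint>

static uint64_t constantGEPOffset(uint64_t FieldOffset, uint64_t ElemSize,
                                  int64_t ArrayIdx) {
  // getTypeAllocSize includes tail padding, so scaling by it steps over
  // whole array elements; a negative index wraps correctly in unsigned math.
  return FieldOffset + ElemSize * (uint64_t)ArrayIdx;
}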
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); @@ -2650,7 +2629,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { setValue(&I, N); } -void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { +void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) @@ -2674,8 +2653,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = - TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -2702,7 +2680,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } -void SelectionDAGBuilder::visitLoad(LoadInst &I) { +void SelectionDAGBuilder::visitLoad(const LoadInst &I) { const Value *SV = I.getOperand(0); SDValue Ptr = getValue(SV); @@ -2762,9 +2740,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { &Values[0], NumValues)); } -void SelectionDAGBuilder::visitStore(StoreInst &I) { - Value *SrcV = I.getOperand(0); - Value *PtrV = I.getOperand(1); +void SelectionDAGBuilder::visitStore(const StoreInst &I) { + const Value *SrcV = I.getOperand(0); + const Value *PtrV = I.getOperand(1); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -2801,7 +2779,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. -void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { bool HasChain = !I.doesNotAccessMemory(); bool OnlyLoad = HasChain && I.onlyReadsMemory(); @@ -2927,7 +2905,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { /// visitIntrinsicCall: I is a call instruction /// Op is the associated NodeType for I const char * -SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, + ISD::NodeType Op) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), @@ -2943,7 +2922,7 @@ SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * -SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); @@ -2955,7 +2934,7 @@ SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitExp(CallInst &I) { +SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3081,7 +3060,7 @@ SelectionDAGBuilder::visitExp(CallInst &I) { /// visitLog - Lower a log intrinsic. 
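// visitAlloca above sizes a dynamic alloca and then, in unchanged code not
// shown in this hunk, rounds the byte count up to StackAlign so the
// adjusted stack pointer stays aligned. A standalone sketch, assuming the
// usual power-of-two rounding idiom that the DAG nodes implement:
#include <cstdint>

static uint64_t roundUpToStackAlign(uint64_t SizeInBytes,
                                    uint64_t StackAlign) {
  // StackAlign is a power of two: add StackAlign-1, then clear the low bits.
  return (SizeInBytes + StackAlign - 1) & ~(StackAlign - 1);
}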
Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog(CallInst &I) { +SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3191,7 +3170,7 @@ SelectionDAGBuilder::visitLog(CallInst &I) { /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog2(CallInst &I) { +SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3300,7 +3279,7 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog10(CallInst &I) { +SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3402,7 +3381,7 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitExp2(CallInst &I) { +SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3516,9 +3495,9 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. void -SelectionDAGBuilder::visitPow(CallInst &I) { +SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue result; - Value *Val = I.getOperand(1); + const Value *Val = I.getOperand(1); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; @@ -3664,7 +3643,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, LHS.getValueType()); - Function *F = DAG.getMachineFunction().getFunction(); + const Function *F = DAG.getMachineFunction().getFunction(); if (!F->hasFnAttr(Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. @@ -3700,12 +3679,58 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function +/// argument, create the corresponding DBG_VALUE machine instruction for it now. +/// At the end of instruction selection, they will be inserted to the entry BB. +bool +SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, + const Value *V, MDNode *Variable, + uint64_t Offset, + const SDValue &N) { + if (!isa<Argument>(V)) + return false; + + MachineFunction &MF = DAG.getMachineFunction(); + // Ignore inlined function arguments here. 
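// ExpandPowI above turns powi(x, n) with a small constant n into
// straight-line multiplies, which is how the code can bound the number of
// multiplies it inserts when optimizing for size. A standalone scalar model
// of the underlying exponentiation-by-squaring scheme (the DAG version
// emits FMUL/FDIV nodes instead of doing the arithmetic):
static double expandPowi(double X, int N) {
  unsigned E = N < 0 ? -(unsigned)N : (unsigned)N;
  double Result = 1.0, Cur = X;
  while (E) {
    if (E & 1) Result *= Cur;  // this bit of the exponent contributes Cur
    Cur *= Cur;                // square once per bit position
    E >>= 1;
  }
  return N < 0 ? 1.0 / Result : Result;  // powi(x,-n) == 1/powi(x,n)
}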
+ DIVariable DV(Variable); + if (DV.isInlinedFnArgument(MF.getFunction())) + return false; + + MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + if (MBB != &MF.front()) + return false; + + unsigned Reg = 0; + if (N.getOpcode() == ISD::CopyFromReg) { + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned PR = RegInfo.getLiveInPhysReg(Reg); + if (PR) + Reg = PR; + } + } + + if (!Reg) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI == FuncInfo.ValueMap.end()) + return false; + Reg = VMI->second; + } + + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); + FuncInfo.ArgDbgValues.push_back(&*MIB); + return true; +} /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. const char * -SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { +SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -3792,44 +3817,76 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::dbg_declare: { - // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None. - // The real handling of this intrinsic is in FastISel. - if (OptLevel != CodeGenOpt::None) - // FIXME: Variable debug info is not supported here. - return 0; - DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) return 0; MDNode *Variable = DI.getVariable(); - Value *Address = DI.getAddress(); + // Parameters are handled specially. + bool isParameter = + DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; + const Value *Address = DI.getAddress(); if (!Address) return 0; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); - AllocaInst *AI = dyn_cast<AllocaInst>(Address); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) - return 0; - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return 0; // VLAs. - int FI = SI->second; + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); + if (AI) { + // Don't handle byval arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack temporary + // alloca at this point). + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; + + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) + MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + } - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) - MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + // Build an entry in DbgOrdering. 
Debug info input nodes get an SDNodeOrder + // but do not always have a corresponding SDNode built. The SDNodeOrder + // absolute, but not relative, values are different depending on whether + // debug info exists. + ++SDNodeOrder; + SDValue &N = NodeMap[Address]; + SDDbgValue *SDV; + if (N.getNode()) { + if (isParameter && !AI) { + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (FINode) + // Byval parameter. We have a frame index at this point. + SDV = DAG.getDbgValue(Variable, FINode->getIndex(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. This might be a + // parameter of a callee function that got inlined, for example. + return 0; + } else if (AI) + SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. + return 0; + DAG.AddDbgValue(SDV, N.getNode(), isParameter); + } else { + // This isn't useful, but it shows what we're missing. + SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, isParameter); + } return 0; } case Intrinsic::dbg_value: { - DbgValueInst &DI = cast<DbgValueInst>(I); + const DbgValueInst &DI = cast<DbgValueInst>(I); if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) return 0; MDNode *Variable = DI.getVariable(); uint64_t Offset = DI.getOffset(); - Value *V = DI.getValue(); + const Value *V = DI.getValue(); if (!V) return 0; @@ -3838,26 +3895,31 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // absolute, but not relative, values are different depending on whether // debug info exists. ++SDNodeOrder; + SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { - DAG.AddDbgValue(DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder)); + SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); } else { SDValue &N = NodeMap[V]; - if (N.getNode()) - DAG.AddDbgValue(DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder), - N.getNode()); - else + if (N.getNode()) { + if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { + SDV = DAG.getDbgValue(Variable, N.getNode(), + N.getResNo(), Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + } + } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. - DAG.AddDbgValue(DAG.getDbgValue(Variable, - UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder)); + SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), + Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } } // Build a debug info table entry. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) V = BCI->getOperand(0); - AllocaInst *AI = dyn_cast<AllocaInst>(V); + const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. if (!AI) return 0; @@ -3874,7 +3936,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. 
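// The dbg_value case above picks among four lowerings, in this order: plain
// constants get a standalone SDDbgValue; a value with no SDNode yet gets an
// undef SDDbgValue so the variable is at least marked unavailable (the
// unreferenced-parameter case the comment mentions); function arguments may
// be emitted directly as DBG_VALUE machine instructions by the new
// EmitFuncArgumentDbgValue; and everything else attaches the SDDbgValue to
// the existing node. A standalone decision table (the flags are
// illustrative summaries, not LLVM API):
enum class DbgValueLowering {
  ConstantSDDbgValue,  // isa<ConstantInt> || isa<ConstantFP>
  UndefFallback,       // no SDNode in NodeMap for the value
  FuncArgumentMI,      // EmitFuncArgumentDbgValue succeeded
  AttachedToNode       // default: hang the SDDbgValue off N
};

static DbgValueLowering classifyDbgValue(bool IsConstant, bool HasNode,
                                         bool EmittedAsArgument) {
  if (IsConstant) return DbgValueLowering::ConstantSDDbgValue;
  if (!HasNode)   return DbgValueLowering::UndefFallback;
  return EmittedAsArgument ? DbgValueLowering::FuncArgumentMI
                           : DbgValueLowering::AttachedToNode;
}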
- assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); + assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; Ops[0] = DAG.getRoot(); @@ -3885,16 +3948,17 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { + MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CurMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CurMBB); + if (CallMBB->isLandingPad()) + AddCatchInfo(I, &MMI, CallMBB); else { #ifndef NDEBUG FuncInfo.CatchInfoLost.insert(&I); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) CurMBB->addLiveIn(Reg); + if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); } // Insert the EHSELECTION instruction. @@ -3977,7 +4041,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); - Value *Op1 = I.getOperand(1); + const Value *Op1 = I.getOperand(1); Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), @@ -4146,8 +4210,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - Value *Alloca = I.getOperand(1); - Constant *TypeMap = cast<Constant>(I.getOperand(2)); + const Value *Alloca = I.getOperand(1); + const Constant *TypeMap = cast<Constant>(I.getOperand(2)); FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); @@ -4244,93 +4308,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } } -/// Test if the given instruction is in a position to be optimized -/// with a tail-call. This roughly means that it's in a block with -/// a return and there's nothing that needs to be scheduled -/// between it and the return. -/// -/// This function only tests target-independent requirements. -static bool -isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, - const TargetLowering &TLI) { - const Instruction *I = CS.getInstruction(); - const BasicBlock *ExitBB = I->getParent(); - const TerminatorInst *Term = ExitBB->getTerminator(); - const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); - const Function *F = ExitBB->getParent(); - - // The block must end in a return statement or unreachable. - // - // FIXME: Decline tailcall if it's not guaranteed and if the block ends in - // an unreachable, for now. The way tailcall optimization is currently - // implemented means it will add an epilogue followed by a jump. That is - // not profitable. Also, if the callee is a special function (e.g. - // longjmp on x86), it can end up causing miscompilation that has not - // been fully understood. - if (!Ret && - (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; - - // If I will have a chain, make sure no other instruction that will have a - // chain interposes between I and the return. - if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !I->isSafeToSpeculativelyExecute()) - for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; - --BBI) { - if (&*BBI == I) - break; - // Debug info intrinsics do not get in the way of tail call optimization. 
- if (isa<DbgInfoIntrinsic>(BBI)) - continue; - if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !BBI->isSafeToSpeculativelyExecute()) - return false; - } - - // If the block ends with a void return or unreachable, it doesn't matter - // what the call's return type is. - if (!Ret || Ret->getNumOperands() == 0) return true; - - // If the return value is undef, it doesn't matter what the call's - // return type is. - if (isa<UndefValue>(Ret->getOperand(0))) return true; - - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); - if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) - return false; - - // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) - return false; - - // Otherwise, make sure the unmodified return value of I is the return value. - for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; - U = dyn_cast<Instruction>(U->getOperand(0))) { - if (!U) - return false; - if (!U->hasOneUse()) - return false; - if (U == I) - break; - // Check for a truly no-op truncate. - if (isa<TruncInst>(U) && - TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) - continue; - // Check for a truly no-op bitcast. - if (isa<BitCastInst>(U) && - (U->getOperand(0)->getType() == U->getType() || - (U->getOperand(0)->getType()->isPointerTy() && - U->getType()->isPointerTy()))) - continue; - // Otherwise it's not a true no-op. - return false; - } - - return true; -} - -void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); @@ -4378,7 +4356,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, RetTy = Type::getVoidTy(FTy->getContext()); } - for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4509,12 +4487,12 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); +static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) if (IC->isEquality()) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) continue; // Unknown instruction. @@ -4523,17 +4501,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { return true; } -static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + const Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. 
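// The isInTailCallPosition helper deleted above (this patch moves the logic
// out of SelectionDAGBuilder rather than dropping it) reduces to a
// conjunction of target-independent facts about the call site. A condensed
// standalone restatement, with the IR walking abstracted into booleans:
struct TailCallFacts {
  bool BlockEndsInRet;      // or 'unreachable' under GuaranteedTailCallOpt
  bool NothingInterposes;   // no side effects / memory reads before the ret
  bool RetUsesCallResult;   // modulo truly no-op truncates and bitcasts
  bool RetAttrsCompatible;  // caller/callee zext/sext etc. must agree
};

static bool isInTailCallPositionSketch(const TailCallFacts &F) {
  return F.BlockEndsInRet && F.NothingInterposes &&
         F.RetUsesCallResult && F.RetAttrsCompatible;
}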
- if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { + if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { // Cast pointer to the type we really want to load. - LoadInput = ConstantExpr::getBitCast(LoadInput, + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD)) + if (const Constant *LoadCst = + ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), + Builder.TD)) return Builder.getValue(LoadCst); } @@ -4566,18 +4547,18 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be /// lowered like a normal call. -bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { +bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) if (I.getNumOperands() != 4) return false; - Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || !I.getOperand(3)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; - ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); + const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 @@ -4643,11 +4624,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { } -void SelectionDAGBuilder::visitCall(CallInst &I) { +void SelectionDAGBuilder::visitCall(const CallInst &I) { const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); + const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); if (II) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); @@ -4871,14 +4852,13 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker and includes the number of /// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, - bool HasMatching,unsigned MatchingIdx, +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { - assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); - unsigned Flag = Code | (Regs.size() << 3); + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) - Flag |= 0x80000000 | (MatchingIdx << 16); + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); @@ -4994,7 +4974,7 @@ public: if (isIndirect) { const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) - llvm_report_error("Indirect operand for inline asm not a pointer!"); + report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } @@ -5214,31 +5194,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // Otherwise, we couldn't allocate enough registers for this. 
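// visitMemCmpCall above rewrites memcmp(p, q, N) whose result is only
// compared against zero, for small constant N, into wide integer loads and
// a single compare. A standalone model of the N == 4 case (the real code
// also handles 2 and 8, and defers alignment legality to the target):
#include <cstdint>
#include <cstring>

static bool memcmp4IsNonZero(const void *P, const void *Q) {
  uint32_t A, B;
  std::memcpy(&A, P, 4);  // the "*(int*)LHS" of the comment in the code
  std::memcpy(&B, Q, 4);  // the "*(int*)RHS"
  return A != B;          // byte equality and word equality coincide
}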
} -/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being -/// processed uses a memory 'm' constraint. -static bool -hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, - const TargetLowering &TLI) { - for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { - InlineAsm::ConstraintInfo &CI = CInfos[i]; - for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); - if (CType == TargetLowering::C_Memory) - return true; - } - - // Indirect operand accesses access memory. - if (CI.isIndirect) - return true; - } - - return false; -} - /// visitInlineAsm - Handle a call to an InlineAsm object. /// -void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { - InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); +void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. std::vector<SDISelAsmOperandInfo> ConstraintOperands; @@ -5274,7 +5233,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); break; } @@ -5291,7 +5250,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { ++ResNo; break; case InlineAsm::isInput: - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); break; case InlineAsm::isClobber: // Nothing to do. @@ -5304,7 +5263,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Strip bitcasts, if any. This mostly comes up for functions. OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); - if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); @@ -5327,14 +5286,15 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { - llvm_report_error("Unsupported asm: input constraint" - " with a matching output constraint of incompatible" - " type!"); + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } @@ -5356,7 +5316,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. 
- Value *OpVal = OpInfo.CallOperandVal; + const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), @@ -5409,6 +5369,11 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), TLI.getPointerTy())); + // If we have a !srcloc metadata node associated with it, we want to attach + // this to the ultimately generated inline asm machineinstr. To do this, we + // pass in the third operand as this (potentially null) inline asm MDNode. + const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -5428,8 +5393,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { assert(OpInfo.isIndirect && "Memory output must be indirect operand"); // Add information to the INLINEASM node to know about this output. - unsigned ResOpType = 4/*MEM*/ | (1<<3); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; @@ -5439,10 +5404,9 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) { - llvm_report_error("Couldn't allocate output reg for" - " constraint '" + OpInfo.ConstraintCode + "'!"); - } + if (OpInfo.AssignedRegs.Regs.empty()) + report_fatal_error("Couldn't allocate output reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); // If this is an indirect operand, store through the pointer after the // asm. @@ -5459,8 +5423,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - 6 /* EARLYCLOBBER REGDEF */ : - 2 /* REGDEF */ , + InlineAsm::Kind_RegDefEarlyClobber : + InlineAsm::Kind_RegDef, false, 0, DAG, @@ -5477,27 +5441,30 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Scan until we find the definition we already emitted of this operand. // When we find it, create a RegsForValue operand. - unsigned CurOp = 2; // The first operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - assert(((OpFlag & 7) == 2 /*REGDEF*/ || - (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ || - (OpFlag & 7) == 4 /*MEM*/) && - "Skipped past definitions?"); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; } unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - if ((OpFlag & 7) == 2 /*REGDEF*/ - || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { + if (InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
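// The InlineAsm::getFlagWord / getFlagWordForMatchingOp calls this patch
// introduces encode exactly the bit layout the old magic numbers spelled
// out inline: operand kind in bits 0..2 (REGUSE=1, REGDEF=2, IMM=3, MEM=4,
// EARLYCLOBBER REGDEF=6 in the old literals), the register count above
// that, and for tied operands bit 31 plus the matched operand index in
// bits 16..30. Re-derived standalone:
#include <cstdint>

static uint32_t flagWord(uint32_t Kind, uint32_t NumRegs) {
  return Kind | (NumRegs << 3);
}

static uint32_t flagWordForMatchingOp(uint32_t Flag, uint32_t MatchedIdx) {
  return Flag | 0x80000000u | (MatchedIdx << 16);
}

static uint32_t numOperandRegisters(uint32_t Flag) {
  return (Flag & 0xffff) >> 3;  // the "(OpFlag&0xffff)>>3" in the comment above
}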
if (OpInfo.isIndirect) { - llvm_report_error("Don't know how to handle tied indirect " - "register inputs yet!"); + // This happens on gcc/testsuite/gcc.dg/pr8788-1.c + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); } + RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); @@ -5512,22 +5479,23 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, + MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); break; - } else { - assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); - assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && - "Unexpected number of operands"); - // Add information to the INLINEASM node to know about this input. - // See InlineAsm.h isUseOperandTiedToDef. - OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); - AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); - break; } + + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); + assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && + "Unexpected number of operands"); + // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, + OpInfo.getMatchedOperand()); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + TLI.getPointerTy())); + AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); + break; } if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5537,24 +5505,26 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], hasMemory, Ops, DAG); - if (Ops.empty()) { - llvm_report_error("Invalid operand for inline asm" - " constraint '" + OpInfo.ConstraintCode + "'!"); - } + if (Ops.empty()) + report_fatal_error("Invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3); + unsigned ResOpType = + InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; - } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = 4/*MEM*/ | (1<<3); + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); @@ -5569,15 +5539,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Copy the input into the appropriate registers. 
if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal()) { - llvm_report_error("Couldn't allocate input reg for" - " constraint '"+ OpInfo.ConstraintCode +"'!"); - } + !OpInfo.AssignedRegs.areValueTypesLegal()) + report_fatal_error("Couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); break; } @@ -5585,7 +5554,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, + OpInfo.AssignedRegs.AddInlineAsmOperands( + InlineAsm::Kind_RegDefEarlyClobber, false, 0, DAG, AsmNodeOperands); break; @@ -5593,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { } } - // Finish up input operands. + // Finish up input operands. Set the input chain and add the flag last. AsmNodeOperands[0] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); @@ -5638,17 +5608,16 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { return; } - std::vector<std::pair<SDValue, Value*> > StoresToEmit; + std::vector<std::pair<SDValue, const Value *> > StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; - Value *Ptr = IndirectStoresToEmit[i].second; + const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); - } // Emit the non-flagged stores from the physregs. @@ -5669,14 +5638,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { DAG.setRoot(Chain); } -void SelectionDAGBuilder::visitVAStart(CallInst &I) { +void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGBuilder::visitVAArg(VAArgInst &I) { +void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0))); @@ -5684,14 +5653,14 @@ void SelectionDAGBuilder::visitVAArg(VAArgInst &I) { DAG.setRoot(V.getValue(1)); } -void SelectionDAGBuilder::visitVAEnd(CallInst &I) { +void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGBuilder::visitVACopy(CallInst &I) { +void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), @@ -5711,7 +5680,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, CallingConv::ID CallConv, bool isTailCall, bool isReturnValueUsed, SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + ArgListTy &Args, SelectionDAG &DAG, + DebugLoc dl) const { // Handle all of the outgoing arguments. 
SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -5862,18 +5832,19 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Res = LowerOperation(SDValue(N, 0), DAG); if (Res.getNode()) Results.push_back(Res); } -SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); return SDValue(); } -void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { +void +SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { SDValue Op = getValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && @@ -5888,9 +5859,9 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { #include "llvm/CodeGen/SelectionDAGISel.h" -void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { +void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. - Function &F = *LLVMBB->getParent(); + const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDB->getCurDebugLoc(); @@ -5915,14 +5886,14 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); - EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]); + EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, true); Ins.push_back(RetArg); } // Set up the incoming argument description vector. unsigned Idx = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); @@ -6024,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { ++i; } - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; @@ -6067,7 +6038,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! - EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction()); + EmitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to @@ -6078,51 +6049,50 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { /// the end. /// void -SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { - TerminatorInst *TI = LLVMBB->getTerminator(); +SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; // Check successor nodes' PHI nodes that expect a constant to be available // from this block. 
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - BasicBlock *SuccBB = TI->getSuccessor(succ); + const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB)) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); - PHINode *PN; // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. - for (BasicBlock::iterator I = SuccBB->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { + for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; unsigned Reg; - Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); - if (Constant *C = dyn_cast<Constant>(PHIOp)) { - unsigned &RegOut = SDB->ConstantsOut[C]; + if (const Constant *C = dyn_cast<Constant>(PHIOp)) { + unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo->CreateRegForValue(C); - SDB->CopyValueToVirtualRegister(C, RegOut); + RegOut = FuncInfo.CreateRegForValue(C); + CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { - Reg = FuncInfo->ValueMap[PHIOp]; + Reg = FuncInfo.ValueMap[PHIOp]; if (Reg == 0) { assert(isa<AllocaInst>(PHIOp) && - FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && + FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo->CreateRegForValue(PHIOp); - SDB->CopyValueToVirtualRegister(PHIOp, Reg); + Reg = FuncInfo.CreateRegForValue(PHIOp); + CopyValueToVirtualRegister(PHIOp, Reg); } } @@ -6132,77 +6102,12 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) - SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } - SDB->ConstantsOut.clear(); -} - -/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only -/// supports legal types, and it emits MachineInstrs directly instead of -/// creating SelectionDAG nodes. -/// -bool -SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, - FastISel *F) { - TerminatorInst *TI = LLVMBB->getTerminator(); - - SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size(); - - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. - for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - BasicBlock *SuccBB = TI->getSuccessor(succ); - if (!isa<PHINode>(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; - - // If this terminator has multiple identical successors (common for - // switches), only handle each succ once. 
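// HandlePHINodesInSuccessorBlocks above copies each constant PHI operand
// into a virtual register at most once per block: ConstantsOut caches the
// register, so several PHI nodes sharing a constant reuse one copy. The
// caching idiom, standalone (std::map standing in for DenseMap, a counter
// for CreateRegForValue; start NextVReg at 1, since 0 means "not copied"):
#include <map>

static unsigned regForConstant(std::map<const void *, unsigned> &ConstantsOut,
                               const void *C, unsigned &NextVReg) {
  unsigned &RegOut = ConstantsOut[C];  // value-initialized to 0 on first use
  if (RegOut == 0)
    RegOut = NextVReg++;  // stands in for CreateRegForValue + the vreg copy
  return RegOut;
}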
- if (!SuccsHandled.insert(SuccMBB)) continue; - - MachineBasicBlock::iterator MBBI = SuccMBB->begin(); - PHINode *PN; - - // At this point we know that there is a 1-1 correspondence between LLVM PHI - // nodes and Machine PHI nodes, but the incoming operands have not been - // emitted yet. - for (BasicBlock::iterator I = SuccBB->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { - // Ignore dead phi's. - if (PN->use_empty()) continue; - - // Only handle legal types. Two interesting things to note here. First, - // by bailing out early, we may leave behind some dead instructions, - // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. Second, this check is necessary becuase FastISel doesn't - // use CreateRegForValue to create registers, so it always creates - // exactly one register for each non-void instruction. - EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); - if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); - else { - SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); - return false; - } - } - - Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); - - unsigned Reg = F->getRegForValue(PHIOp); - if (Reg == 0) { - SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); - return false; - } - SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); - } - } - - return true; + ConstantsOut.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index fdcba0f0bc72..3fcd4b9dc437 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -36,6 +36,7 @@ class BasicBlock; class BitCastInst; class BranchInst; class CallInst; +class DbgValueInst; class ExtractElementInst; class ExtractValueInst; class FCmpInst; @@ -58,6 +59,7 @@ class LoadInst; class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; +class MDNode; class PHINode; class PtrToIntInst; class ReturnInst; @@ -80,11 +82,8 @@ class ZExtInst; //===----------------------------------------------------------------------===// /// SelectionDAGBuilder - This is the common target-independent lowering /// implementation that is parameterized by a TargetLowering object. -/// Also, targets can overload any lowering method. /// class SelectionDAGBuilder { - MachineBasicBlock *CurMBB; - /// CurDebugLoc - current file + line number. Changes as we build the DAG. DebugLoc CurDebugLoc; @@ -143,15 +142,16 @@ private: /// CaseRec - A struct with ctor used in lowering switches to a binary tree /// of conditional branches. struct CaseRec { - CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) : + CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, + CaseRange r) : CaseBB(bb), LT(lt), GE(ge), Range(r) {} /// CaseBB - The MBB in which to emit the compare and branch MachineBasicBlock *CaseBB; /// LT, GE - If nonzero, we know the current case value must be less-than or /// greater-than-or-equal-to these Constants. - Constant *LT; - Constant *GE; + const Constant *LT; + const Constant *GE; /// Range - A pair of iterators representing the range of case values to be /// processed at this point in the binary search tree. CaseRange Range; @@ -182,7 +182,8 @@ private: /// SelectionDAGBuilder and SDISel for the code generation of additional basic /// blocks needed by multi-case switch statements. 
struct CaseBlock { - CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle, + CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, + const Value *cmpmiddle, MachineBasicBlock *truebb, MachineBasicBlock *falsebb, MachineBasicBlock *me) : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), @@ -192,7 +193,7 @@ private: // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. // Emit by default LHS op RHS. MHS is used for range comparisons: // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). - Value *CmpLHS, *CmpMHS, *CmpRHS; + const Value *CmpLHS, *CmpMHS, *CmpRHS; // TrueBB/FalseBB - the block to branch to if the setcc is true/false. MachineBasicBlock *TrueBB, *FalseBB; // ThisBB - the block into which to emit the code for the setcc and branches @@ -214,12 +215,12 @@ private: MachineBasicBlock *Default; }; struct JumpTableHeader { - JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H, + JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, bool E = false): First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} APInt First; APInt Last; - Value *SValue; + const Value *SValue; MachineBasicBlock *HeaderBB; bool Emitted; }; @@ -236,7 +237,7 @@ private: typedef SmallVector<BitTestCase, 3> BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, Value* SV, + BitTestBlock(APInt F, APInt R, const Value* SV, unsigned Rg, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): @@ -244,7 +245,7 @@ private: Parent(P), Default(D), Cases(C) { } APInt First; APInt Range; - Value *SValue; + const Value *SValue; unsigned Reg; bool Emitted; MachineBasicBlock *Parent; @@ -256,7 +257,8 @@ public: // TLI - This is information that describes the available target features we // need for lowering. This indicates when operations are unavailable, // implemented with a libcall, etc. - TargetLowering &TLI; + const TargetMachine &TM; + const TargetLowering &TLI; SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; @@ -271,17 +273,9 @@ public: /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; - /// PHINodesToUpdate - A list of phi instructions whose operand list will - /// be updated after processing the current basic block. - std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; - - /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to - /// scheduler custom lowering), track the change here. - DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping; - // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. - DenseMap<Constant*, unsigned> ConstantsOut; + DenseMap<const Constant *, unsigned> ConstantsOut; /// FuncInfo - Information about the function as a whole. 
/// @@ -302,16 +296,16 @@ public: LLVMContext *Context; - SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli, - FunctionLoweringInfo &funcinfo, + SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : SDNodeOrder(0), TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false), Context(dag.getContext()) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa); - /// clear - Clear out the curret SelectionDAG and the associated + /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering @@ -333,22 +327,19 @@ public: SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; } unsigned getSDNodeOrder() const { return SDNodeOrder; } - void CopyValueToVirtualRegister(Value *V, unsigned Reg); + void CopyValueToVirtualRegister(const Value *V, unsigned Reg); /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten /// from how the code appeared in the source. The ordering is used by the /// scheduler to effectively turn off scheduling. void AssignOrderingToNode(const SDNode *Node); - void visit(Instruction &I); - - void visit(unsigned Opcode, User &I); + void visit(const Instruction &I); - void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; } + void visit(unsigned Opcode, const User &I); SDValue getValue(const Value *V); @@ -362,136 +353,154 @@ public: std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs); - void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - unsigned Opc); - void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *SwitchBB, unsigned Opc); + void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB); + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); - bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); - void CopyToExportRegsIfNeeded(Value *V); - void ExportFromCurrentBlock(Value *V); - void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall, + bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(const Value *V); + void ExportFromCurrentBlock(const Value *V); + void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); private: // Terminator instructions. 
- void visitRet(ReturnInst &I); - void visitBr(BranchInst &I); - void visitSwitch(SwitchInst &I); - void visitIndirectBr(IndirectBrInst &I); - void visitUnreachable(UnreachableInst &I) { /* noop */ } + void visitRet(const ReturnInst &I); + void visitBr(const BranchInst &I); + void visitSwitch(const SwitchInst &I); + void visitIndirectBr(const IndirectBrInst &I); + void visitUnreachable(const UnreachableInst &I) { /* noop */ } // Helpers for visitSwitch bool handleSmallSwitchRange(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleJTSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); public: - void visitSwitchCase(CaseBlock &CB); - void visitBitTestHeader(BitTestBlock &B); + void visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB); + void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, - BitTestCase &B); + BitTestCase &B, + MachineBasicBlock *SwitchBB); void visitJumpTable(JumpTable &JT); - void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB); private: // These all get lowered before this pass. 
- void visitInvoke(InvokeInst &I); - void visitUnwind(UnwindInst &I); - - void visitBinary(User &I, unsigned OpCode); - void visitShift(User &I, unsigned Opcode); - void visitAdd(User &I) { visitBinary(I, ISD::ADD); } - void visitFAdd(User &I) { visitBinary(I, ISD::FADD); } - void visitSub(User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(User &I); - void visitMul(User &I) { visitBinary(I, ISD::MUL); } - void visitFMul(User &I) { visitBinary(I, ISD::FMUL); } - void visitURem(User &I) { visitBinary(I, ISD::UREM); } - void visitSRem(User &I) { visitBinary(I, ISD::SREM); } - void visitFRem(User &I) { visitBinary(I, ISD::FREM); } - void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } - void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } - void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } - void visitAnd (User &I) { visitBinary(I, ISD::AND); } - void visitOr (User &I) { visitBinary(I, ISD::OR); } - void visitXor (User &I) { visitBinary(I, ISD::XOR); } - void visitShl (User &I) { visitShift(I, ISD::SHL); } - void visitLShr(User &I) { visitShift(I, ISD::SRL); } - void visitAShr(User &I) { visitShift(I, ISD::SRA); } - void visitICmp(User &I); - void visitFCmp(User &I); + void visitInvoke(const InvokeInst &I); + void visitUnwind(const UnwindInst &I); + + void visitBinary(const User &I, unsigned OpCode); + void visitShift(const User &I, unsigned Opcode); + void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } + void visitSub(const User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(const User &I); + void visitMul(const User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } + void visitURem(const User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (const User &I) { visitBinary(I, ISD::AND); } + void visitOr (const User &I) { visitBinary(I, ISD::OR); } + void visitXor (const User &I) { visitBinary(I, ISD::XOR); } + void visitShl (const User &I) { visitShift(I, ISD::SHL); } + void visitLShr(const User &I) { visitShift(I, ISD::SRL); } + void visitAShr(const User &I) { visitShift(I, ISD::SRA); } + void visitICmp(const User &I); + void visitFCmp(const User &I); // Visit the conversion instructions - void visitTrunc(User &I); - void visitZExt(User &I); - void visitSExt(User &I); - void visitFPTrunc(User &I); - void visitFPExt(User &I); - void visitFPToUI(User &I); - void visitFPToSI(User &I); - void visitUIToFP(User &I); - void visitSIToFP(User &I); - void visitPtrToInt(User &I); - void visitIntToPtr(User &I); - void visitBitCast(User &I); - - void visitExtractElement(User &I); - void visitInsertElement(User &I); - void visitShuffleVector(User &I); - - void visitExtractValue(ExtractValueInst &I); - void visitInsertValue(InsertValueInst &I); - - void visitGetElementPtr(User &I); - void visitSelect(User &I); - - void visitAlloca(AllocaInst &I); - void visitLoad(LoadInst &I); - void visitStore(StoreInst &I); - void visitPHI(PHINode &I) { } // PHI nodes are handled specially. 
- void visitCall(CallInst &I); - bool visitMemCmpCall(CallInst &I); + void visitTrunc(const User &I); + void visitZExt(const User &I); + void visitSExt(const User &I); + void visitFPTrunc(const User &I); + void visitFPExt(const User &I); + void visitFPToUI(const User &I); + void visitFPToSI(const User &I); + void visitUIToFP(const User &I); + void visitSIToFP(const User &I); + void visitPtrToInt(const User &I); + void visitIntToPtr(const User &I); + void visitBitCast(const User &I); + + void visitExtractElement(const User &I); + void visitInsertElement(const User &I); + void visitShuffleVector(const User &I); + + void visitExtractValue(const ExtractValueInst &I); + void visitInsertValue(const InsertValueInst &I); + + void visitGetElementPtr(const User &I); + void visitSelect(const User &I); + + void visitAlloca(const AllocaInst &I); + void visitLoad(const LoadInst &I); + void visitStore(const StoreInst &I); + void visitPHI(const PHINode &I); + void visitCall(const CallInst &I); + bool visitMemCmpCall(const CallInst &I); - void visitInlineAsm(CallSite CS); - const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); - void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); - - void visitPow(CallInst &I); - void visitExp2(CallInst &I); - void visitExp(CallInst &I); - void visitLog(CallInst &I); - void visitLog2(CallInst &I); - void visitLog10(CallInst &I); - - void visitVAStart(CallInst &I); - void visitVAArg(VAArgInst &I); - void visitVAEnd(CallInst &I); - void visitVACopy(CallInst &I); - - void visitUserOp1(Instruction &I) { + void visitInlineAsm(ImmutableCallSite CS); + const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); + + void visitPow(const CallInst &I); + void visitExp2(const CallInst &I); + void visitExp(const CallInst &I); + void visitLog(const CallInst &I); + void visitLog2(const CallInst &I); + void visitLog10(const CallInst &I); + + void visitVAStart(const CallInst &I); + void visitVAArg(const VAArgInst &I); + void visitVAEnd(const CallInst &I); + void visitVACopy(const CallInst &I); + + void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } - void visitUserOp2(Instruction &I) { + void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); - const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); + const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); + + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); + + /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a + /// function argument, create the corresponding DBG_VALUE machine instruction + /// for it now. At the end of instruction selection, they will be inserted to + /// the entry BB. 
+ bool EmitFuncArgumentDbgValue(const DbgValueInst &DI, + const Value *V, MDNode *Variable, + uint64_t Offset, const SDValue &N); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9b137a52f7c3..422cb7aaafc7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,10 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" -#include "llvm/CallingConv.h" -#include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" @@ -32,18 +29,13 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -52,7 +44,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" @@ -69,10 +60,6 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); -static cl::opt<bool> -SchedLiveInCopies("schedule-livein-copies", cl::Hidden, - cl::desc("Schedule copies of livein registers"), - cl::init(false)); #ifndef NDEBUG static cl::opt<bool> @@ -161,9 +148,9 @@ namespace llvm { // When new basic blocks are inserted and the edges from MBB to its successors // are modified, the method should insert pairs of <OldSucc, NewSucc> into the // DenseMap. -MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { +MachineBasicBlock * +TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { #ifndef NDEBUG dbgs() << "If a target marks an instruction with " "'usesCustomInserter', it must implement " @@ -173,115 +160,15 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return 0; } -/// EmitLiveInCopy - Emit a copy for a live in physical register. If the -/// physical register has only a single copy use, then coalesced the copy -/// if possible. 
-static void EmitLiveInCopy(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &InsertPos, - unsigned VirtReg, unsigned PhysReg, - const TargetRegisterClass *RC, - DenseMap<MachineInstr*, unsigned> &CopyRegMap, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - unsigned NumUses = 0; - MachineInstr *UseMI = NULL; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), - UE = MRI.use_end(); UI != UE; ++UI) { - UseMI = &*UI; - if (++NumUses > 1) - break; - } - - // If the number of uses is not one, or the use is not a move instruction, - // don't coalesce. Also, only coalesce away a virtual register to virtual - // register copy. - bool Coalesced = false; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (NumUses == 1 && - TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { - VirtReg = DstReg; - Coalesced = true; - } - - // Now find an ideal location to insert the copy. - MachineBasicBlock::iterator Pos = InsertPos; - while (Pos != MBB->begin()) { - MachineInstr *PrevMI = prior(Pos); - DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); - // copyRegToReg might emit multiple instructions to do a copy. - unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; - if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) - // This is what the BB looks like right now: - // r1024 = mov r0 - // ... - // r1 = mov r1024 - // - // We want to insert "r1025 = mov r1". Inserting this copy below the - // move to r1024 makes it impossible for that move to be coalesced. - // - // r1025 = mov r1 - // r1024 = mov r0 - // ... - // r1 = mov 1024 - // r2 = mov 1025 - break; // Woot! Found a good location. - --Pos; - } - - bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); - if (Coalesced) { - if (&*InsertPos == UseMI) ++InsertPos; - MBB->erase(UseMI); - } -} - -/// EmitLiveInCopies - If this is the first basic block in the function, -/// and if it has live ins that need to be copied into vregs, emit the -/// copies into the block. -static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - if (SchedLiveInCopies) { - // Emit the copies at a heuristically-determined location in the block. - DenseMap<MachineInstr*, unsigned> CopyRegMap; - MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); - for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - E = MRI.livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, - RC, CopyRegMap, MRI, TRI, TII); - } - } else { - // Emit the copies into the top of the block. 
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - E = MRI.livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - } - } -} - //===----------------------------------------------------------------------===// // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(TLI, *FuncInfo)), - SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)), + CurDAG(new SelectionDAG(tm, *FuncInfo)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), DAGSize(0) @@ -293,10 +180,6 @@ SelectionDAGISel::~SelectionDAGISel() { delete FuncInfo; } -unsigned SelectionDAGISel::MakeReg(EVT VT) { - return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); -} - void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); @@ -306,129 +189,75 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { - Function &Fn = *mf.getFunction(); - // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || EnableFastISel) && "-fast-isel-abort requires -fast-isel"); - // Get alias analysis for load/store combining. - AA = &getAnalysis<AliasAnalysis>(); - - MF = &mf; + const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - if (Fn.hasGC()) - GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn); - else - GFI = 0; + MF = &mf; RegInfo = &MF->getRegInfo(); + AA = &getAnalysis<AliasAnalysis>(); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, EnableFastISel); SDB->init(GFI, *AA); - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator())) - // Mark landing pad. - FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - - SelectAllBasicBlocks(Fn, *MF, TII); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII); - - // Add function live-ins to entry block live-in set. - for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), - E = RegInfo->livein_end(); I != E; ++I) - MF->begin()->addLiveIn(I->first); - -#ifndef NDEBUG - assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() && - "Not all catch info was assigned to a landing pad!"); -#endif + MachineBasicBlock *EntryMBB = MF->begin(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + + // Insert DBG_VALUE instructions for function arguments to the entry block. 
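// Placement rule implemented by the loop below: a DBG_VALUE whose operand is a
// physical register is hoisted to the very top of the entry block, while one
// naming a virtual register is inserted immediately after that register's
// defining instruction (which, per the FIXME, is assumed to be in the entry
// block). The list is walked in reverse so that repeated insertion at the
// block top preserves the original argument order.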
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { + MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + EntryMBB->insert(EntryMBB->begin(), MI); + else { + MachineInstr *Def = RegInfo->getVRegDef(Reg); + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(llvm::next(InsertPos), MI); + } + } + // Release function-specific state. SDB and CurDAG are already cleared + // at this point. FuncInfo->clear(); return true; } -/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is -/// attached with this instruction. -static void SetDebugLoc(Instruction *I, SelectionDAGBuilder *SDB, - FastISel *FastIS, MachineFunction *MF) { - DebugLoc DL = I->getDebugLoc(); - if (DL.isUnknown()) return; - - SDB->setCurDebugLoc(DL); - - if (FastIS) - FastIS->setCurDebugLoc(DL); - - // If the function doesn't have a default debug location yet, set - // it. This is kind of a hack. - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(DL); -} - -/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown. -static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) { - SDB->setCurDebugLoc(DebugLoc()); - if (FastIS) - FastIS->setCurDebugLoc(DebugLoc()); -} - -void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, - BasicBlock::iterator Begin, - BasicBlock::iterator End, - bool &HadTailCall) { - SDB->setCurrentBasicBlock(BB); - +MachineBasicBlock * +SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, + const BasicBlock *LLVMBB, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted - // as a tail call, cease emitting nodes for this block. - for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { - SetDebugLoc(I, SDB, 0, MF); - - if (!isa<TerminatorInst>(I)) { - SDB->visit(*I); - - // Set the current debug location back to "unknown" so that it doesn't - // spuriously apply to subsequent instructions. - ResetDebugLoc(SDB, 0); - } - } - - if (!SDB->HasTailCall) { - // Ensure that all instructions which are used outside of their defining - // blocks are available as virtual registers. Invoke is handled elsewhere. - for (BasicBlock::iterator I = Begin; I != End; ++I) - if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) - SDB->CopyToExportRegsIfNeeded(I); - - // Handle PHI nodes in successor blocks. - if (End == LLVMBB->end()) { - HandlePHINodesInSuccessorBlocks(LLVMBB); - - // Lower the terminator after the copies are emitted. - SetDebugLoc(LLVMBB->getTerminator(), SDB, 0, MF); - SDB->visit(*LLVMBB->getTerminator()); - ResetDebugLoc(SDB, 0); - } - } + // as a tail call, cease emitting nodes for this block. Terminators + // are handled below. + for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) + SDB->visit(*I); // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDB->getControlRoot()); - - // Final step, emit the lowered DAG as machine code. - CodeGenAndEmitDAG(); HadTailCall = SDB->HasTailCall; SDB->clear(); + + // Final step, emit the lowered DAG as machine code. 
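// Note that CodeGenAndEmitDAG returns the basic block the emitted code ends
// in; this can differ from BB when scheduling splits the block, so callers
// must carry on with the returned pointer.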
+ return CodeGenAndEmitDAG(BB); } namespace { @@ -493,7 +322,7 @@ void SelectionDAGISel::ShrinkDemandedOps() { InWorklist.insert(I); } - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true); + TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); while (!Worklist.empty()) { SDNode *N = Worklist.pop_back_val(); InWorklist.erase(N); @@ -613,7 +442,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } while (!Worklist.empty()); } -void SelectionDAGISel::CodeGenAndEmitDAG() { +MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { std::string GroupName; if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; @@ -763,9 +592,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // inserted into. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); + BB = Scheduler->EmitSchedule(); } else { - BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); + BB = Scheduler->EmitSchedule(); } // Free the scheduler state. @@ -776,8 +605,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DEBUG(dbgs() << "Selected machine code:\n"); - DEBUG(BB->dump()); + // Free the SelectionDAG state, now that we're finished with it. + CurDAG->clear(); + + return BB; } void SelectionDAGISel::DoInstructionSelection() { @@ -836,128 +667,94 @@ void SelectionDAGISel::DoInstructionSelection() { PostprocessISelDAG(); } +/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and +/// do other setup for EH landing-pad blocks. +void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + MCSymbol *Label = MF->getMMI().addLandingPad(BB); + + const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results + // in exceptions not being caught because no typeids are associated with + // the invoke. This may not be the only way things can go wrong, but it + // is the only way we try to work around for the moment. + const BasicBlock *LLVMBB = BB->getBasicBlock(); + const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::const_iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isa<EHSelectorInst>(I)) + break; -void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, - MachineFunction &MF, - const TargetInstrInfo &TII) { + if (I == E) + // No catch info found - try to extract some from the successor. 
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); + } +} + +void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(MF, FuncInfo->ValueMap, FuncInfo->MBBMap, - FuncInfo->StaticAllocaMap + FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap, + FuncInfo->StaticAllocaMap, + FuncInfo->PHINodesToUpdate #ifndef NDEBUG , FuncInfo->CatchInfoLost #endif ); // Iterate over all basic blocks in the function. - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - BasicBlock *LLVMBB = &*I; - BB = FuncInfo->MBBMap[LLVMBB]; + for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + const BasicBlock *LLVMBB = &*I; + MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB]; - BasicBlock::iterator const Begin = LLVMBB->begin(); - BasicBlock::iterator const End = LLVMBB->end(); - BasicBlock::iterator BI = Begin; + BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const End = LLVMBB->end(); + BasicBlock::const_iterator BI = Begin; // Lower any arguments needed in this block if this is the entry block. - bool SuppressFastISel = false; - if (LLVMBB == &Fn.getEntryBlock()) { + if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - // If any of the arguments has the byval attribute, forgo - // fast-isel in the entry block. - if (FastIS) { - unsigned j = 1; - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); - I != E; ++I, ++j) - if (Fn.paramHasAttr(j, Attribute::ByVal)) { - if (EnableFastISelVerbose || EnableFastISelAbort) - dbgs() << "FastISel skips entry block due to byval argument\n"; - SuppressFastISel = true; - break; - } - } - } - - if (BB->isLandingPad()) { - // Add a label to mark the beginning of the landing pad. Deletion of the - // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF.getMMI().addLandingPad(BB); - - const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL); - BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); - - // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) BB->addLiveIn(Reg); - - // Mark exception selector register as live in. - Reg = TLI.getExceptionSelectorRegister(); - if (Reg) BB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. 
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF.getMMI(), *FuncInfo); - } - } - + // Setup an EH landing-pad block. + if (BB->isLandingPad()) + PrepareEHLandingPad(BB); + // Before doing SelectionDAG ISel, see if FastISel has been requested. - if (FastIS && !SuppressFastISel) { + if (FastIS) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { CurDAG->setRoot(SDB->getControlRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { - // Just before the terminator instruction, insert instructions to - // feed PHI nodes in successor blocks. - if (isa<TerminatorInst>(BI)) - if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { - ++NumFastIselFailures; - ResetDebugLoc(SDB, FastIS); - if (EnableFastISelVerbose || EnableFastISelAbort) { - dbgs() << "FastISel miss: "; - BI->dump(); - } - assert(!EnableFastISelAbort && - "FastISel didn't handle a PHI in a successor"); - break; - } - - SetDebugLoc(BI, SDB, FastIS, &MF); - // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(BI)) { - ResetDebugLoc(SDB, FastIS); + if (FastIS->SelectInstruction(BI)) continue; - } - - // Clear out the debug location so that it doesn't carry over to - // unrelated instructions. - ResetDebugLoc(SDB, FastIS); // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { @@ -967,14 +764,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, BI->dump(); } - if (!BI->getType()->isVoidTy()) { + if (!BI->getType()->isVoidTy() && !BI->use_empty()) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); } bool HadTailCall = false; - SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall); + BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { @@ -1010,44 +807,41 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // block. if (BI != End) { bool HadTailCall; - SelectBasicBlock(LLVMBB, BI, End, HadTailCall); + BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall); } - FinishBasicBlock(); + FinishBasicBlock(BB); + FuncInfo->PHINodesToUpdate.clear(); } delete FastIS; } void -SelectionDAGISel::FinishBasicBlock() { - - DEBUG(dbgs() << "Target-post-processed machine code:\n"); - DEBUG(BB->dump()); +SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { DEBUG(dbgs() << "Total amount of phi nodes to update: " - << SDB->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) + << FuncInfo->PHINodesToUpdate.size() << "\n"); + DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" - << SDB->PHINodesToUpdate[i].first - << ", " << SDB->PHINodesToUpdate[i].second << ")\n"); + << FuncInfo->PHINodesToUpdate[i].first + << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. 
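// Fast path first: if switch lowering produced no extra blocks (no switch
// cases, jump tables, or bit tests), every recorded entry just appends a
// <virtual register, predecessor block> operand pair to its machine PHI.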
if (SDB->SwitchCases.empty() && SDB->JTCases.empty() && SDB->BitTestCases.empty()) { - for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (!BB->isSuccessor(PHI->getParent())) continue; - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, - false)); + PHI->addOperand( + MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } - SDB->PHINodesToUpdate.clear(); return; } @@ -1056,37 +850,38 @@ SelectionDAGISel::FinishBasicBlock() { if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->BitTestCases[i].Parent; - SDB->setCurrentBasicBlock(BB); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i]); + SDB->visitBitTestHeader(SDB->BitTestCases[i], BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->BitTestCases[i].Cases[j].ThisBB; - SDB->setCurrentBasicBlock(BB); // Emit the code if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j]); + SDB->BitTestCases[i].Cases[j], + BB); else SDB->visitBitTestCase(SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j]); + SDB->BitTestCases[i].Cases[j], + BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } // Update PHI Nodes - for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); @@ -1094,10 +889,12 @@ SelectionDAGISel::FinishBasicBlock() { // from last "case" BB. if (PHIBB == SDB->BitTestCases[i].Default) { PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. 
back().ThisBB)); } @@ -1107,7 +904,8 @@ SelectionDAGISel::FinishBasicBlock() { MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; if (cBB->isSuccessor(PHIBB)) { PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(cBB)); } } @@ -1123,40 +921,42 @@ SelectionDAGISel::FinishBasicBlock() { if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->JTCases[i].first.HeaderBB; - SDB->setCurrentBasicBlock(BB); // Emit the code - SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first); + SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, + BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } // Set the current basic block to the mbb we wish to insert the code into BB = SDB->JTCases[i].second.MBB; - SDB->setCurrentBasicBlock(BB); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); // Update PHI Nodes - for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDB->JTCases[i].second.Default) { PHI->addOperand - (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here if (BB->isSuccessor(PHIBB)) { PHI->addOperand - (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } @@ -1165,13 +965,13 @@ SelectionDAGISel::FinishBasicBlock() { // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. - for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (BB->isSuccessor(PHI->getParent())) { - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, - false)); + PHI->addOperand( + MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } @@ -1181,26 +981,26 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; - SDB->setCurrentBasicBlock(BB); - // Emit the code - SDB->visitSwitchCase(SDB->SwitchCases[i]); + // Determine the unique successors. 
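// TrueBB and FalseBB may be one and the same block, so gather the distinct
// successors up front; each successor's PHIs must be patched exactly once,
// even though a PHI can occur several times in PHINodesToUpdate.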
+ SmallVector<MachineBasicBlock *, 2> Succs; + Succs.push_back(SDB->SwitchCases[i].TrueBB); + if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SwitchCases[i].FalseBB); + + // Emit the code. Note that this could result in ThisBB being split, so + // we need to check for updates. + SDB->visitSwitchCase(SDB->SwitchCases[i], BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); + SDB->clear(); + ThisBB = CodeGenAndEmitDAG(BB); // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. - while ((BB = SDB->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. - // If new BB's are created during scheduling, the edges may have been - // updated. That is, the edge from ThisBB to BB may have been split and - // BB's predecessor is now another block. - DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI = - SDB->EdgeMapping.find(BB); - if (EI != SDB->EdgeMapping.end()) - ThisBB = EI->second; - + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + BB = Succs[i]; // BB may have been removed from the CFG if a branch was constant folded. if (ThisBB->isSuccessor(BB)) { for (MachineBasicBlock::iterator Phi = BB->begin(); @@ -1208,11 +1008,11 @@ SelectionDAGISel::FinishBasicBlock() { ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { - assert(pn != SDB->PHINodesToUpdate.size() && + assert(pn != FuncInfo->PHINodesToUpdate.size() && "Didn't find PHI entry!"); - if (SDB->PHINodesToUpdate[pn].first == Phi) { + if (FuncInfo->PHINodesToUpdate[pn].first == Phi) { Phi->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pn].second, + CreateReg(FuncInfo->PHINodesToUpdate[pn].second, false)); Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); break; @@ -1220,21 +1020,9 @@ SelectionDAGISel::FinishBasicBlock() { } } } - - // Don't process RHS if same block as LHS. - if (BB == SDB->SwitchCases[i].FalseBB) - SDB->SwitchCases[i].FalseBB = 0; - - // If we haven't handled the RHS, do so now. Otherwise, we're done. - SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB; - SDB->SwitchCases[i].FalseBB = 0; } - assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0); - SDB->clear(); } SDB->SwitchCases.clear(); - - SDB->PHINodesToUpdate.clear(); } @@ -1333,16 +1121,17 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { std::vector<SDValue> InOps; std::swap(InOps, Ops); - Ops.push_back(InOps[0]); // input chain. - Ops.push_back(InOps[1]); // input asm string. + Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 + Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 + Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc - unsigned i = 2, e = InOps.size(); + unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) --e; // Don't process a flag operand if it is here. while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); - if ((Flags & 7) != 4 /*MEM*/) { + if (!InlineAsm::isMemKind(Flags)) { // Just skip over this operand, copying the operands verbatim. 
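// Each operand flag word packs the operand kind into its low bits and the
// number of following operand values into the bits above; that count
// determines how many SDValues are copied through here, and
// InlineAsm::getFlagWord below rebuilds the encoding that the old
// '4 /*MEM*/ | (N << 3)' literal spelled out by hand.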
Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); @@ -1352,14 +1141,14 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { "Memory operand with multiple values?"); // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { - llvm_report_error("Could not match memory address. Inline asm" - " failure!"); - } + if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) + report_fatal_error("Could not match memory address. Inline asm" + " failure!"); // Add this to the output node. - Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), - MVT::i32)); + unsigned NewFlags = + InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } @@ -1436,7 +1225,8 @@ bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, /// IsLegalToFold - Returns true if the specific operand node N of /// U can be folded during instruction selection that starts at Root. bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, - bool IgnoreChains) const { + CodeGenOpt::Level OptLevel, + bool IgnoreChains) { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain @@ -2011,6 +1801,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, } } +namespace { struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. @@ -2032,6 +1823,8 @@ struct MatchScope { bool HasChainNodesMatched, HasFlagResultNodesMatched; }; +} + SDNode *SelectionDAGISel:: SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned TableSize) { @@ -2045,6 +1838,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: case ISD::TargetConstant: case ISD::TargetConstantFP: case ISD::TargetConstantPool: @@ -2383,7 +2177,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), NodeToMatch) || !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), - NodeToMatch, true/*We validate our own chains*/)) + NodeToMatch, OptLevel, + true/*We validate our own chains*/)) break; continue; @@ -2776,7 +2571,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { else Msg << "unknown intrinsic #" << iid; } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ea2ff2f36891..8a4a1b172608 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -480,7 +480,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { } /// NOTE: The constructor takes ownership of TLOF. -TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) +TargetLowering::TargetLowering(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. 
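// Zero-filling these tables is what makes every operation Legal by default:
// Legal is the zero value of the legalize-action enumeration.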
memset(OpActions, 0, sizeof(OpActions)); @@ -720,7 +721,7 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && + if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && SVT.getVectorNumElements() > NElts && NElts != 1) { TransformToType[i] = SVT; ValueTypeActions.setTypeAction(VT, Promote); @@ -1279,8 +1280,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. if (DemandedMask == 1) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1))); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { EVT VT = Op.getValueType(); @@ -1465,23 +1467,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. - if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ - APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, - OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()); - HighBits.trunc(BitWidth); - - if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { - // None of the shifted in bits are needed. Add a truncate of the - // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), - In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, - In.getOperand(1))); - } + if (TLO.LegalTypes() && + !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. + break; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (!ShAmt) + break; + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); } break; } @@ -1874,10 +1882,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isa<ConstantSDNode>(Op0.getOperand(1)) && cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. - if (Op0.getValueType() != VT) + if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), DAG.getConstant(1, VT)); + else if (Op0.getValueType().bitsLT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, Op0.getValueType()), Cond == ISD::SETEQ ? 
ISD::SETNE : ISD::SETEQ); @@ -2245,7 +2258,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. -bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const { if (isa<GlobalAddressSDNode>(N)) { GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp new file mode 100644 index 000000000000..d20477fd890e --- /dev/null +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -0,0 +1,21 @@ +//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetSelectionDAGInfo.h" +using namespace llvm; + +TargetSelectionDAGInfo::TargetSelectionDAGInfo() { +} + +TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { +} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 0e6d4796e22c..5240bef5a5ff 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -160,7 +160,7 @@ namespace { Args.clear(); Args.append(CI->op_begin() + 1, CI->op_end()); - InvokeInst *II = InvokeInst::Create(CI->getOperand(0), + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args.begin(), Args.end(), CI->getName(), CallBB); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 15ca3740b8f2..1f68a6f276f7 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -179,7 +179,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -187,7 +187,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, } DEBUG({ - dbgs() << "\nExtending: "; + dbgs() << "Extending: "; IntB.print(dbgs(), tri_); }); @@ -236,7 +236,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // If the copy instruction was killing the destination register before the // merge, find the last use and trim the live range. That will also add the // isKill marker. - if (CopyMI->killsRegister(IntA.reg)) + if (ALR->valno->isKill(CopyIdx)) TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); ++numExtends; @@ -259,6 +259,9 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; + // When BValNo is null, we're looking for a dummy clobber-value for a subreg. 
+ if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy()) + continue; if (BI->start <= AI->start && BI->end > AI->start) return true; if (BI->start > AI->start && BI->start < AI->end) @@ -369,6 +372,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) return false; + bool BHasSubRegs = false; + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + BHasSubRegs = *tri_->getSubRegisters(IntB.reg); + + // Abort if the subregisters of IntB.reg have values that are not simply the + // clobbers from the superreg. + if (BHasSubRegs) + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) + if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + return false; + // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. for (MachineRegisterInfo::use_nodbg_iterator UI = @@ -417,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, BExtend[ALR->end] = BLR->end; // Update uses of IntA of the specific Val# with IntB. - bool BHasSubRegs = false; - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - BHasSubRegs = *tri_->getSubRegisters(IntB.reg); for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), UE = mri_->use_end(); UI != UE;) { MachineOperand &UseMO = UI.getOperand(); @@ -470,7 +481,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. DEBUG({ - dbgs() << "\nExtending: "; + dbgs() << "Extending: "; IntB.print(dbgs(), tri_); }); @@ -523,7 +534,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, DEBUG({ dbgs() << " result = "; IntB.print(dbgs(), tri_); - dbgs() << '\n'; dbgs() << "\nShortening: "; IntA.print(dbgs(), tri_); }); @@ -709,7 +719,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, // kill. bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (CopyMI->killsRegister(SrcInt.reg)) + if (SrcLR->valno->isKill(DefIdx)) if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { checkForDeadDef = true; } @@ -804,7 +814,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source // is defined by a trivial computation, try to rematerialize it instead. - if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, + if (!JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, CopyDstSubIdx, UseMI)) continue; } @@ -824,6 +835,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) UseMI->addRegisterKilled(DstReg, tri_, true); } + DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) + << "\t" << *UseMI); continue; } @@ -836,11 +849,11 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, assert(OldSubIdx < SubIdx && "Conflicting sub-register index!"); else if (SubIdx) O.setSubReg(SubIdx); - // Remove would-be duplicated kill marker. 
- if (O.isKill() && UseMI->killsRegister(DstReg)) - O.setIsKill(false); O.setReg(DstReg); + DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) + << "\t" << *UseMI); + // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. if (JoinedCopies.count(UseMI)) @@ -865,38 +878,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, } } -/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate -/// due to live range lengthening as the result of coalescing. -void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, - LiveInterval &LI) { - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), - UE = mri_->use_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - if (!UseMO.isKill()) - continue; - MachineInstr *UseMI = UseMO.getParent(); - SlotIndex UseIdx = - li_->getInstructionIndex(UseMI).getUseIndex(); - const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); - if (!LR || - (!LR->valno->isKill(UseIdx.getDefIndex()) && - LR->valno->def != UseIdx.getDefIndex())) { - // Interesting problem. After coalescing reg1027's def and kill are both - // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814) - // - // bb5: - // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 - // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0 - // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def> - // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 - // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill> - // - // Do not remove the kill marker on t2LDRi12. - UseMO.setIsKill(false); - } - } -} - /// removeIntervalIfEmpty - Check if the live interval of a physical register /// is empty, if so remove it and also remove the empty intervals of its /// sub-registers. Return true if live interval is removed. @@ -1064,9 +1045,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(DstInt); if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(DstInt.reg), - mri_->use_nodbg_end()) / Length) < - (1.0 / Threshold))) + std::distance(mri_->use_nodbg_begin(DstInt.reg), + mri_->use_nodbg_end()) * Threshold < Length) return false; // If the virtual register live interval extends into a loop, turn down @@ -1122,9 +1102,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(SrcInt); if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(SrcInt.reg), - mri_->use_nodbg_end()) / Length) < - (1.0 / Threshold))) + std::distance(mri_->use_nodbg_begin(SrcInt.reg), + mri_->use_nodbg_end()) * Threshold < Length) return false; if (SrcInt.empty()) @@ -1168,20 +1147,42 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool -SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, - unsigned SmallReg, - unsigned Threshold) { - // Then make sure the intervals are *short*. 
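The isWinToJoinVRWithSrcPhysReg and isWinToJoinVRWithDstPhysReg hunks above replace a floating-point profitability test, (float)uses / Length < 1.0 / Threshold, with the cross-multiplied integer form uses * Threshold < Length, and the isWinToJoinCrossClass rewrite that continues below applies the same integer style to its use-density estimate. A minimal standalone check of the equivalence; the names uses, length, and threshold are illustrative and not part of the patch:

#include <cassert>

// Old form: (float)uses / length < 1.0 / threshold
// New form: uses * threshold < length
// For positive integers (and absent overflow) the two agree, and the
// integer form needs no division and no floating-point rounding.
static bool oldForm(unsigned uses, unsigned length, unsigned threshold) {
  return ((float)uses / length) < (1.0 / threshold);
}

static bool newForm(unsigned uses, unsigned length, unsigned threshold) {
  return uses * threshold < length;
}

int main() {
  for (unsigned uses = 0; uses <= 40; ++uses)
    for (unsigned length = 1; length <= 40; ++length)
      for (unsigned threshold = 1; threshold <= 8; ++threshold)
        assert(oldForm(uses, length, threshold) ==
               newForm(uses, length, threshold));
  return 0;
}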
- LiveInterval &LargeInt = li_->getInterval(LargeReg); - LiveInterval &SmallInt = li_->getInterval(SmallReg); - unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); - unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); - if (LargeSize > Threshold) { - unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), - mri_->use_nodbg_end()); - unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), - mri_->use_nodbg_end()); - if (SmallUses*LargeSize < LargeUses*SmallSize) +SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC) { + unsigned NewRCCount = allocatableRCRegs_[NewRC].count(); + // This heuristics is good enough in practice, but it's obviously not *right*. + // 4 is a magic number that works well enough for x86, ARM, etc. It filter + // out all but the most restrictive register classes. + if (NewRCCount > 4 || + // Early exit if the function is fairly small, coalesce aggressively if + // that's the case. For really special register classes with 3 or + // fewer registers, be a bit more careful. + (li_->getFuncInstructionCount() / NewRCCount) < 8) + return true; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + LiveInterval &DstInt = li_->getInterval(DstReg); + unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); + unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + if (SrcSize <= NewRCCount && DstSize <= NewRCCount) + return true; + // Estimate *register use density*. If it doubles or more, abort. + unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), + mri_->use_nodbg_end()); + unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), + mri_->use_nodbg_end()); + unsigned NewUses = SrcUses + DstUses; + unsigned NewSize = SrcSize + DstSize; + if (SrcRC != NewRC && SrcSize > NewRCCount) { + unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count(); + if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) + return false; + } + if (DstRC != NewRC && DstSize > NewRCCount) { + unsigned DstRCCount = allocatableRCRegs_[DstRC].count(); + if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) return false; } return true; @@ -1263,7 +1264,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { DEBUG({ - dbgs() << "Interfere with register "; + dbgs() << "\t\tInterfere with register "; li_->getInterval(RealDstReg).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1275,7 +1276,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, !tri_->isSubRegister(DstReg, *SR) && li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1298,7 +1299,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealSrcReg) && LHS.overlaps(li_->getInterval(RealSrcReg))) { DEBUG({ - dbgs() << "Interfere with register "; + dbgs() << "\t\tInterfere with register "; li_->getInterval(RealSrcReg).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1310,7 +1311,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, !tri_->isSubRegister(SrcReg, *SR) && li_->hasInterval(*SR) && 
LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1400,6 +1401,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } + // We cannot handle dual subreg indices and mismatched classes at the same + // time. + if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) { + DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n"); + return false; + } + // Check that a physical source register is compatible with dst regclass if (SrcIsPhys) { unsigned SrcSubReg = SrcSubIdx ? @@ -1517,10 +1525,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable } - unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; - unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; - unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); - if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) { + if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1568,49 +1577,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } - unsigned LargeReg = SrcReg; - unsigned SmallReg = DstReg; - // Now determine the register class of the joined register. - if (isExtSubReg) { - if (SubIdx && DstRC && DstRC->isASubClass()) { - // This is a move to a sub-register class. However, the source is a - // sub-register of a larger register class. We don't know what should - // the register class be. FIXME. - Again = true; - return false; + if (!SrcIsPhys && !DstIsPhys) { + if (isExtSubReg) { + NewRC = + SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC; + } else if (isInsSubReg) { + NewRC = + SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC; + } else { + NewRC = getCommonSubClass(SrcRC, DstRC); } - if (!DstIsPhys && !SrcIsPhys) - NewRC = SrcRC; - } else if (!SrcIsPhys && !DstIsPhys) { - NewRC = getCommonSubClass(SrcRC, DstRC); + if (!NewRC) { DEBUG(dbgs() << "\tDisjoint regclasses: " << SrcRC->getName() << ", " << DstRC->getName() << ".\n"); return false; // Not coalescable. } - if (DstRC->getSize() > SrcRC->getSize()) - std::swap(LargeReg, SmallReg); - } - // If we are joining two virtual registers and the resulting register - // class is more restrictive (fewer register, smaller size). Check if it's - // worth doing the merge. - if (!SrcIsPhys && !DstIsPhys && - (isExtSubReg || DstRC->isASubClass()) && - !isWinToJoinCrossClass(LargeReg, SmallReg, - allocatableRCRegs_[NewRC].count())) { - DEBUG(dbgs() << "\tSrc/Dest are different register classes: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - // Allow the coalescer to try again in case either side gets coalesced to - // a physical register that's compatible with the other side. e.g. - // r1024 = MOV32to32_ r1025 - // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. - Again = true; // May be possible to coalesce later. - return false; + // If we are joining two virtual registers and the resulting register + // class is more restrictive (fewer register, smaller size). 
Check if it's + // worth doing the merge. + if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); + // Allow the coalescer to try again in case either side gets coalesced to + // a physical register that's compatible with the other side. e.g. + // r1024 = MOV32to32_ r1025 + // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. + Again = true; // May be possible to coalesce later. + return false; + } } } @@ -1626,9 +1626,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { "Register mapping is horribly broken!"); DEBUG({ - dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_); - dbgs() << " and "; DstInt.print(dbgs(), tri_); - dbgs() << ": "; + dbgs() << "\t\tInspecting "; + if (SrcRC) dbgs() << SrcRC->getName() << ": "; + SrcInt.print(dbgs(), tri_); + dbgs() << "\n\t\t and "; + if (DstRC) dbgs() << DstRC->getName() << ": "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; }); // Save a copy of the virtual register live interval. We'll manually @@ -1672,10 +1676,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - float Ratio = 1.0 / Threshold; if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(JoinVReg), - mri_->use_nodbg_end()) / Length) < Ratio)) { + std::distance(mri_->use_nodbg_begin(JoinVReg), + mri_->use_nodbg_end()) * Threshold < Length) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) @@ -1701,7 +1704,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DEBUG(dbgs() << "Not profitable!\n"); + DEBUG(dbgs() << "\tNot profitable!\n"); return false; } } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { @@ -1718,12 +1721,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { JoinedCopies.insert(CopyMI); - DEBUG(dbgs() << "Trivial!\n"); + DEBUG(dbgs() << "\tTrivial!\n"); return true; } // Otherwise, we are unable to join the intervals. - DEBUG(dbgs() << "Interference!\n"); + DEBUG(dbgs() << "\tInterference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1794,12 +1797,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); - // Some live range has been lengthened due to colaescing, eliminate the - // unnecessary kills. - RemoveUnnecessaryKills(SrcReg, *ResDstInt); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) - RemoveUnnecessaryKills(DstReg, *ResDstInt); - UpdateRegDefsUses(SrcReg, DstReg, SubIdx); // If we have extended the live range of a physical register, make sure we @@ -1843,7 +1840,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } DEBUG({ - dbgs() << "\n\t\tJoined. Result = "; + dbgs() << "\t\tJoined. 
Result = "; ResDstInt->print(dbgs(), tri_); dbgs() << "\n"; }); @@ -2198,7 +2195,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -2215,7 +2212,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -2673,13 +2670,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, return NULL; } -void SimpleRegisterCoalescing::printRegName(unsigned reg) const { - if (TargetRegisterInfo::isPhysicalRegister(reg)) - dbgs() << tri_->getName(reg); - else - dbgs() << "%reg" << reg; -} - void SimpleRegisterCoalescing::releaseMemory() { JoinedCopies.clear(); ReMatCopies.clear(); @@ -2744,7 +2734,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // delete them later. DoDelete = false; } - if (MI->registerDefIsDead(DstReg)) { + if (MI->allDefsAreDead()) { LiveInterval &li = li_->getInterval(DstReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); @@ -2808,8 +2798,25 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; - } else { - ++mii; + continue; + } + + ++mii; + + // Check for now unnecessary kill flags. + if (li_->isNotInMIMap(MI)) continue; + SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + unsigned reg = MO.getReg(); + if (!reg || !li_->hasInterval(reg)) continue; + LiveInterval &LI = li_->getInterval(reg); + const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); + if (!LR || + (!LR->valno->isKill(UseIdx.getDefIndex()) && + LR->valno->def != UseIdx.getDefIndex())) + MO.setIsKill(false); } } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index f668064ab08e..1be04f32aa69 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -179,8 +179,11 @@ namespace llvm { /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. - bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg, - unsigned Threshold); + bool isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC); /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual /// register with a physical register, check if any of the virtual register @@ -220,10 +223,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate - /// due to live range lengthening as the result of coalescing. 
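RemoveUnnecessaryKills, whose declaration is deleted from this header just below, used to scrub stale kill flags eagerly after each join. The '+' lines in the runOnMachineFunction hunk above replace it with a single final sweep: every operand still carrying a kill flag is rechecked against the finished live intervals, and the flag is cleared unless the value really ends (or is defined) at that instruction. A toy model of that sweep, using simplified stand-ins for LiveInterval, SlotIndex, and MachineOperand:

#include <cstdio>
#include <vector>

// Simplified stand-ins for the machine-level types.
struct LiveRange { int start, end, killIdx; }; // value live on [start, end)
struct Operand   { int reg; bool isKill; };
struct Instr     { int index; std::vector<Operand> ops; };

static const LiveRange *containing(const std::vector<LiveRange> &li, int idx) {
  for (const LiveRange &r : li)
    if (r.start <= idx && idx < r.end) return &r;
  return nullptr;
}

// A kill flag survives only if the live range really ends at this
// instruction; coalescing may have extended the range past the old kill.
static void sweepKillFlags(std::vector<Instr> &code,
                           const std::vector<std::vector<LiveRange> > &li) {
  for (Instr &mi : code)
    for (Operand &op : mi.ops) {
      if (!op.isKill) continue;
      const LiveRange *lr = containing(li[op.reg], mi.index);
      if (!lr || lr->killIdx != mi.index)
        op.isKill = false; // stale marker left over from before the join
    }
}

int main() {
  // Register 0 now lives over [0,4) and dies at index 3, so the kill
  // flag at index 1 is stale and only the one at index 3 is kept.
  std::vector<std::vector<LiveRange> > li = { { {0, 4, 3} } };
  std::vector<Instr> code = { {1, {{0, true}}}, {3, {{0, true}}} };
  sweepKillFlags(code, li);
  std::printf("%d %d\n", (int)code[0].ops[0].isKill,
              (int)code[1].ops[0].isKill); // prints: 0 1
}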
- void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); - /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. /// Return true if live interval is removed. bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI); @@ -243,8 +242,6 @@ namespace llvm { /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; - - void printRegName(unsigned reg) const; }; } // End llvm namespace diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 7ba44031714b..63c55549a3ea 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -46,7 +46,6 @@ namespace { /// Utility class for spillers. class SpillerBase : public Spiller { protected: - MachineFunction *mf; LiveIntervals *lis; MachineFrameInfo *mfi; @@ -160,9 +159,11 @@ protected: return added; } - }; +} // end anonymous namespace + +namespace { /// Spills any live range using the spill-everywhere method with no attempt at /// folding. @@ -178,9 +179,12 @@ public: // Ignore spillIs - we don't use it. return trivialSpillEverywhere(li); } - }; +} // end anonymous namespace + +namespace { + /// Falls back on LiveIntervals::addIntervalsForSpills. class StandardSpiller : public Spiller { protected: @@ -198,9 +202,12 @@ public: SlotIndex*) { return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); } - }; +} // end anonymous namespace + +namespace { + /// When a call to spill is placed this spiller will first try to break the /// interval up into its component values (one new interval per value). /// If this fails, or if a call is placed to spill a previously split interval @@ -513,15 +520,16 @@ private: }; -} +} // end anonymous namespace + llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, const MachineLoopInfo *loopInfo, VirtRegMap *vrm) { switch (spillerOpt) { - case trivial: return new TrivialSpiller(mf, lis, vrm); break; - case standard: return new StandardSpiller(lis, loopInfo, vrm); break; - case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break; - default: llvm_unreachable("Unreachable!"); break; + default: assert(0 && "unknown spiller"); + case trivial: return new TrivialSpiller(mf, lis, vrm); + case standard: return new StandardSpiller(lis, loopInfo, vrm); + case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); } } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 12d38f0a76d0..42dfd7fbcb08 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -182,7 +182,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { if (!LS->hasInterval(FI)) continue; LiveInterval &li = LS->getInterval(FI); - li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); + if (!MI->isDebugValue()) + li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); SSRefs[FI].push_back(MI); } } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index e9e998f81d55..0ad6619ac4fd 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -40,7 +40,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Don't know how to commute: " << *MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && diff --git 
a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d6bdb1040f3e..9f959932e6d5 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -38,110 +38,88 @@ using namespace dwarf; //===----------------------------------------------------------------------===// // ELF //===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section); - if (Entry.getValue()) return Entry.getValue(); - - MCSectionELF *Result = MCSectionELF::Create(Entry.getKey(), Type, Flags, Kind, - IsExplicit, getContext()); - Entry.setValue(Result); - return Result; -} void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); TargetLoweringObjectFile::Initialize(Ctx, TM); BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); + getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, + SectionKind::getText()); DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); + getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadData()); TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadBSS()); DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".data.rel", 
MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); + getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); + getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); + getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); + getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); MergeableConst8Section = - getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); + getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); + getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); // Exception Handling Sections. @@ -150,47 +128,48 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. 
LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); // Debug Info Sections. DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); } @@ -279,9 +258,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // Infer section flags from the section name if we can. 
Kind = getELFKindForNamedSection(SectionName, Kind); - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); + return getContext().getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); } static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { @@ -300,19 +279,54 @@ static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".gnu.linkonce.d.rel.ro."; } +/// getSectionPrefixForGlobal - Return the section prefix name used by options +/// FunctionsSections and DataSections. +static const char *getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) return ".text."; + if (Kind.isReadOnly()) return ".rodata."; + + if (Kind.isThreadData()) return ".tdata."; + if (Kind.isThreadBSS()) return ".tbss."; + + if (Kind.isDataNoRel()) return ".data."; + if (Kind.isDataRelLocal()) return ".data.rel.local."; + if (Kind.isDataRel()) return ".data.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".data.rel.ro."; +} + + const MCSection *TargetLoweringObjectFileELF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { + // If we have -ffunction-section or -fdata-section then we should emit the + // global value to a uniqued section specifically for it. + bool EmitUniquedSection; + if (Kind.isText()) + EmitUniquedSection = TM.getFunctionSections(); + else + EmitUniquedSection = TM.getDataSections(); // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + if ((GV->isWeakForLinker() || EmitUniquedSection) && + !Kind.isCommon() && !Kind.isBSS()) { + const char *Prefix; + if (GV->isWeakForLinker()) + Prefix = getSectionPrefixForUniqueGlobal(Kind); + else { + assert(EmitUniquedSection); + Prefix = getSectionPrefixForGlobal(Kind); + } + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); Name.append(Sym->getName().begin(), Sym->getName().end()); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); + return getContext().getELFSection(Name.str(), + getELFSectionType(Name.str(), Kind), + getELFSectionFlags(Kind), Kind); } if (Kind.isText()) return TextSection; @@ -337,11 +351,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); + return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); } if (Kind.isMergeableConst()) { @@ -426,43 +440,6 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // MachO //===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. 
- delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { // _foo.eh symbols are currently always exported so that the linker knows @@ -473,29 +450,31 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", "__data", 0, + SectionKind::getDataRel()); CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); + = getContext().getMachOSection("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); + = getContext().getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); + = getContext().getMachOSection("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); + = getContext().getMachOSection("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back // to using it in -static mode. 
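Both the ELF and Mach-O lowerings in this file delete their privately owned UniquingMap (a StringMap keyed by section name) along with the destructors that freed it; sections are instead requested through getContext().getELFSection(...) and getContext().getMachOSection(...), so repeated requests for the same segment/section pair come back as the same object and, as the removed Mach-O comment notes, a caller that re-requests a section with mismatched flags is expected to diagnose that itself. A rough standalone sketch of the get-or-create pattern, with generic stand-in types rather than the real MCContext interface:

#include <map>
#include <memory>
#include <string>

struct Section {
  std::string Segment, Name;
  unsigned Flags;
};

// The context owns one object per distinct section, so clients can compare
// sections by pointer and never free them; this is the ownership the patch
// moves out of the TargetLoweringObjectFile subclasses.
class SectionContext {
  std::map<std::string, std::unique_ptr<Section> > Uniquing;

public:
  const Section *getSection(const std::string &Segment,
                            const std::string &Name, unsigned Flags) {
    // Unique by the segment/section pair, as the removed code did.
    std::unique_ptr<Section> &Slot = Uniquing[Segment + "," + Name];
    if (!Slot)
      Slot.reset(new Section{Segment, Name, Flags});
    return Slot.get();
  }
};

int main() {
  SectionContext Ctx;
  const Section *A = Ctx.getSection("__TEXT", "__text", 0);
  const Section *B = Ctx.getSection("__TEXT", "__text", 0);
  return A == B ? 0 : 1; // same pair, same object
}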
@@ -503,110 +482,130 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, if (TM.getRelocationModel() != Reloc::Static && TM.getTargetData()->getPointerSize() == 32) SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); + getContext().getMachOSection("__TEXT", "__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + = getContext().getMachOSection("__TEXT", "__const", 0, + SectionKind::getReadOnly()); TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getText()); ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); + = getContext().getMachOSection("__DATA","__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getText()); ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); + = getContext().getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA","__datacoal_nt", + MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); + = getContext().getMachOSection("__DATA","__common", + MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); + = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + = getContext().getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + = getContext().getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); if (TM.getRelocationModel() == Reloc::Static) { StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + = getContext().getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getDataRel()); StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + = getContext().getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getDataRel()); } else { StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", 
"__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); } // Exception Handling. - LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0, - SectionKind::getReadOnlyWithRel()); + LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, + SectionKind::getReadOnlyWithRel()); EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); + getContext().getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); // Debug Information. DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_abbrev", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_info", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_line", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_frame", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_pubnames", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_pubtypes", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_str", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_loc", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_aranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_ranges", + 
MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_macinfo", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_inlined", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); } const MCSection *TargetLoweringObjectFileMachO:: @@ -619,8 +618,8 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, StubSize); if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + "' has an invalid section specifier '" + GV->getSection()+ "': " + ErrorCode + "."); // Fall back to dropping it into the data section. @@ -629,14 +628,14 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // Get the section. const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // Okay, now that we got the section, verify that the TAA & StubSize agree. // If the user declared multiple globals with different section flags, we need // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + "' section type or attributes does not match previous" " section specifier"); } @@ -806,7 +805,7 @@ const MCSection *TargetLoweringObjectFileCOFF:: getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { // Create the map if it doesn't already exist. if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); + UniquingMap = new COFFUniqueMapTy(); COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; // Do the lookup, if we have a hit, return it. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 0b7fde7ec8f6..7f0412cf2d6d 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -9,7 +9,9 @@ #define DEBUG_TYPE "virtregrewriter" #include "VirtRegRewriter.h" +#include "VirtRegMap.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -893,7 +895,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat; + ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; // Back-schedule reloads and remats. 
MachineBasicBlock::iterator InsertLoc = @@ -1064,6 +1066,7 @@ class LocalRewriter : public VirtRegRewriter { VirtRegMap *VRM; BitVector AllocatableRegs; DenseMap<MachineInstr*, unsigned> DistanceMap; + DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; MachineBasicBlock *MBB; // Basic block currently being processed. @@ -1188,12 +1191,24 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, // Mark unused spill slots. MachineFrameInfo *MFI = MF.getFrameInfo(); int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) + if (SS != VirtRegMap::NO_STACK_SLOT) { + for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { + SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS]; if (!VRM->isSpillSlotUsed(SS)) { MFI->RemoveStackObject(SS); + for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { + MachineInstr *DVMI = DbgValues[j]; + MachineBasicBlock *DVMBB = DVMI->getParent(); + DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); + VRM->RemoveMachineInstrFromMaps(DVMI); + DVMBB->erase(DVMI); + } ++NumDSS; } + DbgValues.clear(); + } + } + Slot2DbgValues.clear(); return true; } @@ -1903,6 +1918,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, bool BackTracked = false; MachineInstr &MI = *MII; + // Remember DbgValue's which reference stack slots. + if (MI.isDebugValue() && MI.getOperand(0).isFI()) + Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); + /// ReusedOperands - Keep track of operand reuse in case we need to undo /// reuse. ReuseInfo ReusedOperands(MI, TRI); diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h index 44f9df659c81..93474e0d7ff7 100644 --- a/lib/CodeGen/VirtRegRewriter.h +++ b/lib/CodeGen/VirtRegRewriter.h @@ -10,11 +10,10 @@ #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H #define LLVM_CODEGEN_VIRTREGREWRITER_H -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "VirtRegMap.h" - namespace llvm { + class LiveIntervals; + class MachineFunction; + class VirtRegMap; /// VirtRegRewriter interface: Implementations of this interface assign /// spilled virtual registers to stack slots, rewriting the code.
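The VirtRegRewriter change pairs the new Slot2DbgValues map with the unused-spill-slot cleanup: as RewriteMBB visits instructions it records every DBG_VALUE whose first operand is a frame index, and when runOnMachineFunction later removes a spill slot that was never used it also erases the recorded instructions, so no debug info is left naming a deleted stack object. A toy model of that bookkeeping, with a bare struct standing in for the machine instruction:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Stand-in for a DBG_VALUE machine instruction referencing a frame index.
struct DbgValueInstr { int frameIndex; bool erased; };

int main() {
  std::vector<DbgValueInstr> dbgValues = { {0, false}, {2, false}, {2, false} };

  // RewriteMBB: remember which DBG_VALUEs reference each spill slot.
  std::map<int, std::vector<DbgValueInstr *> > slot2DbgValues;
  for (DbgValueInstr &dv : dbgValues)
    slot2DbgValues[dv.frameIndex].push_back(&dv);

  // runOnMachineFunction epilogue: slot 0 stayed in use, slot 2 did not,
  // so the debug instructions naming slot 2 are erased along with it.
  std::set<int> usedSlots;
  usedSlots.insert(0);
  for (auto &entry : slot2DbgValues)
    if (!usedSlots.count(entry.first))
      for (DbgValueInstr *dv : entry.second)
        dv->erased = true;

  for (const DbgValueInstr &dv : dbgValues)
    std::printf("DBG_VALUE FI#%d %s\n", dv.frameIndex,
                dv.erased ? "erased" : "kept");
  return 0;
}

The companion VirtRegRewriter.h hunk is a separate cleanup: three includes become forward declarations of LiveIntervals, MachineFunction, and VirtRegMap, which compiles because the header only needs to name those types, not use their definitions.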