author     Dimitry Andric <dim@FreeBSD.org>   2011-07-17 15:36:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2011-07-17 15:36:56 +0000
commit     411bd29eea3c360d5b48a18a17b5e87f5671af0e (patch)
tree       c8086addb211fa670a9d2b1038d8c2e453229755 /lib
parent     56fe8f14099930935e3870e3e823c322a85c1c89 (diff)
download   src-test2-411bd29eea3c360d5b48a18a17b5e87f5671af0e.tar.gz
           src-test2-411bd29eea3c360d5b48a18a17b5e87f5671af0e.zip
Diffstat (limited to 'lib')
645 files changed, 27962 insertions, 22623 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index e57ba7833295..71e0a832696c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeAliasSetPrinterPass(Registry);
   initializeNoAAPass(Registry);
   initializeBasicAliasAnalysisPass(Registry);
+  initializeBlockFrequencyPass(Registry);
   initializeBranchProbabilityInfoPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
diff --git a/lib/Analysis/BlockFrequency.cpp b/lib/Analysis/BlockFrequency.cpp
new file mode 100644
index 000000000000..4b86d1db1f04
--- /dev/null
+++ b/lib/Analysis/BlockFrequency.cpp
@@ -0,0 +1,59 @@
+//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequency.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
+                      true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
+                    true, true)
+
+char BlockFrequency::ID = 0;
+
+
+BlockFrequency::BlockFrequency() : FunctionPass(ID) {
+  initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
+  BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequency::~BlockFrequency() {
+  delete BFI;
+}
+
+void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<BranchProbabilityInfo>();
+  AU.setPreservesAll();
+}
+
+bool BlockFrequency::runOnFunction(Function &F) {
+  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+  BFI->doFunction(&F, &BPI);
+  return false;
+}
+
+/// getblockFreq - Return block frequency. Never return 0, value must be
+/// positive. Please note that initial frequency is equal to 1024. It means that
+/// we should not rely on the value itself, but only on the comparison to the
+/// other block frequencies. We do this to avoid using of floating points.
+///
+uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
+  return BFI->getBlockFreq(BB);
+}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 812fac0bb751..e39cd221b5a7 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/Instructions.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -25,7 +26,7 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
 
 char BranchProbabilityInfo::ID = 0;
 
-
+namespace {
 // Please note that BranchProbabilityAnalysis is not a FunctionPass.
 // It is created by BranchProbabilityInfo (which is a FunctionPass), which
 // provides a clear interface. Thanks to that, all heuristics and other
@@ -143,6 +144,7 @@ public:
 
   bool runOnFunction(Function &F);
 };
+} // end anonymous namespace
 
 // Calculate Edge Weights using "Return Heuristics". Predict a successor which
 // leads directly to Return Instruction will not be taken.
@@ -167,7 +169,7 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
 
   Value *Cond = BI->getCondition();
   ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
-  if (!CI)
+  if (!CI || !CI->isEquality())
     return;
 
   Value *LHS = CI->getOperand(0);
@@ -184,7 +186,7 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
   // p == 0   ->   isProb = false
   // p != q   ->   isProb = true
   // p == q   ->   isProb = false;
-  bool isProb = !CI->isEquality();
+  bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
   if (!isProb)
     std::swap(Taken, NonTaken);
 
@@ -256,6 +258,10 @@ bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
   return false;
 }
 
+void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<LoopInfo>();
+  AU.setPreservesAll();
+}
 
 bool BranchProbabilityInfo::runOnFunction(Function &F) {
   LoopInfo &LI = getAnalysis<LoopInfo>();
@@ -347,8 +353,8 @@ getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
 raw_ostream &
 BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
                                             BasicBlock *Dst) const {
-  BranchProbability Prob = getEdgeProbability(Src, Dst);
+  const BranchProbability Prob = getEdgeProbability(Src, Dst);
 
   OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
      << " probability is " << Prob
     << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 1a975bf4a582..ab846a26b4db 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis
   AliasSetTracker.cpp
   Analysis.cpp
   BasicAliasAnalysis.cpp
+  BlockFrequency.cpp
   BranchProbabilityInfo.cpp
   CFGPrinter.cpp
   CaptureTracking.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 08a6065b31ac..7fca17eb69f6 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -771,12 +771,12 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
     return ConstantExpr::getInsertValue(
                                 cast<Constant>(IVI->getAggregateOperand()),
                                 cast<Constant>(IVI->getInsertedValueOperand()),
-                                IVI->idx_begin(), IVI->getNumIndices());
+                                IVI->getIndices());
 
   if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
     return ConstantExpr::getExtractValue(
                                 cast<Constant>(EVI->getAggregateOperand()),
-                                EVI->idx_begin(), EVI->getNumIndices());
+                                EVI->getIndices());
 
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
                                   Ops.data(), Ops.size(), TD);
@@ -1399,7 +1399,7 @@ llvm::ConstantFoldCall(Function *F,
           ConstantInt::get(F->getContext(), Res),
           ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
         };
-        return ConstantStruct::get(F->getContext(), Ops, 2, false);
+        return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
       }
     }
   }
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ef5d03a07135..ac5eeeb4706a 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -219,7 +219,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
 }
 
 /// createMemberType - Create debugging information entry for a member.
-DIType DIBuilder::createMemberType(StringRef Name,
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
                                    DIFile File, unsigned LineNumber,
                                    uint64_t SizeInBits, uint64_t AlignInBits,
                                    uint64_t OffsetInBits, unsigned Flags,
@@ -227,7 +227,7 @@ DIType DIBuilder::createMemberType(StringRef Name,
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
-    File, // Or TheCU ? Ty ?
+    Scope,
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -786,7 +786,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
   Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
-  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+  return CallInst::Create(DeclareFn, Args, "", InsertBefore);
 }
 
 /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
@@ -802,9 +802,9 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
   // If this block already has a terminator then insert this intrinsic
   // before the terminator.
   if (TerminatorInst *T = InsertAtEnd->getTerminator())
-    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+    return CallInst::Create(DeclareFn, Args, "", T);
   else
-    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+    return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
 }
 
 /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
@@ -819,7 +819,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+  return CallInst::Create(ValueFn, Args, "", InsertBefore);
 }
 
 /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
@@ -834,6 +834,6 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+  return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
 }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 67f8147f4d61..b42e946f2ffa 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -727,37 +727,37 @@ void DIVariable::dump() const {
 
 /// fixupObjcLikeName - Replace contains special characters used
 /// in a typical Objective-C names with '.' in a given string.
-static void fixupObjcLikeName(std::string &Str) {
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+  bool isObjCLike = false;
   for (size_t i = 0, e = Str.size(); i < e; ++i) {
     char C = Str[i];
-    if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
-        C == '(' || C == ')')
-      Str[i] = '.';
+    if (C == '[')
+      isObjCLike = true;
+
+    if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+                       C == '+' || C == '(' || C == ')'))
+      Out.push_back('.');
+    else
+      Out.push_back(C);
   }
 }
 
 /// getFnSpecificMDNode - Return a NameMDNode, if available, that is
 /// suitable to hold function specific information.
 NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  fixupObjcLikeName(FuncName, Name);
+
+  return M.getNamedMetadata(Name.str());
 }
 
 /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
 /// to hold function specific information.
 NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
-  SmallString<32> Out;
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
-                                      .toStringRef(Out));
-
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
-                                    .toStringRef(Out));
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  fixupObjcLikeName(FuncName, Name);
+
+  return M.getOrInsertNamedMetadata(Name.str());
 }
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index dde25565ad81..6535786668bc 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -96,8 +96,6 @@ void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
   OS << "Types in use by this module:\n";
   for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
        E = UsedTypes.end(); I != E; ++I) {
-    OS << "  ";
-    WriteTypeSymbolic(OS, *I, M);
-    OS << '\n';
+    OS << "  " << **I << '\n';
   }
 }
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index a0c42f0cbfa5..e5f0a77ab67d 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
@@ -39,15 +38,6 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(IVUsers, "iv-users",
                     "Induction Variable Users", false, true)
 
-// IVUsers behavior currently depends on this temporary indvars mode. The
-// option must be defined upstream from its uses.
-namespace llvm {
-  bool DisableIVRewrite = false;
-}
-cl::opt<bool, true> DisableIVRewriteOpt(
-  "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite),
-  cl::desc("Disable canonical induction variable rewriting"));
-
 Pass *llvm::createIVUsersPass() {
   return new IVUsers();
 }
@@ -56,17 +46,20 @@ Pass *llvm::createIVUsersPass() {
 /// used by the given expression, within the context of analyzing the
 /// given loop.
 static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
-                          ScalarEvolution *SE) {
+                          ScalarEvolution *SE, LoopInfo *LI) {
   // An addrec is interesting if it's affine or if it has an interesting start.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
-    // Keep things simple. Don't touch loop-variant strides.
+    // Keep things simple. Don't touch loop-variant strides unless they're
+    // only used outside the loop and we can simplify them.
     if (AR->getLoop() == L)
-      return AR->isAffine() || !L->contains(I);
+      return AR->isAffine() ||
+             (!L->contains(I) &&
+              SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
 
     // Otherwise recurse to see if the start value is interesting, and that
     // the step value is not interesting, since we don't yet know how to
     // do effective SCEV expansions for addrecs with interesting steps.
-    return isInteresting(AR->getStart(), I, L, SE) &&
-          !isInteresting(AR->getStepRecurrence(*SE), I, L, SE);
+    return isInteresting(AR->getStart(), I, L, SE, LI) &&
+          !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
   }
 
   // An add is interesting if exactly one of its operands is interesting.
@@ -74,7 +67,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
     bool AnyInterestingYet = false;
     for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
          OI != OE; ++OI)
-      if (isInteresting(*OI, I, L, SE)) {
+      if (isInteresting(*OI, I, L, SE, LI)) {
         if (AnyInterestingYet)
           return false;
         AnyInterestingYet = true;
@@ -89,7 +82,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
 /// return true.  Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   if (!SE->isSCEVable(I->getType()))
     return false;   // Void and FP expressions cannot be reduced.
 
@@ -100,11 +93,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
   if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
     return false;
 
-  // We expect Sign/Zero extension to be eliminated from the IR before analyzing
-  // any downstream uses.
-  if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I)))
-    return false;
-
   if (!Processed.insert(I))
     return true;    // Instruction already handled.
 
@@ -113,7 +101,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
 
   // If we've come to an uninteresting expression, stop the traversal and
   // call this a user.
-  if (!isInteresting(ISE, I, L, SE))
+  if (!isInteresting(ISE, I, L, SE, LI))
     return false;
 
   SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -136,13 +124,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
       if (isa<PHINode>(User) || Processed.count(User) ||
-          !AddUsersIfInteresting(User, Phi)) {
+          !AddUsersIfInteresting(User)) {
         DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
                      << "   OF SCEV: " << *ISE << '\n');
         AddUserToIVUsers = true;
       }
-    } else if (Processed.count(User) ||
-               !AddUsersIfInteresting(User, Phi)) {
+    } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
       DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
                    << "   OF SCEV: " << *ISE << '\n');
       AddUserToIVUsers = true;
@@ -150,7 +137,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
 
     if (AddUserToIVUsers) {
       // Okay, we found a user that we cannot reduce.
-      IVUses.push_back(new IVStrideUse(this, User, I, Phi));
+      IVUses.push_back(new IVStrideUse(this, User, I));
       IVStrideUse &NewUse = IVUses.back();
       // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
      // The regular return value here is discarded; instead of recording
@@ -165,8 +152,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
   return true;
 }
 
-IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) {
-  IVUses.push_back(new IVStrideUse(this, User, Operand, Phi));
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+  IVUses.push_back(new IVStrideUse(this, User, Operand));
   return IVUses.back();
 }
 
@@ -194,7 +181,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   // them by stride.  Start by finding all of the PHI nodes in the header for
   // this loop.  If they are induction variables, inspect their uses.
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
-    (void)AddUsersIfInteresting(I, cast<PHINode>(I));
+    (void)AddUsersIfInteresting(I);
 
   return false;
 }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 9d78f8bf4044..8709f6bf9d26 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2204,15 +2204,15 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
   if (TrueVal == FalseVal)
     return TrueVal;
 
-  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
-    return FalseVal;
-  if (isa<UndefValue>(FalseVal))  // select C, X, undef -> X
-    return TrueVal;
   if (isa<UndefValue>(CondVal)) {  // select undef, X, Y -> X or Y
     if (isa<Constant>(TrueVal))
       return TrueVal;
     return FalseVal;
   }
+  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
+    return FalseVal;
+  if (isa<UndefValue>(FalseVal))  // select C, X, undef -> X
+    return TrueVal;
 
   return 0;
 }
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index f130f30c49da..89755da85097 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -592,8 +592,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
       return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
   } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
     if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
-                                     Ex->idx_begin(),
-                                     Ex->idx_end()))
+                                     Ex->getIndices()))
       if (W != V)
         return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
@@ -607,9 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
         return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
     } else if (CE->getOpcode() == Instruction::ExtractValue) {
       ArrayRef<unsigned> Indices = CE->getIndices();
-      if (Value *W = FindInsertedValue(CE->getOperand(0),
-                                       Indices.begin(),
-                                       Indices.end()))
+      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
         if (W != V)
           return findValueImpl(W, OffsetOk, Visited);
     }
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 64d215c37cc7..2283db0bc482 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -79,8 +79,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
 
     MemDepResult Res = MDA.getDependency(Inst);
     if (!Res.isNonLocal()) {
-      assert(Res.isClobber() != Res.isDef() &&
-             "Local dep should be def or clobber!");
+      assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+             "Local dep should be unknown, def or clobber!");
       Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
                                                           Res.isClobber()),
                                        static_cast<BasicBlock *>(0)));
@@ -92,8 +92,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
       for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
           I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
         const MemDepResult &Res = I->getResult();
-        assert(Res.isClobber() != Res.isDef() &&
-               "Resolved non-local call dep should be def or clobber!");
+        assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+               "Resolved non-local call dep should be unknown, def or "
+               "clobber!");
         InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
                                                           Res.isClobber()),
                                        I->getBB()));
@@ -148,16 +149,24 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
       bool isClobber = I->first.getInt();
       const BasicBlock *DepBB = I->second;
 
-      OS << "    " << (isClobber ? "Clobber" : "    Def");
+      OS << "    ";
+      if (!DepInst)
+        OS << "Unknown";
+      else if (isClobber)
+        OS << "Clobber";
+      else
+        OS << "    Def";
       if (DepBB) {
         OS << " in block ";
         WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
       }
-      OS << " from: ";
-      if (DepInst == Inst)
-        OS << "<unspecified>";
-      else
-        DepInst->print(OS);
+      if (DepInst) {
+        OS << " from: ";
+        if (DepInst == Inst)
+          OS << "<unspecified>";
+        else
+          DepInst->print(OS);
+      }
       OS << "\n";
     }
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 769c68ce425e..53d430491198 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -50,13 +50,8 @@ static bool isMallocCall(const CallInst *CI) {
   const FunctionType *FTy = Callee->getFunctionType();
   if (FTy->getNumParams() != 1)
     return false;
-  if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
-    if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
-      return false;
-    return true;
-  }
-
-  return false;
+  return FTy->getParamType(0)->isIntegerTy(32) ||
+         FTy->getParamType(0)->isIntegerTy(64);
 }
 
 /// extractMallocCall - Returns the corresponding CallInst if the instruction
@@ -211,7 +206,7 @@ const CallInst *llvm::isFreeCall(const Value *I) {
     return 0;
   if (FTy->getNumParams() != 1)
     return 0;
-  if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+  if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
     return 0;
 
   return CI;
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 5f640c01d252..bba4482f4da5 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -47,6 +47,11 @@ STATISTIC(NumUncacheNonLocalPtr,
 STATISTIC(NumCacheCompleteNonLocalPtr,
           "Number of block queries that were completely cached");
 
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+//        (500 is relatively insane.)
+static const int BlockScanLimit = 500;
+
 char MemoryDependenceAnalysis::ID = 0;
 
 // Register this pass...
@@ -180,8 +185,16 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
 MemDepResult MemoryDependenceAnalysis::
 getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
                           BasicBlock::iterator ScanIt, BasicBlock *BB) {
+  unsigned Limit = BlockScanLimit;
+
   // Walk backwards through the block, looking for dependencies
   while (ScanIt != BB->begin()) {
+    // Limit the amount of scanning we do so we don't end up with quadratic
+    // running time on extreme testcases.
+    --Limit;
+    if (!Limit)
+      return MemDepResult::getUnknown();
+
     Instruction *Inst = --ScanIt;
 
     // If this inst is a memory op, get the pointer it accessed
@@ -215,11 +228,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
     }
   }
 
-  // No dependence found.  If this is the entry block of the function, it is a
-  // clobber, otherwise it is non-local.
+  // No dependence found.  If this is the entry block of the function, it is
+  // unknown, otherwise it is non-local.
   if (BB != &BB->getParent()->getEntryBlock())
     return MemDepResult::getNonLocal();
-  return MemDepResult::getClobber(ScanIt);
+  return MemDepResult::getUnknown();
 }
 
 /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
@@ -322,9 +335,17 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
 
   const Value *MemLocBase = 0;
   int64_t MemLocOffset = 0;
-  
+
+  unsigned Limit = BlockScanLimit;
+
   // Walk backwards through the basic block, looking for dependencies.
   while (ScanIt != BB->begin()) {
+    // Limit the amount of scanning we do so we don't end up with quadratic
+    // running time on extreme testcases.
+    --Limit;
+    if (!Limit)
+      return MemDepResult::getUnknown();
+
     Instruction *Inst = --ScanIt;
 
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -458,11 +479,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     }
   }
 
-  // No dependence found.  If this is the entry block of the function, it is a
-  // clobber, otherwise it is non-local.
+  // No dependence found.  If this is the entry block of the function, it is
+  // unknown, otherwise it is non-local.
   if (BB != &BB->getParent()->getEntryBlock())
     return MemDepResult::getNonLocal();
-  return MemDepResult::getClobber(ScanIt);
+  return MemDepResult::getUnknown();
 }
 
 /// getDependency - Return the instruction on which a memory operation
@@ -490,12 +511,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
 
   // Do the scan.
   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
-    // No dependence found.  If this is the entry block of the function, it is a
-    // clobber, otherwise it is non-local.
+    // No dependence found.  If this is the entry block of the function, it is
+    // unknown, otherwise it is non-local.
     if (QueryParent != &QueryParent->getParent()->getEntryBlock())
       LocalCache = MemDepResult::getNonLocal();
     else
-      LocalCache = MemDepResult::getClobber(QueryInst);
+      LocalCache = MemDepResult::getUnknown();
   } else {
     AliasAnalysis::Location MemLoc;
     AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
@@ -514,7 +535,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
                                              QueryParent);
     } else
       // Non-memory instruction.
-      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+      LocalCache = MemDepResult::getUnknown();
   }
 
   // Remember the result!
@@ -648,10 +669,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
       Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
     } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
       // No dependence found.  If this is the entry block of the function, it is
-      // a clobber, otherwise it is non-local.
+      // a clobber, otherwise it is unknown.
      Dep = MemDepResult::getNonLocal();
     } else {
-      Dep = MemDepResult::getClobber(ScanPos);
+      Dep = MemDepResult::getUnknown();
     }
 
     // If we had a dirty entry for the block, update it.  Otherwise, just add
@@ -707,7 +728,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
     return;
   Result.clear();
   Result.push_back(NonLocalDepResult(FromBB,
-                                     MemDepResult::getClobber(FromBB->begin()),
+                                     MemDepResult::getUnknown(),
                                      const_cast<Value *>(Loc.Ptr)));
 }
 
@@ -769,7 +790,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
   // If the block has a dependency (i.e. it isn't completely transparent to
   // the value), remember the reverse association because we just added it
   // to Cache!
-  if (Dep.isNonLocal())
+  if (Dep.isNonLocal() || Dep.isUnknown())
     return Dep;
 
   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
@@ -1091,16 +1112,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
 
       // If getNonLocalPointerDepFromBB fails here, that means the cached
       // result conflicted with the Visited list; we have to conservatively
-      // assume a clobber, but this also does not block PRE of the load.
+      // assume it is unknown, but this also does not block PRE of the load.
      if (!CanTranslate ||
          getNonLocalPointerDepFromBB(PredPointer,
                                      Loc.getWithNewPtr(PredPtrVal),
                                      isLoad, Pred,
                                      Result, Visited)) {
        // Add the entry to the Result list.
-        NonLocalDepResult Entry(Pred,
-                                MemDepResult::getClobber(Pred->getTerminator()),
-                                PredPtrVal);
+        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
        Result.push_back(Entry);
 
        // Since we had a phi translation failure, the cache for CacheKey won't
@@ -1145,8 +1164,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // results from the set".  Clear out the indicator for this.
     CacheInfo->Pair = BBSkipFirstBlockPair();
 
-    // If *nothing* works, mark the pointer as being clobbered by the first
-    // instruction in this block.
+    // If *nothing* works, mark the pointer as unknown.
     //
     // If this is the magic first block, return this as a clobber of the whole
     // incoming value.  Since we can't phi translate to one of the predecessors,
@@ -1161,8 +1179,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
 
       assert(I->getResult().isNonLocal() &&
             "Should only be here with transparent block");
-      I->setResult(MemDepResult::getClobber(BB->getTerminator()));
-      ReverseNonLocalPtrDeps[BB->getTerminator()].insert(CacheKey);
+      I->setResult(MemDepResult::getUnknown());
       Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
                                          Pointer.getAddr()));
       break;
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 8e5a40008d88..befe6d2599d6 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,6 +19,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/STLExtras.h"
+
 using namespace llvm;
 
 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -159,7 +160,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   }
 
   // If we haven't found this binop, insert it.
-  Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
+  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"));
+  BO->setDebugLoc(SaveInsertPt->getDebugLoc());
   rememberInstruction(BO);
 
   // Restore the original insert point.
@@ -847,6 +849,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                         const Loop *L,
                                         const Type *ExpandTy,
                                         const Type *IntTy) {
+  assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
   // Reuse a previously-inserted PHI, if present.
   for (BasicBlock::iterator I = L->getHeader()->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I)
@@ -871,13 +875,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
         // If any of the operands don't dominate the insert position, bail.
         // Addrec operands are always loop-invariant, so this can only happen
         // if there are instructions which haven't been hoisted.
-        for (User::op_iterator OI = IncV->op_begin()+1,
-             OE = IncV->op_end(); OI != OE; ++OI)
-          if (Instruction *OInst = dyn_cast<Instruction>(OI))
-            if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
-              IncV = 0;
-              break;
-            }
+        if (L == IVIncInsertLoop) {
+          for (User::op_iterator OI = IncV->op_begin()+1,
+               OE = IncV->op_end(); OI != OE; ++OI)
+            if (Instruction *OInst = dyn_cast<Instruction>(OI))
+              if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+                IncV = 0;
+                break;
+              }
+        }
         if (!IncV)
           break;
         // Advance to the next instruction.
@@ -919,6 +925,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
                                 L->getHeader()->begin());
 
+  // StartV must be hoisted into L's preheader to dominate the new phi.
+  assert(!isa<Instruction>(StartV) ||
+         SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
+                                  L->getHeader()));
+
   // Expand code for the step value. Insert instructions right before the
   // terminator corresponding to the back-edge. Do this before creating the PHI
   // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
@@ -935,7 +946,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   BasicBlock *Header = L->getHeader();
   Builder.SetInsertPoint(Header, Header->begin());
   pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
-  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv");
+  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+                                  Twine(IVName) + ".iv");
   rememberInstruction(PN);
 
   // Create the step instructions and populate the PHI.
@@ -953,7 +965,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
     // at IVIncInsertPos.
     Instruction *InsertPos = L == IVIncInsertLoop ?
       IVIncInsertPos : Pred->getTerminator();
-    Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+    Builder.SetInsertPoint(InsertPos);
     Value *IncV;
     // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
     if (isPointer) {
@@ -971,8 +983,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
       }
     } else {
       IncV = isNegative ?
-        Builder.CreateSub(PN, StepV, "lsr.iv.next") :
-        Builder.CreateAdd(PN, StepV, "lsr.iv.next");
+        Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+        Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
       rememberInstruction(IncV);
     }
     PN->addIncoming(IncV, Pred);
@@ -1155,6 +1167,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
                                                    "indvar.next",
                                                    HP->getTerminator());
+      Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
      rememberInstruction(Add);
       CanonicalIV->addIncoming(Add, HP);
     } else {
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index dab5aebd6c64..455c91077dfb 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1352,14 +1352,15 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
   // we might be able to find the complete struct somewhere.
 
   // Find the value that is at that particular spot
-  Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+  Value *V = FindInsertedValue(From, Idxs);
 
   if (!V)
     return NULL;
 
   // Insert the value in the new (sub) aggregrate
-  return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
-                                       Idxs.end(), "tmp", InsertBefore);
+  return llvm::InsertValueInst::Create(To, V,
+                                       ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+                                       "tmp", InsertBefore);
 }
 
 // This helper takes a nested struct and extracts a part of it (which is again a
@@ -1374,15 +1375,13 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
 // insertvalue instruction somewhere).
 //
 // All inserted insertvalue instructions are inserted before InsertBefore
-static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
-                                const unsigned *idx_end,
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
                                 Instruction *InsertBefore) {
   assert(InsertBefore && "Must have someplace to insert!");
   const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
-                                                             idx_begin,
-                                                             idx_end);
+                                                             idx_range);
   Value *To = UndefValue::get(IndexedType);
-  SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
   unsigned IdxSkip = Idxs.size();
 
   return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
@@ -1394,39 +1393,37 @@ static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
 ///
 /// If InsertBefore is not null, this function will duplicate (modified)
 /// insertvalues when a part of a nested struct is extracted.
-Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
-                               const unsigned *idx_end, Instruction *InsertBefore) {
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+                               Instruction *InsertBefore) {
   // Nothing to index? Just return V then (this is useful at the end of our
   // recursion)
-  if (idx_begin == idx_end)
+  if (idx_range.empty())
     return V;
   // We have indices, so V should have an indexable type
   assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
          && "Not looking at a struct or array?");
-  assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
          && "Invalid indices for type?");
   const CompositeType *PTy = cast<CompositeType>(V->getType());
 
   if (isa<UndefValue>(V))
     return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
-                                                            idx_begin,
-                                                            idx_end));
+                                                            idx_range));
   else if (isa<ConstantAggregateZero>(V))
     return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
-                                                                   idx_begin,
-                                                                   idx_end));
+                                                                   idx_range));
   else if (Constant *C = dyn_cast<Constant>(V)) {
     if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
       // Recursively process this constant
-      return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
-                               idx_end, InsertBefore);
+      return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
+                               InsertBefore);
   } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
     // Loop the indices for the insertvalue instruction in parallel with the
     // requested indices
-    const unsigned *req_idx = idx_begin;
+    const unsigned *req_idx = idx_range.begin();
     for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
          i != e; ++i, ++req_idx) {
-      if (req_idx == idx_end) {
+      if (req_idx == idx_range.end()) {
         if (InsertBefore)
           // The requested index identifies a part of a nested aggregate. Handle
           // this specially. For example,
@@ -1438,7 +1435,10 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
           // %C = insertvalue {i32, i32 } %A, i32 11, 1
           // which allows the unused 0,0 element from the nested struct to be
           // removed.
-          return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+          return BuildSubAggregate(V,
+                                   ArrayRef<unsigned>(idx_range.begin(),
+                                                      req_idx),
+                                   InsertBefore);
         else
           // We can't handle this without inserting insertvalues
           return 0;
@@ -1448,13 +1448,14 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
       // See if the (aggregrate) value inserted into has the value we are
       // looking for, then.
       if (*req_idx != *i)
-        return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+        return FindInsertedValue(I->getAggregateOperand(), idx_range,
                                  InsertBefore);
     }
     // If we end up here, the indices of the insertvalue match with those
     // requested (though possibly only partially). Now we recursively look at
     // the inserted value, passing any remaining indices.
-    return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+    return FindInsertedValue(I->getInsertedValueOperand(),
+                             ArrayRef<unsigned>(req_idx, idx_range.end()),
                              InsertBefore);
   } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
     // If we're extracting a value from an aggregrate that was extracted from
@@ -1462,24 +1463,20 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
     // However, we will need to chain I's indices with the requested indices.
 
     // Calculate the number of indices required
-    unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+    unsigned size = I->getNumIndices() + idx_range.size();
     // Allocate some space to put the new indices in
     SmallVector<unsigned, 5> Idxs;
     Idxs.reserve(size);
     // Add indices from the extract value instruction
-    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
-         i != e; ++i)
-      Idxs.push_back(*i);
+    Idxs.append(I->idx_begin(), I->idx_end());
 
     // Add requested indices
-    for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
-      Idxs.push_back(*i);
+    Idxs.append(idx_range.begin(), idx_range.end());
 
     assert(Idxs.size() == size && "Number of indices added not correct?");
 
-    return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
-                             InsertBefore);
+    return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
   }
   // Otherwise, we don't know (such as, extracting from a function return value
   // or load instruction)
@@ -1783,3 +1780,19 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
   }
   return V;
 }
+
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+       UI != UE; ++UI) {
+    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+    if (!II) return false;
+
+    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+        II->getIntrinsicID() != Intrinsic::lifetime_end)
+      return false;
+  }
+  return true;
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 014e81602d64..3c63106e8c3b 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -406,29 +406,20 @@ lltok::Kind LLLexer::LexQuote() {
   return kind;
 }
 
-static bool JustWhitespaceNewLine(const char *&Ptr) {
-  const char *ThisPtr = Ptr;
-  while (*ThisPtr == ' ' || *ThisPtr == '\t')
-    ++ThisPtr;
-  if (*ThisPtr == '\n' || *ThisPtr == '\r') {
-    Ptr = ThisPtr;
-    return true;
-  }
-  return false;
-}
-
 /// LexExclaim:
 ///    !foo
 ///    !
 lltok::Kind LLLexer::LexExclaim() {
   // Lex a metadata name as a MetadataVar.
-  if (isalpha(CurPtr[0])) {
+  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
     ++CurPtr;
     while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
-           CurPtr[0] == '.' || CurPtr[0] == '_')
+           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
       ++CurPtr;
 
     StrVal.assign(TokStart+1, CurPtr);   // Skip !
+    UnEscapeLexed(StrVal);
     return lltok::MetadataVar;
   }
   return lltok::exclaim;
@@ -480,7 +471,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
     return lltok::kw_##STR;
 
-  KEYWORD(begin);   KEYWORD(end);
   KEYWORD(true);    KEYWORD(false);
   KEYWORD(declare); KEYWORD(define);
   KEYWORD(global);  KEYWORD(constant);
@@ -570,6 +560,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(noimplicitfloat);
   KEYWORD(naked);
   KEYWORD(hotpatch);
+  KEYWORD(nonlazybind);
 
   KEYWORD(type);
   KEYWORD(opaque);
@@ -598,26 +589,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
 #undef TYPEKEYWORD
 
-  // Handle special forms for autoupgrading.  Drop these in LLVM 3.0.  This is
-  // to avoid conflicting with the sext/zext instructions, below.
-  if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
-    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
-    if (JustWhitespaceNewLine(CurPtr))
-      return lltok::kw_signext;
-  } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
-    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
-    if (JustWhitespaceNewLine(CurPtr))
-      return lltok::kw_zeroext;
-  } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) {
-    // FIXME: Remove in LLVM 3.0.
-    // Autoupgrade malloc instruction.
-    return lltok::kw_malloc;
-  } else if (Len == 4 && !memcmp(StartChar, "free", 4)) {
-    // FIXME: Remove in LLVM 3.0.
-    // Autoupgrade malloc instruction.
-    return lltok::kw_free;
-  }
-
   // Keywords for instructions.
 #define INSTKEYWORD(STR, Enum) \
   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
@@ -664,7 +635,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   INSTKEYWORD(extractelement, ExtractElement);
   INSTKEYWORD(insertelement,  InsertElement);
   INSTKEYWORD(shufflevector,  ShuffleVector);
-  INSTKEYWORD(getresult,      ExtractValue);
   INSTKEYWORD(extractvalue,   ExtractValue);
   INSTKEYWORD(insertvalue,    InsertValue);
 #undef INSTKEYWORD
@@ -689,14 +659,6 @@ lltok::Kind LLLexer::LexIdentifier() {
     return lltok::kw_cc;
   }
 
-  // If this starts with "call", return it as CALL.  This is to support old
-  // broken .ll files.  FIXME: remove this with LLVM 3.0.
-  if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
-    CurPtr = TokStart+4;
-    UIntVal = Instruction::Call;
-    return lltok::kw_call;
-  }
-
   // Finally, if this isn't known, return an error.
   CurPtr = TokStart+1;
   return lltok::Error;
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 4fe705e1a5b5..33b913572375 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -38,7 +38,7 @@ namespace llvm {
     lltok::Kind CurKind;
     std::string StrVal;
     unsigned UIntVal;
-    const Type *TyVal;
+    Type *TyVal;
    APFloat APFloatVal;
    APSInt  APSIntVal;
 
@@ -56,7 +56,7 @@ namespace llvm {
     LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
     lltok::Kind getKind() const { return CurKind; }
     const std::string &getStrVal() const { return StrVal; }
-    const Type *getTyVal() const { return TyVal; }
+    Type *getTyVal() const { return TyVal; }
    unsigned getUIntVal() const { return UIntVal; }
     const APSInt &getAPSIntVal() const { return APSIntVal; }
     const APFloat &getAPFloatVal() const { return APFloatVal; }
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 81e0747266f1..cfc31f3db8a7 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -26,6 +26,13 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+static std::string getTypeString(const Type *T) {
+  std::string Result;
+  raw_string_ostream Tmp(Result);
+  Tmp << *T;
+  return Tmp.str();
+}
+
 /// Run: module ::= toplevelentity*
 bool LLParser::Run() {
   // Prime the lexer.
@@ -59,24 +66,6 @@ bool LLParser::ValidateEndOfModule() {
   }
 
-  // Update auto-upgraded malloc calls to "malloc".
-  // FIXME: Remove in LLVM 3.0.
-  if (MallocF) {
-    MallocF->setName("malloc");
-    // If setName() does not set the name to "malloc", then there is already a
-    // declaration of "malloc".  In that case, iterate over all calls to MallocF
-    // and get them to call the declared "malloc" instead.
-    if (MallocF->getName() != "malloc") {
-      Constant *RealMallocF = M->getFunction("malloc");
-      if (RealMallocF->getType() != MallocF->getType())
-        RealMallocF = ConstantExpr::getBitCast(RealMallocF, MallocF->getType());
-      MallocF->replaceAllUsesWith(RealMallocF);
-      MallocF->eraseFromParent();
-      MallocF = NULL;
-    }
-  }
-
-
   // If there are entries in ForwardRefBlockAddresses at this point, they are
   // references after the function was defined.  Resolve those now.
   while (!ForwardRefBlockAddresses.empty()) {
@@ -100,15 +89,16 @@ bool LLParser::ValidateEndOfModule() {
     ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
   }
 
-  if (!ForwardRefTypes.empty())
-    return Error(ForwardRefTypes.begin()->second.second,
-                 "use of undefined type named '" +
-                 ForwardRefTypes.begin()->first + "'");
-  if (!ForwardRefTypeIDs.empty())
-    return Error(ForwardRefTypeIDs.begin()->second.second,
-                 "use of undefined type '%" +
-                 Twine(ForwardRefTypeIDs.begin()->first) + "'");
+  for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
+    if (NumberedTypes[i].second.isValid())
+      return Error(NumberedTypes[i].second,
+                   "use of undefined type '%" + Twine(i) + "'");
+
+  for (StringMap<std::pair<Type*, LocTy> >::iterator I =
+       NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
+    if (I->second.second.isValid())
+      return Error(I->second.second,
+                   "use of undefined type named '" + I->getKey() + "'");
 
   if (!ForwardRefVals.empty())
     return Error(ForwardRefVals.begin()->second.second,
@@ -176,15 +166,12 @@ bool LLParser::ParseTopLevelEntities() {
     switch (Lex.getKind()) {
     default:         return TokError("expected top-level entity");
     case lltok::Eof: return false;
-    //case lltok::kw_define:
     case lltok::kw_declare: if (ParseDeclare()) return true; break;
     case lltok::kw_define:  if (ParseDefine()) return true; break;
     case lltok::kw_module:  if (ParseModuleAsm()) return true; break;
     case lltok::kw_target:  if (ParseTargetDefinition()) return true; break;
     case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
-    case lltok::kw_type:    if (ParseUnnamedType()) return true; break;
     case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
-    case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
     case lltok::LocalVar:   if (ParseNamedType()) return true; break;
     case lltok::GlobalID:   if (ParseUnnamedGlobal()) return true; break;
     case lltok::GlobalVar:  if (ParseNamedGlobal()) return true; break;
@@ -304,45 +291,35 @@ bool LLParser::ParseDepLibs() {
 }
 
 /// ParseUnnamedType:
-///   ::= 'type' type
 ///   ::= LocalVarID '=' 'type' type
 bool LLParser::ParseUnnamedType() {
-  unsigned TypeID = NumberedTypes.size();
-
-  // Handle the LocalVarID form.
-  if (Lex.getKind() == lltok::LocalVarID) {
-    if (Lex.getUIntVal() != TypeID)
-      return Error(Lex.getLoc(), "type expected to be numbered '%" +
-                   Twine(TypeID) + "'");
-    Lex.Lex(); // eat LocalVarID;
-
-    if (ParseToken(lltok::equal, "expected '=' after name"))
-      return true;
-  }
-
   LocTy TypeLoc = Lex.getLoc();
-  if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true;
+  unsigned TypeID = Lex.getUIntVal();
+  Lex.Lex(); // eat LocalVarID;
 
-  PATypeHolder Ty(Type::getVoidTy(Context));
-  if (ParseType(Ty)) return true;
-
-  // See if this type was previously referenced.
-  std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
-    FI = ForwardRefTypeIDs.find(TypeID);
-  if (FI != ForwardRefTypeIDs.end()) {
-    if (FI->second.first.get() == Ty)
-      return Error(TypeLoc, "self referential type is invalid");
+  if (ParseToken(lltok::equal, "expected '=' after name") ||
+      ParseToken(lltok::kw_type, "expected 'type' after '='"))
+    return true;
 
-    cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
-    Ty = FI->second.first.get();
-    ForwardRefTypeIDs.erase(FI);
+  if (TypeID >= NumberedTypes.size())
+    NumberedTypes.resize(TypeID+1);
+
+  Type *Result = 0;
+  if (ParseStructDefinition(TypeLoc, "",
+                            NumberedTypes[TypeID], Result)) return true;
+
+  if (!isa<StructType>(Result)) {
+    std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
+    if (Entry.first)
+      return Error(TypeLoc, "non-struct types may not be recursive");
+    Entry.first = Result;
+    Entry.second = SMLoc();
   }
 
-  NumberedTypes.push_back(Ty);
-
   return false;
 }
 
+
 /// toplevelentity
 ///   ::= LocalVar '=' 'type' type
 bool LLParser::ParseNamedType() {
@@ -350,42 +327,23 @@ bool LLParser::ParseNamedType() {
   LocTy NameLoc = Lex.getLoc();
   Lex.Lex();  // eat LocalVar.
 
-  PATypeHolder Ty(Type::getVoidTy(Context));
-
   if (ParseToken(lltok::equal, "expected '=' after name") ||
-      ParseToken(lltok::kw_type, "expected 'type' after name") ||
-      ParseType(Ty))
+      ParseToken(lltok::kw_type, "expected 'type' after name"))
     return true;
 
-  // Set the type name, checking for conflicts as we do so.
-  bool AlreadyExists = M->addTypeName(Name, Ty);
-  if (!AlreadyExists) return false;
-
-  // See if this type is a forward reference.  We need to eagerly resolve
-  // types to allow recursive type redefinitions below.
-  std::map<std::string, std::pair<PATypeHolder, LocTy> >::iterator
-    FI = ForwardRefTypes.find(Name);
-  if (FI != ForwardRefTypes.end()) {
-    if (FI->second.first.get() == Ty)
-      return Error(NameLoc, "self referential type is invalid");
-
-    cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
-    Ty = FI->second.first.get();
-    ForwardRefTypes.erase(FI);
+  Type *Result = 0;
+  if (ParseStructDefinition(NameLoc, Name,
+                            NamedTypes[Name], Result)) return true;
+
+  if (!isa<StructType>(Result)) {
+    std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
+    if (Entry.first)
+      return Error(NameLoc, "non-struct types may not be recursive");
+    Entry.first = Result;
+    Entry.second = SMLoc();
   }
-
-  // Inserting a name that is already defined, get the existing name.
-  const Type *Existing = M->getTypeByName(Name);
-  assert(Existing && "Conflict but no matching type?!");
-
-  // Otherwise, this is an attempt to redefine a type. That's okay if
-  // the redefinition is identical to the original.
-  // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0
-  if (Existing == Ty) return false;
-
-  // Any other kind of (non-equivalent) redefinition is an error.
- return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" + - Ty->getDescription() + "'"); + + return false; } @@ -561,7 +519,7 @@ bool LLParser::ParseStandaloneMetadata() { unsigned MetadataID = 0; LocTy TyLoc; - PATypeHolder Ty(Type::getVoidTy(Context)); + Type *Ty = 0; SmallVector<Value *, 16> Elts; if (ParseUInt32(MetadataID) || ParseToken(lltok::equal, "expected '=' here") || @@ -693,7 +651,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, LocTy UnnamedAddrLoc; LocTy TyLoc; - PATypeHolder Ty(Type::getVoidTy(Context)); + Type *Ty = 0; if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) || ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, @@ -811,24 +769,17 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, if (Val) { if (Val->getType() == Ty) return Val; Error(Loc, "'@" + Name + "' defined with type '" + - Val->getType()->getDescription() + "'"); + getTypeString(Val->getType()) + "'"); return 0; } // Otherwise, create a new forward reference for this value and remember it. GlobalValue *FwdVal; - if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) { - // Function types can return opaque but functions can't. - if (FT->getReturnType()->isOpaqueTy()) { - Error(Loc, "function may not return opaque type"); - return 0; - } - + if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); - } else { + else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, GlobalValue::ExternalWeakLinkage, 0, Name); - } ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; @@ -856,23 +807,17 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { if (Val) { if (Val->getType() == Ty) return Val; Error(Loc, "'@" + Twine(ID) + "' defined with type '" + - Val->getType()->getDescription() + "'"); + getTypeString(Val->getType()) + "'"); return 0; } // Otherwise, create a new forward reference for this value and remember it. GlobalValue *FwdVal; - if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) { - // Function types can return opaque but functions can't. - if (FT->getReturnType()->isOpaqueTy()) { - Error(Loc, "function may not return opaque type"); - return 0; - } + if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M); - } else { + else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, GlobalValue::ExternalWeakLinkage, 0, ""); - } ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; @@ -931,33 +876,23 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { /// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind /// indicates what kind of attribute list this is: 0: function arg, 1: result, /// 2: function attr. -/// 3: function arg after value: FIXME: REMOVE IN LLVM 3.0 bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { Attrs = Attribute::None; LocTy AttrLoc = Lex.getLoc(); while (1) { switch (Lex.getKind()) { - case lltok::kw_sext: - case lltok::kw_zext: - // Treat these as signext/zeroext if they occur in the argument list after - // the value, as in "call i8 @foo(i8 10 sext)". If they occur before the - // value, as in "call i8 @foo(i8 sext (" then it is part of a constant - // expr. 
-      // FIXME: REMOVE THIS IN LLVM 3.0
-      if (AttrKind == 3) {
-        if (Lex.getKind() == lltok::kw_sext)
-          Attrs |= Attribute::SExt;
-        else
-          Attrs |= Attribute::ZExt;
-        break;
-      }
-      // FALL THROUGH.
     default:  // End of attributes.
       if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
         return Error(AttrLoc, "invalid use of function-only attribute");
 
-      if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly))
+      // As a hack, we allow "align 2" on functions as a synonym for
+      // "alignstack 2".
+      if (AttrKind == 2 &&
+          (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment)))
+        return Error(AttrLoc, "invalid use of attribute on a function");
+
+      if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly))
         return Error(AttrLoc, "invalid use of parameter-only attribute");
 
       return false;
@@ -985,6 +920,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
     case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
     case lltok::kw_naked:           Attrs |= Attribute::Naked; break;
     case lltok::kw_hotpatch:        Attrs |= Attribute::Hotpatch; break;
+    case lltok::kw_nonlazybind:     Attrs |= Attribute::NonLazyBind; break;
 
     case lltok::kw_alignstack: {
       unsigned Alignment;
@@ -1262,166 +1198,68 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
 // Type Parsing.
 //===----------------------------------------------------------------------===//
 
-/// ParseType - Parse and resolve a full type.
-bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) {
-  LocTy TypeLoc = Lex.getLoc();
-  if (ParseTypeRec(Result)) return true;
-
-  // Verify no unresolved uprefs.
-  if (!UpRefs.empty())
-    return Error(UpRefs.back().Loc, "invalid unresolved type up reference");
-
-  if (!AllowVoid && Result.get()->isVoidTy())
-    return Error(TypeLoc, "void type only allowed for function results");
-
-  return false;
-}
-
-/// HandleUpRefs - Every time we finish a new layer of types, this function is
-/// called.  It loops through the UpRefs vector, which is a list of the
-/// currently active types.  For each type, if the up-reference is contained in
-/// the newly completed type, we decrement the level count.  When the level
-/// count reaches zero, the up-referenced type is the type that is passed in:
-/// thus we can complete the cycle.
-///
-PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
-  // If Ty isn't abstract, or if there are no up-references in it, then there is
-  // nothing to resolve here.
-  if (!ty->isAbstract() || UpRefs.empty()) return ty;
-
-  PATypeHolder Ty(ty);
-#if 0
-  dbgs() << "Type '" << Ty->getDescription()
-         << "' newly formed.  Resolving upreferences.\n"
-         << UpRefs.size() << " upreferences active!\n";
-#endif
-
-  // If we find any resolvable upreferences (i.e., those whose NestingLevel goes
-  // to zero), we resolve them all together before we resolve them to Ty.  At
-  // the end of the loop, if there is anything to resolve to Ty, it will be in
-  // this variable.
-  OpaqueType *TypeToResolve = 0;
-
-  for (unsigned i = 0; i != UpRefs.size(); ++i) {
-    // Determine if 'Ty' directly contains this up-references 'LastContainedTy'.
-    bool ContainsType =
-      std::find(Ty->subtype_begin(), Ty->subtype_end(),
-                UpRefs[i].LastContainedTy) != Ty->subtype_end();
-
-#if 0
-    dbgs() << "  UR#" << i << " - TypeContains(" << Ty->getDescription() << ", "
-           << UpRefs[i].LastContainedTy->getDescription() << ") = "
-           << (ContainsType ? "true" : "false")
-           << " level=" << UpRefs[i].NestingLevel << "\n";
-#endif
-    if (!ContainsType)
-      continue;
-
-    // Decrement level of upreference
-    unsigned Level = --UpRefs[i].NestingLevel;
-    UpRefs[i].LastContainedTy = Ty;
-
-    // If the Up-reference has a non-zero level, it shouldn't be resolved yet.
-    if (Level != 0)
-      continue;
-
-#if 0
-    dbgs() << "  * Resolving upreference for " << UpRefs[i].UpRefTy << "\n";
-#endif
-    if (!TypeToResolve)
-      TypeToResolve = UpRefs[i].UpRefTy;
-    else
-      UpRefs[i].UpRefTy->refineAbstractTypeTo(TypeToResolve);
-    UpRefs.erase(UpRefs.begin()+i);   // Remove from upreference list.
-    --i;                              // Do not skip the next element.
-  }
-
-  if (TypeToResolve)
-    TypeToResolve->refineAbstractTypeTo(Ty);
-
-  return Ty;
-}
-
-
-/// ParseTypeRec - The recursive function used to process the internal
-/// implementation details of types.
-bool LLParser::ParseTypeRec(PATypeHolder &Result) {
+/// ParseType - Parse a type.
+bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
+  SMLoc TypeLoc = Lex.getLoc();
   switch (Lex.getKind()) {
   default:
     return TokError("expected type");
   case lltok::Type:
-    // TypeRec ::= 'float' | 'void' (etc)
+    // Type ::= 'float' | 'void' (etc)
     Result = Lex.getTyVal();
     Lex.Lex();
    break;
-  case lltok::kw_opaque:
-    // TypeRec ::= 'opaque'
-    Result = OpaqueType::get(Context);
-    Lex.Lex();
-    break;
   case lltok::lbrace:
-    // TypeRec ::= '{' ... '}'
-    if (ParseStructType(Result, false))
+    // Type ::= StructType
+    if (ParseAnonStructType(Result, false))
       return true;
    break;
   case lltok::lsquare:
-    // TypeRec ::= '[' ... ']'
+    // Type ::= '[' ... ']'
     Lex.Lex(); // eat the lsquare.
     if (ParseArrayVectorType(Result, false))
       return true;
    break;
   case lltok::less: // Either vector or packed struct.
-    // TypeRec ::= '<' ... '>'
+    // Type ::= '<' ... '>'
     Lex.Lex();
     if (Lex.getKind() == lltok::lbrace) {
-      if (ParseStructType(Result, true) ||
+      if (ParseAnonStructType(Result, true) ||
          ParseToken(lltok::greater, "expected '>' at end of packed struct"))
        return true;
     } else if (ParseArrayVectorType(Result, true))
      return true;
    break;
-  case lltok::LocalVar:
-  case lltok::StringConstant:  // FIXME: REMOVE IN LLVM 3.0
-    // TypeRec ::= %foo
-    if (const Type *T = M->getTypeByName(Lex.getStrVal())) {
-      Result = T;
-    } else {
-      Result = OpaqueType::get(Context);
-      ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(),
-                                            std::make_pair(Result,
-                                                           Lex.getLoc())));
-      M->addTypeName(Lex.getStrVal(), Result.get());
+  case lltok::LocalVar: {
+    // Type ::= %foo
+    std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
+
+    // If the type hasn't been defined yet, create a forward definition and
+    // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) { + Entry.first = StructType::createNamed(Context, Lex.getStrVal()); + Entry.second = Lex.getLoc(); } + Result = Entry.first; Lex.Lex(); break; + } - case lltok::LocalVarID: - // TypeRec ::= %4 - if (Lex.getUIntVal() < NumberedTypes.size()) - Result = NumberedTypes[Lex.getUIntVal()]; - else { - std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator - I = ForwardRefTypeIDs.find(Lex.getUIntVal()); - if (I != ForwardRefTypeIDs.end()) - Result = I->second.first; - else { - Result = OpaqueType::get(Context); - ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(), - std::make_pair(Result, - Lex.getLoc()))); - } + case lltok::LocalVarID: { + // Type ::= %4 + if (Lex.getUIntVal() >= NumberedTypes.size()) + NumberedTypes.resize(Lex.getUIntVal()+1); + std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()]; + + // If the type hasn't been defined yet, create a forward definition and + // remember where that forward def'n was seen (in case it never is defined). + if (Entry.first == 0) { + Entry.first = StructType::createNamed(Context, ""); + Entry.second = Lex.getLoc(); } + Result = Entry.first; Lex.Lex(); break; - case lltok::backslash: { - // TypeRec ::= '\' 4 - Lex.Lex(); - unsigned Val; - if (ParseUInt32(Val)) return true; - OpaqueType *OT = OpaqueType::get(Context); //Use temporary placeholder. - UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT)); - Result = OT; - break; } } @@ -1429,34 +1267,37 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { while (1) { switch (Lex.getKind()) { // End of type. - default: return false; + default: + if (!AllowVoid && Result->isVoidTy()) + return Error(TypeLoc, "void type only allowed for function results"); + return false; - // TypeRec ::= TypeRec '*' + // Type ::= Type '*' case lltok::star: - if (Result.get()->isLabelTy()) + if (Result->isLabelTy()) return TokError("basic block pointers are invalid"); - if (Result.get()->isVoidTy()) - return TokError("pointers to void are invalid; use i8* instead"); - if (!PointerType::isValidElementType(Result.get())) + if (Result->isVoidTy()) + return TokError("pointers to void are invalid - use i8* instead"); + if (!PointerType::isValidElementType(Result)) return TokError("pointer to this type is invalid"); - Result = HandleUpRefs(PointerType::getUnqual(Result.get())); + Result = PointerType::getUnqual(Result); Lex.Lex(); break; - // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*' + // Type ::= Type 'addrspace' '(' uint32 ')' '*' case lltok::kw_addrspace: { - if (Result.get()->isLabelTy()) + if (Result->isLabelTy()) return TokError("basic block pointers are invalid"); - if (Result.get()->isVoidTy()) + if (Result->isVoidTy()) return TokError("pointers to void are invalid; use i8* instead"); - if (!PointerType::isValidElementType(Result.get())) + if (!PointerType::isValidElementType(Result)) return TokError("pointer to this type is invalid"); unsigned AddrSpace; if (ParseOptionalAddrSpace(AddrSpace) || ParseToken(lltok::star, "expected '*' in address space")) return true; - Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace)); + Result = PointerType::get(Result, AddrSpace); break; } @@ -1487,7 +1328,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, // Parse the argument. 
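These two cases are the heart of the new scheme: a use of %foo or %4 ahead of its definition materializes an empty identified struct, and the eventual definition completes that same object in place. A minimal sketch of a recursive type built with this API (2011-era header paths; the function name is illustrative):

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"

  void buildRecursiveList(llvm::LLVMContext &Ctx) {
    using namespace llvm;
    // A use of %list before its definition: forward-declared, body-less.
    StructType *List = StructType::createNamed(Ctx, "list");
    // Later, '%list = type { i32, %list* }' completes the same object,
    // so every earlier use of the pointer sees the finished body.
    Type *Elts[] = { Type::getInt32Ty(Ctx), PointerType::getUnqual(List) };
    List->setBody(Elts, /*isPacked=*/false);
  }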
LocTy ArgLoc; - PATypeHolder ArgTy(Type::getVoidTy(Context)); + Type *ArgTy = 0; unsigned ArgAttrs1 = Attribute::None; unsigned ArgAttrs2 = Attribute::None; Value *V; @@ -1495,11 +1336,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, return true; // Otherwise, handle normal operands. - if (ParseOptionalAttrs(ArgAttrs1, 0) || - ParseValue(ArgTy, V, PFS) || - // FIXME: Should not allow attributes after the argument, remove this - // in LLVM 3.0. - ParseOptionalAttrs(ArgAttrs2, 3)) + if (ParseOptionalAttrs(ArgAttrs1, 0) || ParseValue(ArgTy, V, PFS)) return true; ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2)); } @@ -1511,7 +1348,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, /// ParseArgumentList - Parse the argument list for a function type or function -/// prototype. If 'inType' is true then we are parsing a FunctionType. +/// prototype. /// ::= '(' ArgTypeListI ')' /// ArgTypeListI /// ::= /*empty*/ @@ -1519,8 +1356,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, /// ::= ArgTypeList ',' '...' /// ::= ArgType (',' ArgType)* /// -bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList, - bool &isVarArg, bool inType) { +bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, + bool &isVarArg){ isVarArg = false; assert(Lex.getKind() == lltok::lparen); Lex.Lex(); // eat the (. @@ -1532,21 +1369,17 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList, Lex.Lex(); } else { LocTy TypeLoc = Lex.getLoc(); - PATypeHolder ArgTy(Type::getVoidTy(Context)); + Type *ArgTy = 0; unsigned Attrs; std::string Name; - // If we're parsing a type, use ParseTypeRec, because we allow recursive - // types (such as a function returning a pointer to itself). If parsing a - // function prototype, we require fully resolved types. - if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) || + if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true; if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); - if (Lex.getKind() == lltok::LocalVar || - Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0 + if (Lex.getKind() == lltok::LocalVar) { Name = Lex.getStrVal(); Lex.Lex(); } @@ -1565,21 +1398,19 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList, // Otherwise must be an argument type. TypeLoc = Lex.getLoc(); - if ((inType ? 
ParseTypeRec(ArgTy) : ParseType(ArgTy)) || - ParseOptionalAttrs(Attrs, 0)) return true; + if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true; if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); - if (Lex.getKind() == lltok::LocalVar || - Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0 + if (Lex.getKind() == lltok::LocalVar) { Name = Lex.getStrVal(); Lex.Lex(); } else { Name = ""; } - if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy()) + if (!ArgTy->isFirstClassType()) return Error(TypeLoc, "invalid type for function argument"); ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); @@ -1591,94 +1422,142 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList, /// ParseFunctionType /// ::= Type ArgumentList OptionalAttrs -bool LLParser::ParseFunctionType(PATypeHolder &Result) { +bool LLParser::ParseFunctionType(Type *&Result) { assert(Lex.getKind() == lltok::lparen); if (!FunctionType::isValidReturnType(Result)) return TokError("invalid function return type"); - std::vector<ArgInfo> ArgList; + SmallVector<ArgInfo, 8> ArgList; bool isVarArg; - unsigned Attrs; - if (ParseArgumentList(ArgList, isVarArg, true) || - // FIXME: Allow, but ignore attributes on function types! - // FIXME: Remove in LLVM 3.0 - ParseOptionalAttrs(Attrs, 2)) + if (ParseArgumentList(ArgList, isVarArg)) return true; // Reject names on the arguments lists. for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { if (!ArgList[i].Name.empty()) return Error(ArgList[i].Loc, "argument name invalid in function type"); - if (!ArgList[i].Attrs != 0) { - // Allow but ignore attributes on function types; this permits - // auto-upgrade. - // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0 - } + if (ArgList[i].Attrs != 0) + return Error(ArgList[i].Loc, + "argument attributes invalid in function type"); } - std::vector<const Type*> ArgListTy; + SmallVector<Type*, 16> ArgListTy; for (unsigned i = 0, e = ArgList.size(); i != e; ++i) - ArgListTy.push_back(ArgList[i].Type); + ArgListTy.push_back(ArgList[i].Ty); + + Result = FunctionType::get(Result, ArgListTy, isVarArg); + return false; +} - Result = HandleUpRefs(FunctionType::get(Result.get(), - ArgListTy, isVarArg)); +/// ParseAnonStructType - Parse an anonymous struct type, which is inlined into +/// other structs. +bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) { + SmallVector<Type*, 8> Elts; + if (ParseStructBody(Elts)) return true; + + Result = StructType::get(Context, Elts, Packed); return false; } +/// ParseStructDefinition - Parse a struct in a 'type' definition. +bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, + std::pair<Type*, LocTy> &Entry, + Type *&ResultTy) { + // If the type was already defined, diagnose the redefinition. + if (Entry.first && !Entry.second.isValid()) + return Error(TypeLoc, "redefinition of type"); + + // If we have opaque, just return without filling in the definition for the + // struct. This counts as a definition as far as the .ll file goes. + if (EatIfPresent(lltok::kw_opaque)) { + // This type is being defined, so clear the location to indicate this. + Entry.second = SMLoc(); + + // If this type number has never been uttered, create it. + if (Entry.first == 0) + Entry.first = StructType::createNamed(Context, Name); + ResultTy = Entry.first; + return false; + } + + // If the type starts with '<', then it is either a packed struct or a vector. 
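With OpaqueType gone, 'opaque' simply means an identified struct whose body has not been set yet; the same object can be completed later. A small fragment showing the observable behavior, assuming StructType::isOpaque from this API revision and an LLVMContext Ctx in scope:

  // '%T = type opaque' leaves a named struct with no body.
  llvm::StructType *T = llvm::StructType::createNamed(Ctx, "T");
  assert(T->isOpaque() && "no body set yet");
  // A later definition '%T = type { i8 }' attaches the body in place.
  llvm::Type *Body[] = { llvm::Type::getInt8Ty(Ctx) };
  T->setBody(Body, /*isPacked=*/false);
  assert(!T->isOpaque() && "body now attached");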
+ bool isPacked = EatIfPresent(lltok::less); + + // If we don't have a struct, then we have a random type alias, which we + // accept for compatibility with old files. These types are not allowed to be + // forward referenced and not allowed to be recursive. + if (Lex.getKind() != lltok::lbrace) { + if (Entry.first) + return Error(TypeLoc, "forward references to non-struct type"); + + ResultTy = 0; + if (isPacked) + return ParseArrayVectorType(ResultTy, true); + return ParseType(ResultTy); + } + + // This type is being defined, so clear the location to indicate this. + Entry.second = SMLoc(); + + // If this type number has never been uttered, create it. + if (Entry.first == 0) + Entry.first = StructType::createNamed(Context, Name); + + StructType *STy = cast<StructType>(Entry.first); + + SmallVector<Type*, 8> Body; + if (ParseStructBody(Body) || + (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct"))) + return true; + + STy->setBody(Body, isPacked); + ResultTy = STy; + return false; +} + + /// ParseStructType: Handles packed and unpacked types. </> parsed elsewhere. -/// TypeRec +/// StructType /// ::= '{' '}' -/// ::= '{' TypeRec (',' TypeRec)* '}' +/// ::= '{' Type (',' Type)* '}' /// ::= '<' '{' '}' '>' -/// ::= '<' '{' TypeRec (',' TypeRec)* '}' '>' -bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { +/// ::= '<' '{' Type (',' Type)* '}' '>' +bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) { assert(Lex.getKind() == lltok::lbrace); Lex.Lex(); // Consume the '{' - if (EatIfPresent(lltok::rbrace)) { - Result = StructType::get(Context, Packed); + // Handle the empty struct. + if (EatIfPresent(lltok::rbrace)) return false; - } - std::vector<PATypeHolder> ParamsList; LocTy EltTyLoc = Lex.getLoc(); - if (ParseTypeRec(Result)) return true; - ParamsList.push_back(Result); + Type *Ty = 0; + if (ParseType(Ty)) return true; + Body.push_back(Ty); - if (Result->isVoidTy()) - return Error(EltTyLoc, "struct element can not have void type"); - if (!StructType::isValidElementType(Result)) + if (!StructType::isValidElementType(Ty)) return Error(EltTyLoc, "invalid element type for struct"); while (EatIfPresent(lltok::comma)) { EltTyLoc = Lex.getLoc(); - if (ParseTypeRec(Result)) return true; + if (ParseType(Ty)) return true; - if (Result->isVoidTy()) - return Error(EltTyLoc, "struct element can not have void type"); - if (!StructType::isValidElementType(Result)) + if (!StructType::isValidElementType(Ty)) return Error(EltTyLoc, "invalid element type for struct"); - ParamsList.push_back(Result); + Body.push_back(Ty); } - if (ParseToken(lltok::rbrace, "expected '}' at end of struct")) - return true; - - std::vector<const Type*> ParamsListTy; - for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) - ParamsListTy.push_back(ParamsList[i].get()); - Result = HandleUpRefs(StructType::get(Context, ParamsListTy, Packed)); - return false; + return ParseToken(lltok::rbrace, "expected '}' at end of struct"); } /// ParseArrayVectorType - Parse an array or vector type, assuming the first /// token has already been consumed. 
-/// TypeRec +/// Type /// ::= '[' APSINTVAL 'x' Types ']' /// ::= '<' APSINTVAL 'x' Types '>' -bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) { +bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) { if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() || Lex.getAPSIntVal().getBitWidth() > 64) return TokError("expected number in address space"); @@ -1691,11 +1570,8 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) { return true; LocTy TypeLoc = Lex.getLoc(); - PATypeHolder EltTy(Type::getVoidTy(Context)); - if (ParseTypeRec(EltTy)) return true; - - if (EltTy->isVoidTy()) - return Error(TypeLoc, "array and vector element type cannot be void"); + Type *EltTy = 0; + if (ParseType(EltTy)) return true; if (ParseToken(isVector ? lltok::greater : lltok::rsquare, "expected end of sequential type")) @@ -1712,7 +1588,7 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) { } else { if (!ArrayType::isValidElementType(EltTy)) return Error(TypeLoc, "invalid array element type"); - Result = HandleUpRefs(ArrayType::get(EltTy, Size)); + Result = ArrayType::get(EltTy, Size); } return false; } @@ -1812,12 +1688,12 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, P.Error(Loc, "'%" + Name + "' is not a basic block"); else P.Error(Loc, "'%" + Name + "' defined with type '" + - Val->getType()->getDescription() + "'"); + getTypeString(Val->getType()) + "'"); return 0; } // Don't make placeholders with invalid type. - if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1854,11 +1730,11 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty, P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block"); else P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" + - Val->getType()->getDescription() + "'"); + getTypeString(Val->getType()) + "'"); return 0; } - if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1902,7 +1778,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, if (FI != ForwardRefValIDs.end()) { if (FI->second.first->getType() != Inst->getType()) return P.Error(NameLoc, "instruction forward referenced with type '" + - FI->second.first->getType()->getDescription() + "'"); + getTypeString(FI->second.first->getType()) + "'"); FI->second.first->replaceAllUsesWith(Inst); delete FI->second.first; ForwardRefValIDs.erase(FI); @@ -1918,7 +1794,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, if (FI != ForwardRefVals.end()) { if (FI->second.first->getType() != Inst->getType()) return P.Error(NameLoc, "instruction forward referenced with type '" + - FI->second.first->getType()->getDescription() + "'"); + getTypeString(FI->second.first->getType()) + "'"); FI->second.first->replaceAllUsesWith(Inst); delete FI->second.first; ForwardRefVals.erase(FI); @@ -2001,7 +1877,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ID.Kind = ValID::t_LocalID; break; case lltok::LocalVar: // %foo - case lltok::StringConstant: // "foo" - FIXME: REMOVE IN LLVM 3.0 ID.StrVal = Lex.getStrVal(); ID.Kind = ValID::t_LocalName; break; @@ -2035,9 +1910,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rbrace, "expected end of struct constant")) 
return true; - ID.ConstantVal = ConstantStruct::get(Context, Elts.data(), - Elts.size(), false); - ID.Kind = ValID::t_Constant; + ID.ConstantStructElts = new Constant*[Elts.size()]; + ID.UIntVal = Elts.size(); + memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0])); + ID.Kind = ValID::t_ConstantStruct; return false; } case lltok::less: { @@ -2055,9 +1931,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (isPackedStruct) { - ID.ConstantVal = - ConstantStruct::get(Context, Elts.data(), Elts.size(), true); - ID.Kind = ValID::t_Constant; + ID.ConstantStructElts = new Constant*[Elts.size()]; + memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0])); + ID.UIntVal = Elts.size(); + ID.Kind = ValID::t_PackedConstantStruct; return false; } @@ -2074,7 +1951,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Elts[i]->getType() != Elts[0]->getType()) return Error(FirstEltLoc, "vector element #" + Twine(i) + - " is not of type '" + Elts[0]->getType()->getDescription()); + " is not of type '" + getTypeString(Elts[0]->getType())); ID.ConstantVal = ConstantVector::get(Elts); ID.Kind = ValID::t_Constant; @@ -2098,7 +1975,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (!Elts[0]->getType()->isFirstClassType()) return Error(FirstEltLoc, "invalid array element type: " + - Elts[0]->getType()->getDescription()); + getTypeString(Elts[0]->getType())); ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size()); @@ -2107,10 +1984,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Elts[i]->getType() != Elts[0]->getType()) return Error(FirstEltLoc, "array element #" + Twine(i) + - " is not of type '" +Elts[0]->getType()->getDescription()); + " is not of type '" + getTypeString(Elts[0]->getType())); } - ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size()); + ID.ConstantVal = ConstantArray::get(ATy, Elts); ID.Kind = ValID::t_Constant; return false; } @@ -2179,7 +2056,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_inttoptr: case lltok::kw_ptrtoint: { unsigned Opc = Lex.getUIntVal(); - PATypeHolder DestTy(Type::getVoidTy(Context)); + Type *DestTy = 0; Constant *SrcVal; Lex.Lex(); if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") || @@ -2190,8 +2067,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy)) return Error(ID.Loc, "invalid cast opcode for cast from '" + - SrcVal->getType()->getDescription() + "' to '" + - DestTy->getDescription() + "'"); + getTypeString(SrcVal->getType()) + "' to '" + + getTypeString(DestTy) + "'"); ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc, SrcVal, DestTy); ID.Kind = ValID::t_Constant; @@ -2209,11 +2086,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (!Val->getType()->isAggregateType()) return Error(ID.Loc, "extractvalue operand must be aggregate type"); - if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), - Indices.end())) + if (!ExtractValueInst::getIndexedType(Val->getType(), Indices)) return Error(ID.Loc, "invalid indices for extractvalue"); - ID.ConstantVal = - ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size()); + ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices); ID.Kind = ValID::t_Constant; return false; } @@ -2230,11 +2105,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; 
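Struct constant literals are no longer typed eagerly: the parser stores the raw element list under t_ConstantStruct or t_PackedConstantStruct and only builds the constant once the expected type is known, which is what lets an initializer check itself against a named struct type. A condensed, hypothetical version of the check above:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"

  // Returns the constant, or null if the elements don't fit the struct.
  static llvm::Constant *makeStructConstant(llvm::StructType *ST,
                                            llvm::ArrayRef<llvm::Constant*> Elts) {
    if (ST->getNumElements() != Elts.size())
      return 0;                             // wrong element count
    for (unsigned i = 0, e = Elts.size(); i != e; ++i)
      if (Elts[i]->getType() != ST->getElementType(i))
        return 0;                           // field type mismatch
    return llvm::ConstantStruct::get(ST, Elts);
  }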
if (!Val0->getType()->isAggregateType()) return Error(ID.Loc, "insertvalue operand must be aggregate type"); - if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), - Indices.end())) + if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices)) return Error(ID.Loc, "invalid indices for insertvalue"); - ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, - Indices.data(), Indices.size()); + ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices); ID.Kind = ValID::t_Constant; return false; } @@ -2462,9 +2335,9 @@ bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) { } bool LLParser::ParseGlobalTypeAndValue(Constant *&V) { - PATypeHolder Type(Type::getVoidTy(Context)); - return ParseType(Type) || - ParseGlobalValue(Type, V); + Type *Ty = 0; + return ParseType(Ty) || + ParseGlobalValue(Ty, V); } /// ParseGlobalValueVector @@ -2600,7 +2473,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, if (V->getType() != Ty) return Error(ID.Loc, "floating point constant does not have type '" + - Ty->getDescription() + "'"); + getTypeString(Ty) + "'"); return false; case ValID::t_Null: @@ -2610,8 +2483,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, return false; case ValID::t_Undef: // FIXME: LabelTy should not be a first-class type. - if ((!Ty->isFirstClassType() || Ty->isLabelTy()) && - !Ty->isOpaqueTy()) + if (!Ty->isFirstClassType() || Ty->isLabelTy()) return Error(ID.Loc, "invalid type for undef constant"); V = UndefValue::get(Ty); return false; @@ -2632,20 +2504,40 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = ID.ConstantVal; return false; + case ValID::t_ConstantStruct: + case ValID::t_PackedConstantStruct: + if (const StructType *ST = dyn_cast<StructType>(Ty)) { + if (ST->getNumElements() != ID.UIntVal) + return Error(ID.Loc, + "initializer with struct type has wrong # elements"); + if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct)) + return Error(ID.Loc, "packed'ness of initializer and type don't match"); + + // Verify that the elements are compatible with the structtype. 
+ for (unsigned i = 0, e = ID.UIntVal; i != e; ++i) + if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i)) + return Error(ID.Loc, "element " + Twine(i) + + " of struct initializer doesn't match struct element type"); + + V = ConstantStruct::get(ST, ArrayRef<Constant*>(ID.ConstantStructElts, + ID.UIntVal)); + } else + return Error(ID.Loc, "constant expression type mismatch"); + return false; } } -bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) { +bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS) { V = 0; ValID ID; - return ParseValID(ID, &PFS) || - ConvertValIDToValue(Ty, ID, V, &PFS); + return ParseValID(ID, PFS) || + ConvertValIDToValue(Ty, ID, V, PFS); } -bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) { - PATypeHolder T(Type::getVoidTy(Context)); - return ParseType(T) || - ParseValue(T, V, PFS); +bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) { + Type *Ty = 0; + return ParseType(Ty) || + ParseValue(Ty, V, PFS); } bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, @@ -2671,7 +2563,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { unsigned Visibility, RetAttrs; CallingConv::ID CC; - PATypeHolder RetType(Type::getVoidTy(Context)); + Type *RetType = 0; LocTy RetTypeLoc = Lex.getLoc(); if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || @@ -2708,8 +2600,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { return Error(LinkageLoc, "invalid function linkage type"); } - if (!FunctionType::isValidReturnType(RetType) || - RetType->isOpaqueTy()) + if (!FunctionType::isValidReturnType(RetType)) return Error(RetTypeLoc, "invalid function return type"); LocTy NameLoc = Lex.getLoc(); @@ -2732,7 +2623,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (Lex.getKind() != lltok::lparen) return TokError("expected '(' in function argument list"); - std::vector<ArgInfo> ArgList; + SmallVector<ArgInfo, 8> ArgList; bool isVarArg; unsigned FuncAttrs; std::string Section; @@ -2741,7 +2632,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { bool UnnamedAddr; LocTy UnnamedAddrLoc; - if (ParseArgumentList(ArgList, isVarArg, false) || + if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || ParseOptionalAttrs(FuncAttrs, 2) || @@ -2760,21 +2651,14 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { // Okay, if we got here, the function is syntactically valid. Convert types // and do semantic checks. - std::vector<const Type*> ParamTypeList; + std::vector<Type*> ParamTypeList; SmallVector<AttributeWithIndex, 8> Attrs; - // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function - // attributes. 
- unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg; - if (FuncAttrs & ObsoleteFuncAttrs) { - RetAttrs |= FuncAttrs & ObsoleteFuncAttrs; - FuncAttrs &= ~ObsoleteFuncAttrs; - } if (RetAttrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { - ParamTypeList.push_back(ArgList[i].Type); + ParamTypeList.push_back(ArgList[i].Ty); if (ArgList[i].Attrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); } @@ -2805,21 +2689,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ForwardRefVals.erase(FRVI); } else if ((Fn = M->getFunction(FunctionName))) { - // If this function already exists in the symbol table, then it is - // multiply defined. We accept a few cases for old backwards compat. - // FIXME: Remove this stuff for LLVM 3.0. - if (Fn->getType() != PFT || Fn->getAttributes() != PAL || - (!Fn->isDeclaration() && isDefine)) { - // If the redefinition has different type or different attributes, - // reject it. If both have bodies, reject it. - return Error(NameLoc, "invalid redefinition of function '" + - FunctionName + "'"); - } else if (Fn->isDeclaration()) { - // Make sure to strip off any argument names so we can't get conflicts. - for (Function::arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end(); - AI != AE; ++AI) - AI->setName(""); - } + // Reject redefinitions. + return Error(NameLoc, "invalid redefinition of function '" + + FunctionName + "'"); } else if (M->getNamedValue(FunctionName)) { return Error(NameLoc, "redefinition of function '@" + FunctionName + "'"); } @@ -2858,10 +2730,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { // Add all of the arguments we parsed to the function. Function::arg_iterator ArgIt = Fn->arg_begin(); for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) { - // If we run out of arguments in the Function prototype, exit early. - // FIXME: REMOVE THIS IN LLVM 3.0, this is just for the mismatch case above. - if (ArgIt == Fn->arg_end()) break; - // If the argument has a name, insert it into the argument symbol table. if (ArgList[i].Name.empty()) continue; @@ -2879,10 +2747,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { /// ParseFunctionBody /// ::= '{' BasicBlock+ '}' -/// ::= 'begin' BasicBlock+ 'end' // FIXME: remove in LLVM 3.0 /// bool LLParser::ParseFunctionBody(Function &Fn) { - if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin) + if (Lex.getKind() != lltok::lbrace) return TokError("expected '{' in function body"); Lex.Lex(); // eat the {. @@ -2892,10 +2759,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) { PerFunctionState PFS(*this, Fn, FunctionNumber); // We need at least one basic block. - if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end) + if (Lex.getKind() == lltok::rbrace) return TokError("function body requires at least one basic block"); - while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end) + while (Lex.getKind() != lltok::rbrace) if (ParseBasicBlock(PFS)) return true; // Eat the }. 
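The deleted block used to tolerate mismatched re-declarations to keep pre-3.0 files parsing; any second occurrence of a function name is now a hard error. As .ll input (shown in comments, behavior inferred from the error path above), a file the old leniency accepted now fails:

  // declare i32 @f(i32)            ; creates @f in the module
  // define i32 @f(i32 %x) { ... }  ; now: invalid redefinition of function 'f'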
@@ -2936,9 +2803,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { Lex.Lex(); if (ParseToken(lltok::equal, "expected '=' after instruction id")) return true; - } else if (Lex.getKind() == lltok::LocalVar || - // FIXME: REMOVE IN LLVM 3.0 - Lex.getKind() == lltok::StringConstant) { + } else if (Lex.getKind() == lltok::LocalVar) { NameStr = Lex.getStrVal(); Lex.Lex(); if (ParseToken(lltok::equal, "expected '=' after instruction name")) @@ -3062,8 +2927,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_tail: return ParseCall(Inst, PFS, true); // Memory. case lltok::kw_alloca: return ParseAlloc(Inst, PFS); - case lltok::kw_malloc: return ParseAlloc(Inst, PFS, BB, false); - case lltok::kw_free: return ParseFree(Inst, PFS, BB); case lltok::kw_load: return ParseLoad(Inst, PFS, false); case lltok::kw_store: return ParseStore(Inst, PFS, false); case lltok::kw_volatile: @@ -3073,7 +2936,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, return ParseStore(Inst, PFS, true); else return TokError("expected 'load' or 'store'"); - case lltok::kw_getresult: return ParseGetResult(Inst, PFS); case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS); case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS); case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS); @@ -3128,14 +2990,19 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) { /// ParseRet - Parse a return instruction. /// ::= 'ret' void (',' !dbg, !1)* /// ::= 'ret' TypeAndValue (',' !dbg, !1)* -/// ::= 'ret' TypeAndValue (',' TypeAndValue)+ (',' !dbg, !1)* -/// [[obsolete: LLVM 3.0]] -int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, - PerFunctionState &PFS) { - PATypeHolder Ty(Type::getVoidTy(Context)); +bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, + PerFunctionState &PFS) { + SMLoc TypeLoc = Lex.getLoc(); + Type *Ty = 0; if (ParseType(Ty, true /*void allowed*/)) return true; + Type *ResType = PFS.getFunction().getReturnType(); + if (Ty->isVoidTy()) { + if (!ResType->isVoidTy()) + return Error(TypeLoc, "value doesn't match function result type '" + + getTypeString(ResType) + "'"); + Inst = ReturnInst::Create(Context); return false; } @@ -3143,38 +3010,12 @@ int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, Value *RV; if (ParseValue(Ty, RV, PFS)) return true; - bool ExtraComma = false; - if (EatIfPresent(lltok::comma)) { - // Parse optional custom metadata, e.g. !dbg - if (Lex.getKind() == lltok::MetadataVar) { - ExtraComma = true; - } else { - // The normal case is one return value. - // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring - // use of 'ret {i32,i32} {i32 1, i32 2}' - SmallVector<Value*, 8> RVs; - RVs.push_back(RV); - - do { - // If optional custom metadata, e.g. !dbg is seen then this is the - // end of MRV. - if (Lex.getKind() == lltok::MetadataVar) - break; - if (ParseTypeAndValue(RV, PFS)) return true; - RVs.push_back(RV); - } while (EatIfPresent(lltok::comma)); - - RV = UndefValue::get(PFS.getFunction().getReturnType()); - for (unsigned i = 0, e = RVs.size(); i != e; ++i) { - Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv"); - BB->getInstList().push_back(I); - RV = I; - } - } - } - + if (ResType != RV->getType()) + return Error(TypeLoc, "value doesn't match function result type '" + + getTypeString(ResType) + "'"); + Inst = ReturnInst::Create(Context, RV); - return ExtraComma ? 
InstExtraComma : InstNormal; + return false; } @@ -3300,7 +3141,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { LocTy CallLoc = Lex.getLoc(); unsigned RetAttrs, FnAttrs; CallingConv::ID CC; - PATypeHolder RetType(Type::getVoidTy(Context)); + Type *RetType = 0; LocTy RetTypeLoc; ValID CalleeID; SmallVector<ParamInfo, 16> ArgList; @@ -3326,7 +3167,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { if (!(PFTy = dyn_cast<PointerType>(RetType)) || !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) { // Pull out the types of all of the arguments... - std::vector<const Type*> ParamTypes; + std::vector<Type*> ParamTypes; for (unsigned i = 0, e = ArgList.size(); i != e; ++i) ParamTypes.push_back(ArgList[i].V->getType()); @@ -3341,14 +3182,6 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { Value *Callee; if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; - // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional - // function attributes. - unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg; - if (FnAttrs & ObsoleteFuncAttrs) { - RetAttrs |= FnAttrs & ObsoleteFuncAttrs; - FnAttrs &= ~ObsoleteFuncAttrs; - } - // Set up the Attributes for the function. SmallVector<AttributeWithIndex, 8> Attrs; if (RetAttrs != Attribute::None) @@ -3370,7 +3203,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { if (ExpectedTy && ExpectedTy != ArgList[i].V->getType()) return Error(ArgList[i].Loc, "argument is not of expected type '" + - ExpectedTy->getDescription() + "'"); + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); if (ArgList[i].Attrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); @@ -3385,8 +3218,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { // Finish off the Attributes and check them AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); - InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, - Args.begin(), Args.end()); + InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args); II->setCallingConv(CC); II->setAttributes(PAL); Inst = II; @@ -3486,8 +3318,9 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, /// ::= CastOpc TypeAndValue 'to' Type bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc) { - LocTy Loc; Value *Op; - PATypeHolder DestTy(Type::getVoidTy(Context)); + LocTy Loc; + Value *Op; + Type *DestTy = 0; if (ParseTypeAndValue(Op, Loc, PFS) || ParseToken(lltok::kw_to, "expected 'to' after cast value") || ParseType(DestTy)) @@ -3496,8 +3329,8 @@ bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS, if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) { CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy); return Error(Loc, "invalid cast opcode for cast from '" + - Op->getType()->getDescription() + "' to '" + - DestTy->getDescription() + "'"); + getTypeString(Op->getType()) + "' to '" + + getTypeString(DestTy) + "'"); } Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy); return false; @@ -3526,7 +3359,7 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'va_arg' TypeAndValue ',' Type bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) { Value *Op; - PATypeHolder EltTy(Type::getVoidTy(Context)); + Type *EltTy = 0; LocTy TypeLoc; if (ParseTypeAndValue(Op, PFS) || 
ParseToken(lltok::comma, "expected ',' after vaarg operand") || @@ -3598,11 +3431,10 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) { /// ParsePHI /// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')* int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { - PATypeHolder Ty(Type::getVoidTy(Context)); + Type *Ty = 0; LocTy TypeLoc; Value *Op0, *Op1; - LocTy TypeLoc = Lex.getLoc(); - if (ParseType(Ty) || + if (ParseType(Ty, TypeLoc) || ParseToken(lltok::lsquare, "expected '[' in phi value list") || ParseValue(Ty, Op0, PFS) || ParseToken(lltok::comma, "expected ',' after insertelement value") || @@ -3648,7 +3480,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { unsigned RetAttrs, FnAttrs; CallingConv::ID CC; - PATypeHolder RetType(Type::getVoidTy(Context)); + Type *RetType = 0; LocTy RetTypeLoc; ValID CalleeID; SmallVector<ParamInfo, 16> ArgList; @@ -3671,7 +3503,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, if (!(PFTy = dyn_cast<PointerType>(RetType)) || !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) { // Pull out the types of all of the arguments... - std::vector<const Type*> ParamTypes; + std::vector<Type*> ParamTypes; for (unsigned i = 0, e = ArgList.size(); i != e; ++i) ParamTypes.push_back(ArgList[i].V->getType()); @@ -3686,14 +3518,6 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, Value *Callee; if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; - // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional - // function attributes. - unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg; - if (FnAttrs & ObsoleteFuncAttrs) { - RetAttrs |= FnAttrs & ObsoleteFuncAttrs; - FnAttrs &= ~ObsoleteFuncAttrs; - } - // Set up the Attributes for the function. SmallVector<AttributeWithIndex, 8> Attrs; if (RetAttrs != Attribute::None) @@ -3715,7 +3539,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, if (ExpectedTy && ExpectedTy != ArgList[i].V->getType()) return Error(ArgList[i].Loc, "argument is not of expected type '" + - ExpectedTy->getDescription() + "'"); + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); if (ArgList[i].Attrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); @@ -3730,7 +3554,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // Finish off the Attributes and check them AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); - CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end()); + CallInst *CI = CallInst::Create(Callee, Args); CI->setTailCall(isTail); CI->setCallingConv(CC); CI->setAttributes(PAL); @@ -3743,14 +3567,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, //===----------------------------------------------------------------------===// /// ParseAlloc -/// ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalInfo)? /// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)? 
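Call construction switches from iterator pairs to ArrayRef here, mirroring the InvokeInst change above; any contiguous container of Value* is accepted directly. A minimal sketch, with Callee, Arg0, Arg1 and CC assumed in scope:

  llvm::SmallVector<llvm::Value*, 4> Args;
  Args.push_back(Arg0);
  Args.push_back(Arg1);
  // Old: CallInst::Create(Callee, Args.begin(), Args.end());
  llvm::CallInst *CI = llvm::CallInst::Create(Callee, Args); // ArrayRef<Value*>
  CI->setCallingConv(CC);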
-int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, - BasicBlock* BB, bool isAlloca) { - PATypeHolder Ty(Type::getVoidTy(Context)); +int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { Value *Size = 0; LocTy SizeLoc; unsigned Alignment = 0; + Type *Ty = 0; if (ParseType(Ty)) return true; bool AteExtraComma = false; @@ -3769,37 +3591,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, if (Size && !Size->getType()->isIntegerTy()) return Error(SizeLoc, "element count must have integer type"); - if (isAlloca) { - Inst = new AllocaInst(Ty, Size, Alignment); - return AteExtraComma ? InstExtraComma : InstNormal; - } - - // Autoupgrade old malloc instruction to malloc call. - // FIXME: Remove in LLVM 3.0. - if (Size && !Size->getType()->isIntegerTy(32)) - return Error(SizeLoc, "element count must be i32"); - const Type *IntPtrTy = Type::getInt32Ty(Context); - Constant *AllocSize = ConstantExpr::getSizeOf(Ty); - AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy); - if (!MallocF) - // Prototype malloc as "void *(int32)". - // This function is renamed as "malloc" in ValidateEndOfModule(). - MallocF = cast<Function>( - M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL)); - Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF); -return AteExtraComma ? InstExtraComma : InstNormal; -} - -/// ParseFree -/// ::= 'free' TypeAndValue -bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS, - BasicBlock* BB) { - Value *Val; LocTy Loc; - if (ParseTypeAndValue(Val, Loc, PFS)) return true; - if (!Val->getType()->isPointerTy()) - return Error(Loc, "operand to free must be a pointer"); - Inst = CallInst::CreateFree(Val, BB); - return false; + Inst = new AllocaInst(Ty, Size, Alignment); + return AteExtraComma ? InstExtraComma : InstNormal; } /// ParseLoad @@ -3845,25 +3638,6 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, return AteExtraComma ? InstExtraComma : InstNormal; } -/// ParseGetResult -/// ::= 'getresult' TypeAndValue ',' i32 -/// FIXME: Remove support for getresult in LLVM 3.0 -bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) { - Value *Val; LocTy ValLoc, EltLoc; - unsigned Element; - if (ParseTypeAndValue(Val, ValLoc, PFS) || - ParseToken(lltok::comma, "expected ',' after getresult operand") || - ParseUInt32(Element, EltLoc)) - return true; - - if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy()) - return Error(ValLoc, "getresult inst requires an aggregate operand"); - if (!ExtractValueInst::getIndexedType(Val->getType(), Element)) - return Error(EltLoc, "invalid getresult index for value"); - Inst = ExtractValueInst::Create(Val, Element); - return false; -} - /// ParseGetElementPtr /// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)* int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { @@ -3911,10 +3685,9 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) { if (!Val->getType()->isAggregateType()) return Error(Loc, "extractvalue operand must be aggregate type"); - if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), - Indices.end())) + if (!ExtractValueInst::getIndexedType(Val->getType(), Indices)) return Error(Loc, "invalid indices for extractvalue"); - Inst = ExtractValueInst::Create(Val, Indices.begin(), Indices.end()); + Inst = ExtractValueInst::Create(Val, Indices); return AteExtraComma ? 
InstExtraComma : InstNormal; } @@ -3933,10 +3706,9 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { if (!Val0->getType()->isAggregateType()) return Error(Loc0, "insertvalue operand must be aggregate type"); - if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), - Indices.end())) + if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices)) return Error(Loc0, "invalid indices for insertvalue"); - Inst = InsertValueInst::Create(Val0, Val1, Indices.begin(), Indices.end()); + Inst = InsertValueInst::Create(Val0, Val1, Indices); return AteExtraComma ? InstExtraComma : InstNormal; } @@ -3962,12 +3734,7 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts, } Value *V = 0; - PATypeHolder Ty(Type::getVoidTy(Context)); - ValID ID; - if (ParseType(Ty) || ParseValID(ID, PFS) || - ConvertValIDToValue(Ty, ID, V, PFS)) - return true; - + if (ParseTypeAndValue(V, PFS)) return true; Elts.push_back(V); } while (EatIfPresent(lltok::comma)); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 93e7f778ebcb..963065785061 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -18,6 +18,7 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/ValueHandle.h" #include <map> @@ -32,6 +33,7 @@ namespace llvm { class GlobalValue; class MDString; class MDNode; + class StructType; /// ValID - Represents a reference of a definition of some sort with no type. /// There are several cases where we have to parse the value but where the @@ -47,7 +49,9 @@ namespace llvm { t_Constant, // Value in ConstantVal. t_InlineAsm, // Value in StrVal/StrVal2/UIntVal. t_MDNode, // Value in MDNodeVal. - t_MDString // Value in MDStringVal. + t_MDString, // Value in MDStringVal. + t_ConstantStruct, // Value in ConstantStructElts. + t_PackedConstantStruct // Value in ConstantStructElts. } Kind; LLLexer::LocTy Loc; @@ -58,12 +62,19 @@ namespace llvm { Constant *ConstantVal; MDNode *MDNodeVal; MDString *MDStringVal; - ValID() : APFloatVal(0.0) {} + Constant **ConstantStructElts; + + ValID() : Kind(t_LocalID), APFloatVal(0.0) {} + ~ValID() { + if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) + delete [] ConstantStructElts; + } bool operator<(const ValID &RHS) const { if (Kind == t_LocalID || Kind == t_GlobalID) return UIntVal < RHS.UIntVal; - assert((Kind == t_LocalName || Kind == t_GlobalName) && + assert((Kind == t_LocalName || Kind == t_GlobalName || + Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) && "Ordering not defined for this ValID kind yet"); return StrVal < RHS.StrVal; } @@ -93,33 +104,13 @@ namespace llvm { }; DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata; - // Type resolution handling data structures. - std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes; - std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs; - std::vector<PATypeHolder> NumberedTypes; + // Type resolution handling data structures. The location is set when we + // have processed a use of the type but not a definition yet. + StringMap<std::pair<Type*, LocTy> > NamedTypes; + std::vector<std::pair<Type*, LocTy> > NumberedTypes; + std::vector<TrackingVH<MDNode> > NumberedMetadata; std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes; - struct UpRefRecord { - /// Loc - This is the location of the upref. 
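This removes the last auto-upgrade paths for pre-2.x instruction spellings: malloc and free are no longer rewritten into libc calls by the parser, and getresult (dropped from the opcode switch above) must be spelled as extractvalue. Old IR now needs the rewrites the deleted code used to perform, roughly (sketch in comments; the size computation is target-dependent):

  // %p = malloc i32, i32 %n  -->  %raw = call i8* @malloc(i32 %size)
  //                                %p   = bitcast i8* %raw to i32*
  // free i32* %p             -->  %q = bitcast i32* %p to i8*
  //                                call void @free(i8* %q)
  // %r = getresult {i32, i32} %s, 0
  //                          -->  %r = extractvalue {i32, i32} %s, 0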
- LocTy Loc; - - /// NestingLevel - The number of nesting levels that need to be popped - /// before this type is resolved. - unsigned NestingLevel; - - /// LastContainedTy - This is the type at the current binding level for - /// the type. Every time we reduce the nesting level, this gets updated. - const Type *LastContainedTy; - - /// UpRefTy - This is the actual opaque type that the upreference is - /// represented with. - OpaqueType *UpRefTy; - - UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy) - : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy), - UpRefTy(URTy) {} - }; - std::vector<UpRefRecord> UpRefs; // Global Value reference information. std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals; @@ -131,14 +122,13 @@ namespace llvm { std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > > ForwardRefBlockAddresses; - Function *MallocF; public: LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), - M(m), MallocF(NULL) {} + M(m) {} bool Run(); - LLVMContext& getContext() { return Context; } + LLVMContext &getContext() { return Context; } private: @@ -223,16 +213,19 @@ namespace llvm { bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); // Type Parsing. - bool ParseType(PATypeHolder &Result, bool AllowVoid = false); - bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) { + bool ParseType(Type *&Result, bool AllowVoid = false); + bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) { Loc = Lex.getLoc(); return ParseType(Result, AllowVoid); } - bool ParseTypeRec(PATypeHolder &H); - bool ParseStructType(PATypeHolder &H, bool Packed); - bool ParseArrayVectorType(PATypeHolder &H, bool isVector); - bool ParseFunctionType(PATypeHolder &Result); - PATypeHolder HandleUpRefs(const Type *Ty); + bool ParseAnonStructType(Type *&Result, bool Packed); + bool ParseStructBody(SmallVectorImpl<Type*> &Body); + bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name, + std::pair<Type*, LocTy> &Entry, + Type *&ResultTy); + + bool ParseArrayVectorType(Type *&Result, bool isVector); + bool ParseFunctionType(Type *&Result); // Function Semantic Analysis. class PerFunctionState { @@ -279,14 +272,20 @@ namespace llvm { bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, PerFunctionState *PFS); - bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS); + bool ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS); + bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) { + return ParseValue(Ty, V, &PFS); + } bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc, PerFunctionState &PFS) { Loc = Lex.getLoc(); - return ParseValue(Ty, V, PFS); + return ParseValue(Ty, V, &PFS); } - bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS); + bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS); + bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) { + return ParseTypeAndValue(V, &PFS); + } bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) { Loc = Lex.getLoc(); return ParseTypeAndValue(V, PFS); @@ -322,14 +321,13 @@ namespace llvm { // Function Parsing. 
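ParseValue and ParseTypeAndValue now take the PerFunctionState by pointer, with the reference forms kept as thin forwarders; a null pointer means module scope, which is how metadata operands can be parsed outside any function. Sketch of the convention at a hypothetical call site:

  llvm::Value *V = 0;
  // Inside a function body: local values are resolvable.
  if (ParseTypeAndValue(V, &PFS)) return true;
  // At module scope (e.g. standalone metadata): no function state.
  if (ParseTypeAndValue(V, 0)) return true;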
struct ArgInfo { LocTy Loc; - PATypeHolder Type; + Type *Ty; unsigned Attrs; std::string Name; - ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N) - : Loc(L), Type(Ty), Attrs(Attr), Name(N) {} + ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N) + : Loc(L), Ty(ty), Attrs(Attr), Name(N) {} }; - bool ParseArgumentList(std::vector<ArgInfo> &ArgList, - bool &isVarArg, bool inType); + bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg); bool ParseFunctionHeader(Function *&Fn, bool isDefine); bool ParseFunctionBody(Function &Fn); bool ParseBasicBlock(PerFunctionState &PFS); @@ -341,7 +339,7 @@ namespace llvm { PerFunctionState &PFS); bool ParseCmpPredicate(unsigned &Pred, unsigned Opc); - int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS); + bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS); bool ParseBr(Instruction *&Inst, PerFunctionState &PFS); bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS); bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS); @@ -359,12 +357,9 @@ namespace llvm { bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); int ParsePHI(Instruction *&I, PerFunctionState &PFS); bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); - int ParseAlloc(Instruction *&I, PerFunctionState &PFS, - BasicBlock *BB = 0, bool isAlloca = true); - bool ParseFree(Instruction *&I, PerFunctionState &PFS, BasicBlock *BB); + int ParseAlloc(Instruction *&I, PerFunctionState &PFS); int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); - bool ParseGetResult(Instruction *&I, PerFunctionState &PFS); int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 02f97a3d3d23..a5f89fcce0c0 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -32,7 +32,6 @@ namespace lltok { exclaim, // ! kw_x, - kw_begin, kw_end, kw_true, kw_false, kw_declare, kw_define, kw_global, kw_constant, @@ -99,6 +98,7 @@ namespace lltok { kw_noimplicitfloat, kw_naked, kw_hotpatch, + kw_nonlazybind, kw_type, kw_opaque, @@ -121,9 +121,9 @@ namespace lltok { kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, kw_unreachable, - kw_malloc, kw_alloca, kw_free, kw_load, kw_store, kw_getelementptr, + kw_alloca, kw_load, kw_store, kw_getelementptr, - kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult, + kw_extractelement, kw_insertelement, kw_shufflevector, kw_extractvalue, kw_insertvalue, kw_blockaddress, // Unsigned Valued tokens (UIntVal). diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index bc995aec83ee..24c29941cf16 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -31,7 +31,7 @@ void BitcodeReader::FreeState() { if (BufferOwned) delete Buffer; Buffer = 0; - std::vector<PATypeHolder>().swap(TypeList); + std::vector<Type*>().swap(TypeList); ValueList.clear(); MDValueList.clear(); @@ -292,11 +292,9 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { // Make the new constant. 
Constant *NewC; if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) { - NewC = ConstantArray::get(UserCA->getType(), &NewOps[0], - NewOps.size()); + NewC = ConstantArray::get(UserCA->getType(), NewOps); } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) { - NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(), - UserCS->getType()->isPacked()); + NewC = ConstantStruct::get(UserCS->getType(), NewOps); } else if (isa<ConstantVector>(UserC)) { NewC = ConstantVector::get(NewOps); } else { @@ -354,19 +352,28 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { return V; } -const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) { - // If the TypeID is in range, return it. - if (ID < TypeList.size()) - return TypeList[ID].get(); - if (!isTypeTable) return 0; - - // The type table allows forward references. Push as many Opaque types as - // needed to get up to ID. - while (TypeList.size() <= ID) - TypeList.push_back(OpaqueType::get(Context)); - return TypeList.back().get(); +Type *BitcodeReader::getTypeByID(unsigned ID) { + // The type table size is always specified correctly. + if (ID >= TypeList.size()) + return 0; + + if (Type *Ty = TypeList[ID]) + return Ty; + + // If we have a forward reference, the only possible case is when it is to a + // named struct. Just create a placeholder for now. + return TypeList[ID] = StructType::createNamed(Context, ""); } +/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable. +Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) { + if (ID >= TypeList.size()) + TypeList.resize(ID+1); + + return TypeList[ID]; +} + + //===----------------------------------------------------------------------===// // Functions for parsing blocks from the bitcode file //===----------------------------------------------------------------------===// @@ -473,17 +480,22 @@ bool BitcodeReader::ParseAttributeBlock() { } } - bool BitcodeReader::ParseTypeTable() { - if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID)) + if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) return Error("Malformed block record"); + + return ParseTypeTableBody(); +} +bool BitcodeReader::ParseTypeTableBody() { if (!TypeList.empty()) return Error("Multiple TYPE_BLOCKs found!"); SmallVector<uint64_t, 64> Record; unsigned NumRecords = 0; + SmallString<64> TypeName; + // Read all the records for this type table. while (1) { unsigned Code = Stream.ReadCode(); @@ -510,17 +522,15 @@ bool BitcodeReader::ParseTypeTable() { // Read a record. Record.clear(); - const Type *ResultTy = 0; + Type *ResultTy = 0; switch (Stream.ReadRecord(Code, Record)) { - default: // Default behavior: unknown type. - ResultTy = 0; - break; + default: return Error("unknown type in type table"); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the // type list. This allows us to reserve space. 
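getTypeByID now trusts the table size announced by TYPE_CODE_NUMENTRY and parks any forward reference as an unnamed body-less struct, the only kind of type that may legally be referenced before its record arrives. The same function restated without the diff noise (Context and TypeList are BitcodeReader members):

  Type *BitcodeReader::getTypeByID(unsigned ID) {
    if (ID >= TypeList.size())
      return 0;                     // the announced table size is authoritative
    if (Type *Ty = TypeList[ID])
      return Ty;                    // already materialized
    // Forward reference: create an empty placeholder struct now and let
    // the STRUCT_NAMED/OPAQUE record fill it in when it is read.
    return TypeList[ID] = StructType::createNamed(Context, "");
  }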
if (Record.size() < 1) return Error("Invalid TYPE_CODE_NUMENTRY record"); - TypeList.reserve(Record[0]); + TypeList.resize(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID ResultTy = Type::getVoidTy(Context); @@ -543,9 +553,6 @@ bool BitcodeReader::ParseTypeTable() { case bitc::TYPE_CODE_LABEL: // LABEL ResultTy = Type::getLabelTy(Context); break; - case bitc::TYPE_CODE_OPAQUE: // OPAQUE - ResultTy = 0; - break; case bitc::TYPE_CODE_METADATA: // METADATA ResultTy = Type::getMetadataTy(Context); break; @@ -565,8 +572,9 @@ bool BitcodeReader::ParseTypeTable() { unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; - ResultTy = PointerType::get(getTypeByID(Record[0], true), - AddressSpace); + ResultTy = getTypeByID(Record[0]); + if (ResultTy == 0) return Error("invalid element type in pointer type"); + ResultTy = PointerType::get(ResultTy, AddressSpace); break; } case bitc::TYPE_CODE_FUNCTION: { @@ -574,69 +582,306 @@ bool BitcodeReader::ParseTypeTable() { // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) return Error("Invalid FUNCTION type record"); - std::vector<const Type*> ArgTys; - for (unsigned i = 3, e = Record.size(); i != e; ++i) - ArgTys.push_back(getTypeByID(Record[i], true)); + std::vector<Type*> ArgTys; + for (unsigned i = 3, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + ArgTys.push_back(T); + else + break; + } + + ResultTy = getTypeByID(Record[2]); + if (ResultTy == 0 || ArgTys.size() < Record.size()-3) + return Error("invalid type in function type"); - ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys, - Record[0]); + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } - case bitc::TYPE_CODE_STRUCT: { // STRUCT: [ispacked, eltty x N] + case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return Error("Invalid STRUCT type record"); - std::vector<const Type*> EltTys; - for (unsigned i = 1, e = Record.size(); i != e; ++i) - EltTys.push_back(getTypeByID(Record[i], true)); + std::vector<Type*> EltTys; + for (unsigned i = 1, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + EltTys.push_back(T); + else + break; + } + if (EltTys.size() != Record.size()-1) + return Error("invalid type in struct type"); ResultTy = StructType::get(Context, EltTys, Record[0]); break; } + case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] + if (ConvertToString(Record, 0, TypeName)) + return Error("Invalid STRUCT_NAME record"); + continue; + + case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] + if (Record.size() < 1) + return Error("Invalid STRUCT type record"); + + if (NumRecords >= TypeList.size()) + return Error("invalid TYPE table"); + + // Check to see if this was forward referenced, if so fill in the temp. + StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]); + if (Res) { + Res->setName(TypeName); + TypeList[NumRecords] = 0; + } else // Otherwise, create a new struct. 
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+
+ SmallVector<Type*, 8> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid STRUCT type record");
+ Res->setBody(EltTys, Record[0]);
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
+ if (Record.size() != 1)
+ return Error("Invalid OPAQUE type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct with no body.
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+ ResultTy = Res;
+ break;
+ }
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid ARRAY type record");
- ResultTy = ArrayType::get(getTypeByID(Record[1], true), Record[0]);
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid ARRAY type element");
break;
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid VECTOR type record");
- ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]);
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid VECTOR type element");
break;
}
- if (NumRecords == TypeList.size()) {
- // If this is a new type slot, just append it.
- TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get(Context));
- ++NumRecords;
- } else if (ResultTy == 0) {
- // Otherwise, this was forward referenced, so an opaque type was created,
- // but the result type is actually just an opaque. Leave the one we
- // created previously.
- ++NumRecords;
- } else {
- // Otherwise, this was forward referenced, so an opaque type was created.
- // Resolve the opaque type to the real type now.
- assert(NumRecords < TypeList.size() && "Typelist imbalance");
- const OpaqueType *OldTy = cast<OpaqueType>(TypeList[NumRecords++].get());
-
- // Don't directly push the new type on the Tab. Instead we want to replace
- // the opaque type we previously inserted with the new concrete value. The
- // refinement from the abstract (opaque) type to the new type causes all
- // uses of the abstract type to use the concrete type (NewTy). This will
- // also cause the opaque type to be deleted.
- const_cast<OpaqueType*>(OldTy)->refineAbstractTypeTo(ResultTy);
-
- // This should have replaced the old opaque type with the new type in the
- // value table... or with a preexisting type that was already in the
- // system. Let's just make sure it did.
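The STRUCT_NAME record above carries only a string; whichever STRUCT_NAMED or OPAQUE record comes next consumes it. A minimal sketch of that pending-name pairing follows, with hypothetical record kinds standing in for the bitcode codes.

#include <cassert>
#include <string>
#include <vector>

// A name record carries only a string; the next definition record
// consumes it. RecKind and Record are illustrative, not LLVM types.
enum class RecKind { StructName, StructNamed, Opaque };
struct Record { RecKind Kind; std::string Str; };

std::vector<std::string> parse(const std::vector<Record> &Recs) {
  std::vector<std::string> Defined;
  std::string PendingName;            // like the reader's TypeName buffer
  for (const Record &R : Recs) {
    switch (R.Kind) {
    case RecKind::StructName:
      PendingName = R.Str;            // remember it; no type is created yet
      break;
    case RecKind::StructNamed:
    case RecKind::Opaque:
      Defined.push_back(PendingName); // definition consumes the pending name
      PendingName.clear();            // and clears it for the next pair
      break;
    }
  }
  return Defined;
}

int main() {
  std::vector<Record> Recs = {
    {RecKind::StructName, "foo"}, {RecKind::StructNamed, ""},
    {RecKind::StructName, "bar"}, {RecKind::Opaque, ""},
  };
  auto Names = parse(Recs);
  assert(Names.size() == 2 && Names[0] == "foo" && Names[1] == "bar");
}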
- assert(TypeList[NumRecords-1].get() != OldTy && - "refineAbstractType didn't work!"); + if (NumRecords >= TypeList.size()) + return Error("invalid TYPE table"); + assert(ResultTy && "Didn't read a type?"); + assert(TypeList[NumRecords] == 0 && "Already read type?"); + TypeList[NumRecords++] = ResultTy; + } +} + +// FIXME: Remove in LLVM 3.1 +bool BitcodeReader::ParseOldTypeTable() { + if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD)) + return Error("Malformed block record"); + + if (!TypeList.empty()) + return Error("Multiple TYPE_BLOCKs found!"); + + + // While horrible, we have no good ordering of types in the bc file. Just + // iteratively parse types out of the bc file in multiple passes until we get + // them all. Do this by saving a cursor for the start of the type block. + BitstreamCursor StartOfTypeBlockCursor(Stream); + + unsigned NumTypesRead = 0; + + SmallVector<uint64_t, 64> Record; +RestartScan: + unsigned NextTypeID = 0; + bool ReadAnyTypes = false; + + // Read all the records for this type table. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (NextTypeID != TypeList.size()) + return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD"); + + // If we haven't read all of the types yet, iterate again. + if (NumTypesRead != TypeList.size()) { + // If we didn't successfully read any types in this pass, then we must + // have an unhandled forward reference. + if (!ReadAnyTypes) + return Error("Obsolete bitcode contains unhandled recursive type"); + + Stream = StartOfTypeBlockCursor; + goto RestartScan; + } + + if (Stream.ReadBlockEnd()) + return Error("Error at end of type table block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; } + + // Read a record. + Record.clear(); + Type *ResultTy = 0; + switch (Stream.ReadRecord(Code, Record)) { + default: return Error("unknown type in type table"); + case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] + // TYPE_CODE_NUMENTRY contains a count of the number of types in the + // type list. This allows us to reserve space. 
+ if (Record.size() < 1) + return Error("Invalid TYPE_CODE_NUMENTRY record"); + TypeList.resize(Record[0]); + continue; + case bitc::TYPE_CODE_VOID: // VOID + ResultTy = Type::getVoidTy(Context); + break; + case bitc::TYPE_CODE_FLOAT: // FLOAT + ResultTy = Type::getFloatTy(Context); + break; + case bitc::TYPE_CODE_DOUBLE: // DOUBLE + ResultTy = Type::getDoubleTy(Context); + break; + case bitc::TYPE_CODE_X86_FP80: // X86_FP80 + ResultTy = Type::getX86_FP80Ty(Context); + break; + case bitc::TYPE_CODE_FP128: // FP128 + ResultTy = Type::getFP128Ty(Context); + break; + case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 + ResultTy = Type::getPPC_FP128Ty(Context); + break; + case bitc::TYPE_CODE_LABEL: // LABEL + ResultTy = Type::getLabelTy(Context); + break; + case bitc::TYPE_CODE_METADATA: // METADATA + ResultTy = Type::getMetadataTy(Context); + break; + case bitc::TYPE_CODE_X86_MMX: // X86_MMX + ResultTy = Type::getX86_MMXTy(Context); + break; + case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] + if (Record.size() < 1) + return Error("Invalid Integer type record"); + ResultTy = IntegerType::get(Context, Record[0]); + break; + case bitc::TYPE_CODE_OPAQUE: // OPAQUE + if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0) + ResultTy = StructType::createNamed(Context, ""); + break; + case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD + if (NextTypeID >= TypeList.size()) break; + // If we already read it, don't reprocess. + if (TypeList[NextTypeID] && + !cast<StructType>(TypeList[NextTypeID])->isOpaque()) + break; + + // Set a type. + if (TypeList[NextTypeID] == 0) + TypeList[NextTypeID] = StructType::createNamed(Context, ""); + + std::vector<Type*> EltTys; + for (unsigned i = 1, e = Record.size(); i != e; ++i) { + if (Type *Elt = getTypeByIDOrNull(Record[i])) + EltTys.push_back(Elt); + else + break; + } + + if (EltTys.size() != Record.size()-1) + break; // Not all elements are ready. + + cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]); + ResultTy = TypeList[NextTypeID]; + TypeList[NextTypeID] = 0; + break; + } + case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or + // [pointee type, address space] + if (Record.size() < 1) + return Error("Invalid POINTER type record"); + unsigned AddressSpace = 0; + if (Record.size() == 2) + AddressSpace = Record[1]; + if ((ResultTy = getTypeByIDOrNull(Record[0]))) + ResultTy = PointerType::get(ResultTy, AddressSpace); + break; + } + case bitc::TYPE_CODE_FUNCTION: { + // FIXME: attrid is dead, remove it in LLVM 3.0 + // FUNCTION: [vararg, attrid, retty, paramty x N] + if (Record.size() < 3) + return Error("Invalid FUNCTION type record"); + std::vector<Type*> ArgTys; + for (unsigned i = 3, e = Record.size(); i != e; ++i) { + if (Type *Elt = getTypeByIDOrNull(Record[i])) + ArgTys.push_back(Elt); + else + break; + } + if (ArgTys.size()+3 != Record.size()) + break; // Something was null. 
+ if ((ResultTy = getTypeByIDOrNull(Record[2]))) + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); + break; + } + case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid ARRAY type record"); + if ((ResultTy = getTypeByIDOrNull(Record[1]))) + ResultTy = ArrayType::get(ResultTy, Record[0]); + break; + case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid VECTOR type record"); + if ((ResultTy = getTypeByIDOrNull(Record[1]))) + ResultTy = VectorType::get(ResultTy, Record[0]); + break; + } + + if (NextTypeID >= TypeList.size()) + return Error("invalid TYPE table"); + + if (ResultTy && TypeList[NextTypeID] == 0) { + ++NumTypesRead; + ReadAnyTypes = true; + + TypeList[NextTypeID] = ResultTy; + } + + ++NextTypeID; } } -bool BitcodeReader::ParseTypeSymbolTable() { - if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID)) +bool BitcodeReader::ParseOldTypeSymbolTable() { + if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD)) return Error("Malformed block record"); SmallVector<uint64_t, 64> Record; @@ -676,7 +921,10 @@ bool BitcodeReader::ParseTypeSymbolTable() { if (TypeID >= TypeList.size()) return Error("Invalid Type ID in TST_ENTRY record"); - TheModule->addTypeName(TypeName, TypeList[TypeID].get()); + // Only apply the type name to a struct type with no name. + if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID])) + if (!STy->isAnonymous() && !STy->hasName()) + STy->setName(TypeName); TypeName.clear(); break; } @@ -790,13 +1038,9 @@ bool BitcodeReader::ParseMetadata() { Record.clear(); Code = Stream.ReadCode(); - // METADATA_NAME is always followed by METADATA_NAMED_NODE2. - // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0. + // METADATA_NAME is always followed by METADATA_NAMED_NODE. unsigned NextBitCode = Stream.ReadRecord(Code, Record); - if (NextBitCode == bitc::METADATA_NAMED_NODE) { - LLVM2_7MetadataDetected = true; - } else if (NextBitCode != bitc::METADATA_NAMED_NODE2) - assert ( 0 && "Invalid Named Metadata record"); + assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode; // Read named metadata elements. unsigned Size = Record.size(); @@ -807,35 +1051,20 @@ bool BitcodeReader::ParseMetadata() { return Error("Malformed metadata record"); NMD->addOperand(MD); } - // Backwards compatibility hack: NamedMDValues used to be Values, - // and they got their own slots in the value numbering. They are no - // longer Values, however we still need to account for them in the - // numbering in order to be able to read old bitcode files. - // FIXME: Remove this in LLVM 3.0. - if (LLVM2_7MetadataDetected) - MDValueList.AssignValue(0, NextMDValueNo++); break; } - case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0. - case bitc::METADATA_FN_NODE2: + case bitc::METADATA_FN_NODE: IsFunctionLocal = true; // fall-through - case bitc::METADATA_NODE: // FIXME: Remove in LLVM 3.0. - case bitc::METADATA_NODE2: { - - // Detect 2.7-era metadata. - // FIXME: Remove in LLVM 3.0. 
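ParseOldTypeTable above has no usable ordering for old-format types, so it saves a bitstream cursor at the start of the block and rescans until a pass resolves nothing new, i.e. until a fixed point. Here is a standalone sketch of that fixed-point loop over slots with dependencies; the names are hypothetical.

#include <cassert>
#include <vector>

// Each slot is ready only once all of its dependencies are resolved. Old
// bitcode gives no usable ordering, so rescan from the top until a pass
// makes no progress, a fixed point, mirroring the RestartScan loop.
struct Def { std::vector<size_t> Deps; };

bool resolveAll(const std::vector<Def> &Defs, std::vector<bool> &Done) {
  size_t NumDone = 0;
  for (;;) {                                 // one iteration == one scan pass
    bool ReadAny = false;
    for (size_t i = 0; i != Defs.size(); ++i) {
      if (Done[i]) continue;
      bool Ready = true;
      for (size_t d : Defs[i].Deps)
        if (!Done[d]) { Ready = false; break; }
      if (!Ready) continue;                  // retry on a later pass
      Done[i] = true; ++NumDone; ReadAny = true;
    }
    if (NumDone == Defs.size()) return true; // everything resolved
    if (!ReadAny) return false;              // no progress: recursive knot
  }
}

int main() {
  // Slot 0 needs 1, slot 1 needs 2: resolved over three passes of the scan.
  std::vector<Def> Defs = {{{1}}, {{2}}, {{}}};
  std::vector<bool> Done(Defs.size(), false);
  assert(resolveAll(Defs, Done));
}

The quadratic worst case is accepted deliberately: the path only runs for obsolete bitcode, and failing to make progress is exactly the "unhandled recursive type" error above.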
- if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE) - LLVM2_7MetadataDetected = true; - + case bitc::METADATA_NODE: { if (Record.size() % 2 == 1) - return Error("Invalid METADATA_NODE2 record"); + return Error("Invalid METADATA_NODE record"); unsigned Size = Record.size(); SmallVector<Value*, 8> Elts; for (unsigned i = 0; i != Size; i += 2) { const Type *Ty = getTypeByID(Record[i]); - if (!Ty) return Error("Invalid METADATA_NODE2 record"); + if (!Ty) return Error("Invalid METADATA_NODE record"); if (Ty->isMetadataTy()) Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); else if (!Ty->isVoidTy()) @@ -1331,12 +1560,16 @@ bool BitcodeReader::ParseModule() { if (ParseAttributeBlock()) return true; break; - case bitc::TYPE_BLOCK_ID: + case bitc::TYPE_BLOCK_ID_NEW: if (ParseTypeTable()) return true; break; - case bitc::TYPE_SYMTAB_BLOCK_ID: - if (ParseTypeSymbolTable()) + case bitc::TYPE_BLOCK_ID_OLD: + if (ParseOldTypeTable()) + return true; + break; + case bitc::TYPE_SYMTAB_BLOCK_ID_OLD: + if (ParseOldTypeSymbolTable()) return true; break; case bitc::VALUE_SYMTAB_BLOCK_ID: @@ -1755,10 +1988,7 @@ bool BitcodeReader::ParseMetadataAttachment() { switch (Stream.ReadRecord(Code, Record)) { default: // Default behavior: ignore. break; - // FIXME: Remove in LLVM 3.0. - case bitc::METADATA_ATTACHMENT: - LLVM2_7MetadataDetected = true; - case bitc::METADATA_ATTACHMENT2: { + case bitc::METADATA_ATTACHMENT: { unsigned RecordLength = Record.size(); if (Record.empty() || (RecordLength - 1) % 2 == 1) return Error ("Invalid METADATA_ATTACHMENT reader!"); @@ -1870,10 +2100,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = 0; continue; - // FIXME: Remove this in LLVM 3.0. - case bitc::FUNC_CODE_DEBUG_LOC: - LLVM2_7MetadataDetected = true; - case bitc::FUNC_CODE_DEBUG_LOC2: { // DEBUG_LOC: [line, col, scope, ia] + case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia] I = 0; // Get the last instruction emitted. if (CurBB && !CurBB->empty()) I = &CurBB->back(); @@ -1979,8 +2206,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { EXTRACTVALIdx.push_back((unsigned)Index); } - I = ExtractValueInst::Create(Agg, - EXTRACTVALIdx.begin(), EXTRACTVALIdx.end()); + I = ExtractValueInst::Create(Agg, EXTRACTVALIdx); InstructionList.push_back(I); break; } @@ -2004,8 +2230,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { INSERTVALIdx.push_back((unsigned)Index); } - I = InsertValueInst::Create(Agg, Val, - INSERTVALIdx.begin(), INSERTVALIdx.end()); + I = InsertValueInst::Create(Agg, Val, INSERTVALIdx); InstructionList.push_back(I); break; } @@ -2112,18 +2337,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } - case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n] - if (Record.size() != 2) - return Error("Invalid GETRESULT record"); - unsigned OpNum = 0; - Value *Op; - getValueTypePair(Record, OpNum, NextValueNo, Op); - unsigned Index = Record[1]; - I = ExtractValueInst::Create(Op, Index); - InstructionList.push_back(I); - break; - } - case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>] { unsigned Size = Record.size(); @@ -2134,33 +2347,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } unsigned OpNum = 0; - SmallVector<Value *,4> Vs; - do { - Value *Op = NULL; - if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error("Invalid RET record"); - Vs.push_back(Op); - } while(OpNum != Record.size()); - - const Type *ReturnType = F->getReturnType(); - // Handle multiple return values. FIXME: Remove in LLVM 3.0. 
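The removed RET code just below tolerated a list of values and synthesized multi-value returns; its replacement reads exactly one optional typed value and rejects anything left over. A sketch of that stricter cursor-style record decoding, with a made-up record layout:

#include <cassert>
#include <cstdint>
#include <vector>

// Read the fields you expect through a moving cursor, then require that
// the cursor consumed the whole record. The layout here is hypothetical.
struct Cursor {
  const std::vector<uint64_t> &Rec;
  unsigned Pos = 0;
  bool read(uint64_t &Out) {
    if (Pos >= Rec.size()) return false;  // ran off the record: error
    Out = Rec[Pos++];
    return true;
  }
  bool atEnd() const { return Pos == Rec.size(); }
};

// Returns false on malformed records instead of guessing at extras.
bool parseRet(const std::vector<uint64_t> &Rec, bool &HasVal, uint64_t &Val) {
  Cursor C{Rec};
  if (C.atEnd()) { HasVal = false; return true; }  // 'ret void'
  if (!C.read(Val)) return false;
  HasVal = true;
  return C.atEnd();        // trailing operands are now an error
}

int main() {
  bool HasVal; uint64_t Val;
  assert(parseRet({}, HasVal, Val) && !HasVal);
  assert(parseRet({7}, HasVal, Val) && HasVal && Val == 7);
  assert(!parseRet({7, 8}, HasVal, Val));  // old multi-value form rejected
}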
- if (Vs.size() > 1 || - (ReturnType->isStructTy() && - (Vs.empty() || Vs[0]->getType() != ReturnType))) { - Value *RV = UndefValue::get(ReturnType); - for (unsigned i = 0, e = Vs.size(); i != e; ++i) { - I = InsertValueInst::Create(RV, Vs[i], i, "mrv"); - InstructionList.push_back(I); - CurBB->getInstList().push_back(I); - ValueList.AssignValue(I, NextValueNo++); - RV = I; - } - I = ReturnInst::Create(Context, RV); - InstructionList.push_back(I); - break; - } + Value *Op = NULL; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid RET record"); + if (OpNum != Record.size()) + return Error("Invalid RET record"); - I = ReturnInst::Create(Context, Vs[0]); + I = ReturnInst::Create(Context, Op); InstructionList.push_back(I); break; } @@ -2272,8 +2465,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } } - I = InvokeInst::Create(Callee, NormalBB, UnwindBB, - Ops.begin(), Ops.end()); + I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops); InstructionList.push_back(I); cast<InvokeInst>(I)->setCallingConv( static_cast<CallingConv::ID>(CCInfo)); @@ -2307,47 +2499,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } - case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align] - // Autoupgrade malloc instruction to malloc call. - // FIXME: Remove in LLVM 3.0. - if (Record.size() < 3) - return Error("Invalid MALLOC record"); - const PointerType *Ty = - dyn_cast_or_null<PointerType>(getTypeByID(Record[0])); - Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context)); - if (!Ty || !Size) return Error("Invalid MALLOC record"); - if (!CurBB) return Error("Invalid malloc instruction with no BB"); - const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext()); - Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType()); - AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty); - I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(), - AllocSize, Size, NULL); - InstructionList.push_back(I); - break; - } - case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty] - unsigned OpNum = 0; - Value *Op; - if (getValueTypePair(Record, OpNum, NextValueNo, Op) || - OpNum != Record.size()) - return Error("Invalid FREE record"); - if (!CurBB) return Error("Invalid free instruction with no BB"); - I = CallInst::CreateFree(Op, CurBB); - InstructionList.push_back(I); - break; - } case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] - // For backward compatibility, tolerate a lack of an opty, and use i32. - // Remove this in LLVM 3.0. - if (Record.size() < 3 || Record.size() > 4) + if (Record.size() != 4) return Error("Invalid ALLOCA record"); - unsigned OpNum = 0; const PointerType *Ty = - dyn_cast_or_null<PointerType>(getTypeByID(Record[OpNum++])); - const Type *OpTy = Record.size() == 4 ? 
getTypeByID(Record[OpNum++]) : - Type::getInt32Ty(Context); - Value *Size = getFnValueByID(Record[OpNum++], OpTy); - unsigned Align = Record[OpNum++]; + dyn_cast_or_null<PointerType>(getTypeByID(Record[0])); + const Type *OpTy = getTypeByID(Record[1]); + Value *Size = getFnValueByID(Record[2], OpTy); + unsigned Align = Record[3]; if (!Ty || !Size) return Error("Invalid ALLOCA record"); I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); InstructionList.push_back(I); @@ -2364,7 +2523,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol] + case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -2377,24 +2536,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol] - // FIXME: Legacy form of store instruction. Should be removed in LLVM 3.0. - unsigned OpNum = 0; - Value *Val, *Ptr; - if (getValueTypePair(Record, OpNum, NextValueNo, Val) || - getValue(Record, OpNum, - PointerType::getUnqual(Val->getType()), Ptr)|| - OpNum+2 != Record.size()) - return Error("Invalid STORE record"); - - I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); - InstructionList.push_back(I); - break; - } - // FIXME: Remove this in LLVM 3.0. - case bitc::FUNC_CODE_INST_CALL: - LLVM2_7MetadataDetected = true; - case bitc::FUNC_CODE_INST_CALL2: { + case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] if (Record.size() < 3) return Error("Invalid CALL record"); @@ -2416,7 +2558,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { SmallVector<Value*, 16> Args; // Read the fixed params. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { - if (FTy->getParamType(i)->getTypeID()==Type::LabelTyID) + if (FTy->getParamType(i)->isLabelTy()) Args.push_back(getBasicBlock(Record[OpNum])); else Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); @@ -2436,7 +2578,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } } - I = CallInst::Create(Callee, Args.begin(), Args.end()); + I = CallInst::Create(Callee, Args); InstructionList.push_back(I); cast<CallInst>(I)->setCallingConv( static_cast<CallingConv::ID>(CCInfo>>1)); @@ -2513,23 +2655,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { BlockAddrFwdRefs.erase(BAFRI); } - // FIXME: Remove this in LLVM 3.0. - unsigned NewMDValueListSize = MDValueList.size(); - // Trim the value list down to the size it was before we parsed this function. ValueList.shrinkTo(ModuleValueListSize); MDValueList.shrinkTo(ModuleMDValueListSize); - - // Backwards compatibility hack: Function-local metadata numbers - // were previously not reset between functions. This is now fixed, - // however we still need to understand the old numbering in order - // to be able to read old bitcode files. - // FIXME: Remove this in LLVM 3.0. 
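A pattern running through this diff is collapsing (pointer, size) and (begin, end) argument pairs into a single range parameter, as in ConstantArray::get(Ty, NewOps), CallInst::Create(Callee, Args), and InsertValueInst::Create(Agg, Val, Idx) above. Below is a minimal sketch of such a non-owning view in the spirit of llvm::ArrayRef; ArrayView is a hypothetical stand-in, not the real class.

#include <cassert>
#include <cstddef>
#include <vector>

// A minimal non-owning view: one parameter replaces the old
// (&Ops[0], Ops.size()) and (begin, end) call styles.
template <typename T> class ArrayView {
  const T *Data = nullptr;
  size_t Len = 0;
public:
  ArrayView() = default;
  ArrayView(const T *D, size_t N) : Data(D), Len(N) {}
  // Implicit conversion is what lets call sites just pass a vector.
  ArrayView(const std::vector<T> &V) : Data(V.data()), Len(V.size()) {}
  const T *begin() const { return Data; }
  const T *end() const { return Data + Len; }
  size_t size() const { return Len; }
  const T &operator[](size_t I) const { return Data[I]; }
};

// The post-diff call shape: one range argument instead of two.
int sum(ArrayView<int> Ops) {
  int S = 0;
  for (int V : Ops) S += V;
  return S;
}

int main() {
  std::vector<int> Ops = {1, 2, 3};
  assert(sum(Ops) == 6);             // vector converts implicitly at the call
  assert(sum({Ops.data(), 2}) == 3); // explicit pointer+length still works
}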
- if (LLVM2_7MetadataDetected) - MDValueList.resize(NewMDValueListSize); - std::vector<BasicBlock*>().swap(FunctionBBs); - return false; } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index f8fc079c73d9..1b3bf1a1854a 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -44,9 +44,9 @@ class BitcodeReaderValueList { /// number that holds the resolved value. typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy; ResolveConstantsTy ResolveConstants; - LLVMContext& Context; + LLVMContext &Context; public: - BitcodeReaderValueList(LLVMContext& C) : Context(C) {} + BitcodeReaderValueList(LLVMContext &C) : Context(C) {} ~BitcodeReaderValueList() { assert(ResolveConstants.empty() && "Constants not resolved?"); } @@ -131,7 +131,7 @@ class BitcodeReader : public GVMaterializer { const char *ErrorString; - std::vector<PATypeHolder> TypeList; + std::vector<Type*> TypeList; BitcodeReaderValueList ValueList; BitcodeReaderMDValueList MDValueList; SmallVector<Instruction *, 64> InstructionList; @@ -174,17 +174,10 @@ class BitcodeReader : public GVMaterializer { typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy; DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs; - /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or - /// earlier was detected, in which case we behave slightly differently, - /// for compatibility. - /// FIXME: Remove in LLVM 3.0. - bool LLVM2_7MetadataDetected; - public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), - ErrorString(0), ValueList(C), MDValueList(C), - LLVM2_7MetadataDetected(false) { + ErrorString(0), ValueList(C), MDValueList(C) { HasReversedFunctionsWithBodies = false; } ~BitcodeReader() { @@ -217,12 +210,12 @@ public: /// @returns true if an error occurred. bool ParseTriple(std::string &Triple); private: - const Type *getTypeByID(unsigned ID, bool isTypeTable = false); + Type *getTypeByID(unsigned ID); + Type *getTypeByIDOrNull(unsigned ID); Value *getFnValueByID(unsigned ID, const Type *Ty) { - if (Ty == Type::getMetadataTy(Context)) + if (Ty && Ty->isMetadataTy()) return MDValueList.getValueFwdRef(ID); - else - return ValueList.getValueFwdRef(ID, Ty); + return ValueList.getValueFwdRef(ID, Ty); } BasicBlock *getBasicBlock(unsigned ID) const { if (ID >= FunctionBBs.size()) return 0; // Invalid ID @@ -266,7 +259,10 @@ private: bool ParseModule(); bool ParseAttributeBlock(); bool ParseTypeTable(); - bool ParseTypeSymbolTable(); + bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1 + bool ParseTypeTableBody(); + + bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1 bool ParseValueSymbolTable(); bool ParseConstants(); bool RememberAndSkipFunctionBody(); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 6972a451606a..85d67ce62b9f 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -21,13 +21,14 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Operator.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/ValueSymbolTable.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Program.h" #include <cctype> +#include <map> using namespace llvm; /// These are manifest constants used by the bitcode writer. 
They do not need to @@ -100,13 +101,16 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { } } -static void WriteStringRecord(unsigned Code, const std::string &Str, +static void WriteStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse, BitstreamWriter &Stream) { SmallVector<unsigned, 64> Vals; // Code: [strchar x N] - for (unsigned i = 0, e = Str.size(); i != e; ++i) + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i])) + AbbrevToUse = 0; Vals.push_back(Str[i]); + } // Emit the finished record. Stream.EmitRecord(Code, Vals, AbbrevToUse); @@ -150,7 +154,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE, static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const ValueEnumerator::TypeList &TypeList = VE.getTypes(); - Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */); + Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); SmallVector<uint64_t, 64> TypeVals; // Abbrev for TYPE_CODE_POINTER. @@ -171,15 +175,32 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv); - // Abbrev for TYPE_CODE_STRUCT. + // Abbrev for TYPE_CODE_STRUCT_ANON. Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT)); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(VE.getTypes().size()+1))); - unsigned StructAbbrev = Stream.EmitAbbrev(Abbv); + unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv); + + // Abbrev for TYPE_CODE_STRUCT_NAME. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_STRUCT_NAMED. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, + Log2_32_Ceil(VE.getTypes().size()+1))); + unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv); + + // Abbrev for TYPE_CODE_ARRAY. 
Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -201,16 +222,15 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { switch (T->getTypeID()) { default: llvm_unreachable("Unknown type!"); - case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; - case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; - case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; - case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; - case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; + case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; + case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; + case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; + case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; + case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break; - case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; - case Type::OpaqueTyID: Code = bitc::TYPE_CODE_OPAQUE; break; - case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; - case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; + case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; + case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; + case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; @@ -241,13 +261,28 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { case Type::StructTyID: { const StructType *ST = cast<StructType>(T); // STRUCT: [ispacked, eltty x N] - Code = bitc::TYPE_CODE_STRUCT; TypeVals.push_back(ST->isPacked()); // Output all of the element types. for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) TypeVals.push_back(VE.getTypeID(*I)); - AbbrevToUse = StructAbbrev; + + if (ST->isAnonymous()) { + Code = bitc::TYPE_CODE_STRUCT_ANON; + AbbrevToUse = StructAnonAbbrev; + } else { + if (ST->isOpaque()) { + Code = bitc::TYPE_CODE_OPAQUE; + } else { + Code = bitc::TYPE_CODE_STRUCT_NAMED; + AbbrevToUse = StructNamedAbbrev; + } + + // Emit the name if it is present. + if (!ST->getName().empty()) + WriteStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), + StructNameAbbrev, Stream); + } break; } case Type::ArrayTyID: { @@ -489,8 +524,8 @@ static void WriteMDNode(const MDNode *N, Record.push_back(0); } } - unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 : - bitc::METADATA_NODE2; + unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE : + bitc::METADATA_NODE; Stream.EmitRecord(MDCode, Record, 0); Record.clear(); } @@ -553,7 +588,7 @@ static void WriteModuleMetadata(const Module *M, // Write named metadata operands. 
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(NMD->getOperand(i))); - Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0); + Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0); Record.clear(); } @@ -589,7 +624,7 @@ static void WriteMetadataAttachment(const Function &F, SmallVector<uint64_t, 64> Record; // Write metadata attachments - // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]] + // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] SmallVector<std::pair<unsigned, MDNode*>, 4> MDs; for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -607,7 +642,7 @@ static void WriteMetadataAttachment(const Function &F, Record.push_back(MDs[i].first); Record.push_back(VE.getValueID(MDs[i].second)); } - Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0); + Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); Record.clear(); } @@ -1078,12 +1113,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; break; - case Instruction::PHI: + case Instruction::PHI: { + const PHINode &PN = cast<PHINode>(I); Code = bitc::FUNC_CODE_INST_PHI; - Vals.push_back(VE.getTypeID(I.getType())); - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) - Vals.push_back(VE.getValueID(I.getOperand(i))); + Vals.push_back(VE.getTypeID(PN.getType())); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + Vals.push_back(VE.getValueID(PN.getIncomingValue(i))); + Vals.push_back(VE.getValueID(PN.getIncomingBlock(i))); + } break; + } case Instruction::Alloca: Code = bitc::FUNC_CODE_INST_ALLOCA; @@ -1102,7 +1141,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(cast<LoadInst>(I).isVolatile()); break; case Instruction::Store: - Code = bitc::FUNC_CODE_INST_STORE2; + Code = bitc::FUNC_CODE_INST_STORE; PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr Vals.push_back(VE.getValueID(I.getOperand(0))); // val. Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1); @@ -1113,7 +1152,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - Code = bitc::FUNC_CODE_INST_CALL2; + Code = bitc::FUNC_CODE_INST_CALL; Vals.push_back(VE.getAttributeID(CI.getAttributes())); Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall())); @@ -1257,7 +1296,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, Vals.push_back(DL.getCol()); Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0); Vals.push_back(IA ? VE.getValueID(IA)+1 : 0); - Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); Vals.clear(); LastDL = DL; @@ -1273,46 +1312,6 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, Stream.ExitBlock(); } -/// WriteTypeSymbolTable - Emit a block for the specified type symtab. -static void WriteTypeSymbolTable(const TypeSymbolTable &TST, - const ValueEnumerator &VE, - BitstreamWriter &Stream) { - if (TST.empty()) return; - - Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3); - - // 7-bit fixed width VST_CODE_ENTRY strings. 
- BitCodeAbbrev *Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); - unsigned V7Abbrev = Stream.EmitAbbrev(Abbv); - - SmallVector<unsigned, 64> NameVals; - - for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); - TI != TE; ++TI) { - // TST_ENTRY: [typeid, namechar x N] - NameVals.push_back(VE.getTypeID(TI->second)); - - const std::string &Str = TI->first; - bool is7Bit = true; - for (unsigned i = 0, e = Str.size(); i != e; ++i) { - NameVals.push_back((unsigned char)Str[i]); - if (Str[i] & 128) - is7Bit = false; - } - - // Emit the finished record. - Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0); - NameVals.clear(); - } - - Stream.ExitBlock(); -} - // Emit blockinfo, which defines the standard abbreviations etc. static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { // We only want to emit block info records for blocks that have multiple @@ -1516,9 +1515,6 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit metadata. WriteModuleMetadataStore(M, Stream); - // Emit the type symbol table information. - WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream); - // Emit names for globals/functions etc. WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); @@ -1543,40 +1539,7 @@ enum { DarwinBCHeaderSize = 5*4 }; -/// isARMTriplet - Return true if the triplet looks like: -/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. -static bool isARMTriplet(const std::string &TT) { - size_t Pos = 0; - size_t Size = TT.size(); - if (Size >= 6 && - TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' && - TT[3] == 'm' && TT[4] == 'b') - Pos = 5; - else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm') - Pos = 3; - else - return false; - - if (TT[Pos] == '-') - return true; - else if (TT[Pos] == 'v') { - if (Size >= Pos+4 && - TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2') - return true; - else if (Size >= Pos+4 && - TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e') - return true; - } else - return false; - while (++Pos < Size && TT[Pos] != '-') { - if (!isdigit(TT[Pos])) - return false; - } - return true; -} - -static void EmitDarwinBCHeader(BitstreamWriter &Stream, - const std::string &TT) { +static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) { unsigned CPUType = ~0U; // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, @@ -1590,16 +1553,16 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, DARWIN_CPU_TYPE_POWERPC = 18 }; - if (TT.find("x86_64-") == 0) + Triple::ArchType Arch = TT.getArch(); + if (Arch == Triple::x86_64) CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64; - else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && - TT[4] == '-' && TT[1] - '3' < 6) + else if (Arch == Triple::x86) CPUType = DARWIN_CPU_TYPE_X86; - else if (TT.find("powerpc-") == 0) + else if (Arch == Triple::ppc) CPUType = DARWIN_CPU_TYPE_POWERPC; - else if (TT.find("powerpc64-") == 0) + else if (Arch == Triple::ppc64) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; - else if (isARMTriplet(TT)) + else if (Arch == Triple::arm || Arch == Triple::thumb) CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. 
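EmitDarwinBCHeader above replaces hand-rolled prefix matching, including the deleted isARMTriplet, with a parsed Triple: the arch is classified once and every use site switches on an enum. A standalone sketch of that idea follows; parseArch is a hypothetical stand-in for llvm::Triple, not its real interface.

#include <cassert>
#include <string>

// Classify the arch component of an "arch-vendor-os" triple once, then
// switch on the enum instead of re-matching string prefixes per use site.
enum class Arch { x86, x86_64, ppc, ppc64, arm, thumb, unknown };

Arch parseArch(const std::string &TT) {
  std::string A = TT.substr(0, TT.find('-'));
  if (A == "x86_64") return Arch::x86_64;
  if (A.size() >= 4 && A[0] == 'i' && A.compare(2, 2, "86") == 0)
    return Arch::x86;                             // i?86-style names
  if (A == "powerpc") return Arch::ppc;
  if (A == "powerpc64") return Arch::ppc64;
  if (A.compare(0, 5, "thumb") == 0) return Arch::thumb;
  if (A.compare(0, 3, "arm") == 0) return Arch::arm;
  return Arch::unknown;
}

int main() {
  assert(parseArch("x86_64-apple-darwin10") == Arch::x86_64);
  assert(parseArch("armv6-apple-darwin10") == Arch::arm);
  assert(parseArch("thumbv7-apple-darwin10") == Arch::thumb);
  assert(parseArch("i686-apple-darwin10") == Arch::x86);
}

Centralizing the parse is what makes variants like armv5te or thumbv7 fall out for free instead of needing the deleted character-by-character checks.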
@@ -1645,11 +1608,9 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) { // If this is darwin or another generic macho target, emit a file header and // trailer if needed. - bool isMacho = - M->getTargetTriple().find("-darwin") != std::string::npos || - M->getTargetTriple().find("-macho") != std::string::npos; - if (isMacho) - EmitDarwinBCHeader(Stream, M->getTargetTriple()); + Triple TT(M->getTargetTriple()); + if (TT.isOSDarwin()) + EmitDarwinBCHeader(Stream, TT); // Emit the file header. Stream.Emit((unsigned)'B', 8); @@ -1662,6 +1623,6 @@ void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) { // Emit the module. WriteModule(M, Stream); - if (isMacho) + if (TT.isOSDarwin()) EmitDarwinBCTrailer(Stream, Stream.getBuffer().size()); } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 5138c3c984f3..b68bf92d51b2 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -17,7 +17,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Instructions.h" #include <algorithm> @@ -59,9 +58,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) { I != E; ++I) EnumerateValue(I->getAliasee()); - // Enumerate types used by the type symbol table. - EnumerateTypeSymbolTable(M->getTypeSymbolTable()); - // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... EnumerateValueSymbolTable(M->getValueSymbolTable()); @@ -109,78 +105,12 @@ ValueEnumerator::ValueEnumerator(const Module *M) { // Optimize constant ordering. OptimizeConstants(FirstConstant, Values.size()); - - OptimizeTypes(); - - // Now that we rearranged the type table, rebuild TypeMap. - for (unsigned i = 0, e = Types.size(); i != e; ++i) - TypeMap[Types[i]] = i+1; -} - -struct TypeAndDeps { - const Type *Ty; - unsigned NumDeps; -}; - -static int CompareByDeps(const void *a, const void *b) { - const TypeAndDeps &ta = *(const TypeAndDeps*) a; - const TypeAndDeps &tb = *(const TypeAndDeps*) b; - return ta.NumDeps - tb.NumDeps; -} - -static void VisitType(const Type *Ty, SmallPtrSet<const Type*, 16> &Visited, - std::vector<const Type*> &Out) { - if (Visited.count(Ty)) - return; - - Visited.insert(Ty); - - for (Type::subtype_iterator I2 = Ty->subtype_begin(), - E2 = Ty->subtype_end(); I2 != E2; ++I2) { - const Type *InnerType = I2->get(); - VisitType(InnerType, Visited, Out); - } - - Out.push_back(Ty); } -void ValueEnumerator::OptimizeTypes(void) { - // If the types form a DAG, this will compute a topological sort and - // no forward references will be needed when reading them in. - // If there are cycles, this is a simple but reasonable heuristic for - // the minimum feedback arc set problem. 
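The deleted OptimizeTypes pass approximated a topological order after the fact; the rewritten EnumerateType in this ValueEnumerator hunk instead numbers subtypes first and marks named structs with a ~0U sentinel so that cycles through them terminate. A standalone model of that sentinel trick, where Node and enumerate are illustrative rather than LLVM API:

#include <cassert>
#include <limits>
#include <unordered_map>
#include <vector>

// Types form a graph whose cycles pass through named structs. Mark such a
// node "in progress" before recursing so a cycle hits the mark and stops,
// then assign the real ID once all reachable subtypes already have theirs.
constexpr unsigned InProgress = std::numeric_limits<unsigned>::max();

struct Node { std::vector<const Node*> Subtypes; bool BreaksCycles; };

void enumerate(const Node *N, std::unordered_map<const Node*, unsigned> &IDs,
               std::vector<const Node*> &Order) {
  unsigned &ID = IDs[N];
  if (ID) return;                   // already numbered (or in progress)
  if (N->BreaksCycles)
    ID = InProgress;                // forward-referenceable: mark first
  for (const Node *S : N->Subtypes)
    enumerate(S, IDs, Order);
  unsigned &Again = IDs[N];         // the map may have rehashed; re-look-up
  if (Again && Again != InProgress)
    return;                         // reached again via a deeper path
  Order.push_back(N);
  Again = Order.size();             // 1-based IDs, so 0 means "unseen"
}

int main() {
  Node A{{}, true}, B{{&A}, false};
  A.Subtypes.push_back(&B);         // cycle: A -> B -> A
  std::unordered_map<const Node*, unsigned> IDs;
  std::vector<const Node*> Order;
  enumerate(&A, IDs, Order);
  assert(Order.size() == 2 && IDs[&A] == 2 && IDs[&B] == 1);
}

The re-lookup after recursion mirrors the "Refresh the TypeID pointer in case the table rehashed" comment in the real code: inserting during recursion can invalidate the earlier reference.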
- const unsigned NumTypes = Types.size(); - std::vector<TypeAndDeps> TypeDeps; - TypeDeps.resize(NumTypes); - - for (unsigned I = 0; I < NumTypes; ++I) { - const Type *Ty = Types[I]; - TypeDeps[I].Ty = Ty; - TypeDeps[I].NumDeps = 0; - } - - for (unsigned I = 0; I < NumTypes; ++I) { - const Type *Ty = TypeDeps[I].Ty; - for (Type::subtype_iterator I2 = Ty->subtype_begin(), - E2 = Ty->subtype_end(); I2 != E2; ++I2) { - const Type *InnerType = I2->get(); - unsigned InnerIndex = TypeMap.lookup(InnerType) - 1; - TypeDeps[InnerIndex].NumDeps++; - } - } - array_pod_sort(TypeDeps.begin(), TypeDeps.end(), CompareByDeps); - - SmallPtrSet<const Type*, 16> Visited; - Types.clear(); - Types.reserve(NumTypes); - for (unsigned I = 0; I < NumTypes; ++I) { - VisitType(TypeDeps[I].Ty, Visited, Types); - } -} unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { InstructionMapType::const_iterator I = InstructionMap.find(Inst); - assert (I != InstructionMap.end() && "Instruction is not mapped!"); + assert(I != InstructionMap.end() && "Instruction is not mapped!"); return I->second; } @@ -235,14 +165,6 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { } -/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol -/// table. -void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) { - for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); - TI != TE; ++TI) - EnumerateType(TI->second); -} - /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol /// table into the values table. void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { @@ -394,20 +316,40 @@ void ValueEnumerator::EnumerateValue(const Value *V) { void ValueEnumerator::EnumerateType(const Type *Ty) { - unsigned &TypeID = TypeMap[Ty]; + unsigned *TypeID = &TypeMap[Ty]; // We've already seen this type. - if (TypeID) + if (*TypeID) return; - // First time we saw this type, add it. - Types.push_back(Ty); - TypeID = Types.size(); - - // Enumerate subtypes. + // If it is a non-anonymous struct, mark the type as being visited so that we + // don't recursively visit it. This is safe because we allow forward + // references of these in the bitcode reader. + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isAnonymous()) + *TypeID = ~0U; + + // Enumerate all of the subtypes before we enumerate this type. This ensures + // that the type will be enumerated in an order that can be directly built. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) EnumerateType(*I); + + // Refresh the TypeID pointer in case the table rehashed. + TypeID = &TypeMap[Ty]; + + // Check to see if we got the pointer another way. This can happen when + // enumerating recursive types that hit the base case deeper than they start. + // + // If this is actually a struct that we are treating as forward ref'able, + // then emit the definition now that all of its contents are available. + if (*TypeID && *TypeID != ~0U) + return; + + // Add this type now that its contents are all happily enumerated. + Types.push_back(Ty); + + *TypeID = Types.size(); } // Enumerate the types for the specified value. 
If the value is a constant, diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 1e42a2667669..6617b60deb26 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -30,7 +30,6 @@ class Module; class MDNode; class NamedMDNode; class AttrListPtr; -class TypeSymbolTable; class ValueSymbolTable; class MDSymbolTable; @@ -135,7 +134,6 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); - void OptimizeTypes(); void EnumerateMDNodeOperands(const MDNode *N); void EnumerateMetadata(const Value *MD); @@ -146,7 +144,6 @@ private: void EnumerateOperandType(const Value *V); void EnumerateAttributes(const AttrListPtr &PAL); - void EnumerateTypeSymbolTable(const TypeSymbolTable &ST); void EnumerateValueSymbolTable(const ValueSymbolTable &ST); void EnumerateNamedMetadata(const Module *M); }; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index dca1d29665ac..25842a7876a2 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "post-RA-sched" #include "AggressiveAntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -114,12 +115,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) AggressiveAntiDepBreaker:: AggressiveAntiDepBreaker(MachineFunction& MFi, - TargetSubtarget::RegClassVector& CriticalPathRCs) : + const RegisterClassInfo &RCI, + TargetSubtargetInfo::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), - AllocatableSet(TRI->getAllocatableSet(MF)), + RegClassInfo(RCI), State(NULL) { /* Collect a bitset of all registers that are only broken if they are on the critical path. */ @@ -402,7 +404,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Note register reference... const TargetRegisterClass *RC = NULL; if (i < MI->getDesc().getNumOperands()) - RC = MI->getDesc().OpInfo[i].getRegClass(TRI); + RC = TII->getRegClass(MI->getDesc(), i, TRI); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } @@ -477,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Note register reference... 
const TargetRegisterClass *RC = NULL; if (i < MI->getDesc().getNumOperands()) - RC = MI->getDesc().OpInfo[i].getRegClass(TRI); + RC = TII->getRegClass(MI->getDesc(), i, TRI); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } @@ -618,9 +620,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( const TargetRegisterClass *SuperRC = TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); - const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); - const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); - if (RB == RE) { + ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC); + if (Order.empty()) { DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); return false; } @@ -628,17 +629,17 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG(dbgs() << "\tFind Registers:"); if (RenameOrder.count(SuperRC) == 0) - RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE)); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); - const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC]; - const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR); - TargetRegisterClass::iterator R = OrigR; + unsigned OrigR = RenameOrder[SuperRC]; + unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); + unsigned R = OrigR; do { - if (R == RB) R = RE; + if (R == 0) R = Order.size(); --R; - const unsigned NewSuperReg = *R; + const unsigned NewSuperReg = Order[R]; // Don't consider non-allocatable registers - if (!AllocatableSet.test(NewSuperReg)) continue; + if (!RegClassInfo.isAllocatable(NewSuperReg)) continue; // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; @@ -819,7 +820,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!AllocatableSet.test(AntiDepReg)) { + if (!RegClassInfo.isAllocatable(AntiDepReg)) { // Don't break anti-dependencies on non-allocatable registers. DEBUG(dbgs() << " (non-allocatable)\n"); continue; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index b7ddafc32980..706778485429 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -23,13 +23,15 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { +class RegisterClassInfo; + /// Class AggressiveAntiDepState /// Contains all the state necessary for anti-dep breaking. class AggressiveAntiDepState { @@ -117,11 +119,7 @@ namespace llvm { MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - - /// AllocatableSet - The set of allocatable registers. - /// We'll be ignoring anti-dependencies on non-allocatable registers, - /// because they may not be safe to break. - const BitVector AllocatableSet; + const RegisterClassInfo &RegClassInfo; /// CriticalPathSet - The set of registers that should only be /// renamed if they are on the critical path. 
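The RenameOrder bookkeeping above becomes a plain index per register class: each search resumes just below where the previous one stopped and wraps around the allocation order, exactly the do/while shape in FindSuitableFreeRegisters. A sketch of that round-robin cursor, with hypothetical names:

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Round-robin scan over a fixed order, resuming where the last search for
// the same class stopped: step backwards from the saved cursor, wrapping
// at the ends, until we come back around to where we started.
using RenameOrderType = std::map<std::string, unsigned>;

int findFree(const std::vector<int> &Order, const std::vector<bool> &Free,
             const std::string &RC, RenameOrderType &Cursor) {
  if (!Cursor.count(RC))
    Cursor[RC] = Order.size();          // first query starts past the end
  unsigned OrigR = Cursor[RC];
  unsigned EndR = (OrigR == Order.size()) ? 0 : OrigR;
  unsigned R = OrigR;
  do {
    if (R == 0) R = Order.size();       // wrap around
    --R;
    if (Free[R]) {
      Cursor[RC] = R;                   // resume here next time
      return Order[R];
    }
  } while (R != EndR);
  return -1;                            // nothing free in this class
}

int main() {
  std::vector<int> Order = {10, 11, 12};
  std::vector<bool> Free = {true, true, false};
  RenameOrderType Cursor;
  assert(findFree(Order, Free, "GPR", Cursor) == 11); // scans 12, then 11
  assert(findFree(Order, Free, "GPR", Cursor) == 10); // resumes below 11
}

Rotating the start point spreads rename choices across the class instead of always handing back the same first free register.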
@@ -133,7 +131,8 @@ namespace llvm { public: AggressiveAntiDepBreaker(MachineFunction& MFi, - TargetSubtarget::RegClassVector& CriticalPathRCs); + const RegisterClassInfo &RCI, + TargetSubtargetInfo::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. @@ -158,8 +157,8 @@ namespace llvm { void FinishBlock(); private: - typedef std::map<const TargetRegisterClass *, - TargetRegisterClass::const_iterator> RenameOrderType; + /// Keep track of a position in the allocation order for each regclass. + typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType; /// IsImplicitDefUse - Return true if MO represents a register /// that is both implicitly used and defined in MI diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index a8ee2b6357c3..1005f102bea6 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -41,21 +41,19 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, if (HintPair.first) { const TargetRegisterInfo &TRI = VRM.getTargetRegInfo(); // The remaining allocation order may depend on the hint. - const unsigned *B, *E; - tie(B, E) = TRI.getAllocationOrder(RC, HintPair.first, Hint, - VRM.getMachineFunction()); - - // Empty allocation order? - if (B == E) + ArrayRef<unsigned> Order = + TRI.getRawAllocationOrder(RC, HintPair.first, Hint, + VRM.getMachineFunction()); + if (Order.empty()) return; // Copy the allocation order with reserved registers removed. OwnedBegin = true; - unsigned *P = new unsigned[E - B]; + unsigned *P = new unsigned[Order.size()]; Begin = P; - for (; B != E; ++B) - if (!RCI.isReserved(*B)) - *P++ = *B; + for (unsigned i = 0; i != Order.size(); ++i) + if (!RCI.isReserved(Order[i])) + *P++ = Order[i]; End = P; // Target-dependent hints require resolution. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 161afbafb57b..7f314eed3ae6 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -575,6 +575,8 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } } else if (MI->getOperand(0).isImm()) { OS << MI->getOperand(0).getImm(); + } else if (MI->getOperand(0).isCImm()) { + MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); } else { assert(MI->getOperand(0).isReg() && "Unknown operand type"); if (MI->getOperand(0).getReg() == 0) { @@ -1211,9 +1213,9 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each /// global in the specified llvm.used list for which emitUsedDirectiveFor /// is true, as being used with this directive. -void AsmPrinter::EmitLLVMUsedList(Constant *List) { +void AsmPrinter::EmitLLVMUsedList(const Constant *List) { // Should be an array of 'i8*'. - ConstantArray *InitList = dyn_cast<ConstantArray>(List); + const ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (InitList == 0) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { @@ -1226,11 +1228,11 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) { /// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the /// function pointers, ignoring the init priority. -void AsmPrinter::EmitXXStructorList(Constant *List) { +void AsmPrinter::EmitXXStructorList(const Constant *List) { // Should be an array of '{ int, void ()* }' structs. 
The first value is the
// init priority, which we ignore.
if (!isa<ConstantArray>(List)) return;
- ConstantArray *InitList = cast<ConstantArray>(List);
+ const ConstantArray *InitList = cast<ConstantArray>(List);
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
@@ -1516,6 +1518,13 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
unsigned AddrSpace, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
}
static void EmitGlobalConstantStruct(const ConstantStruct *CS,
@@ -1925,7 +1934,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return false;
// The predecessor has to be immediately before this block.
- const MachineBasicBlock *Pred = *PI;
+ MachineBasicBlock *Pred = *PI;
if (!Pred->isLayoutSuccessor(MBB))
return false;
@@ -1934,9 +1943,28 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
if (Pred->empty())
return true;
- // Otherwise, check the last instruction.
- const MachineInstr &LastInst = Pred->back();
- return !LastInst.getDesc().isBarrier();
+ // Check the terminators in the previous block.
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ return false;
+
+ // If this block is an operand of one of the branches, it is not
+ // reached by fall-through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand &OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c6166e2365a5..5ac455e1a1a1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
@@ -111,7 +112,16 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
OutContext, OutStreamer,
*MAI));
- OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
+
+ // FIXME: It would be nice if we could avoid creating a new instance of
+ // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives which can change subtarget
+ // state, e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo> + STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(), + TM.getTargetCPU(), + TM.getTargetFeatureString())); + OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index bff1a3529984..1fe035efde3e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -491,7 +491,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI, +bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned) { unsigned CIBitWidth = CI->getBitWidth(); if (CIBitWidth <= 64) { diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 60a9b2872b41..213c7fc630d3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -181,7 +181,7 @@ public: /// addConstantValue - Add constant value entry in variable DIE. bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); + bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. bool addConstantFPValue(DIE *Die, const MachineOperand &MO); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 8845bfac5e8d..125e1e86b12f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -229,6 +229,7 @@ public: void DbgScope::dump() const { raw_ostream &err = dbgs(); err.indent(IndentLevel); + err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; const MDNode *N = Desc; N->dump(); if (AbstractScope) @@ -618,6 +619,21 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { return ScopeDIE; } +/// isUnsignedDIType - Return true if type encoding is unsigned. +static bool isUnsignedDIType(DIType Ty) { + DIDerivedType DTy(Ty); + if (DTy.Verify()) + return isUnsignedDIType(DTy.getTypeDerivedFrom()); + + DIBasicType BTy(Ty); + if (BTy.Verify()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char) + return true; + } + return false; +} /// constructVariableDIE - Construct a DIE for the given DbgVariable. DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { @@ -718,6 +734,11 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { else if (DVInsn->getOperand(0).isFPImm()) updated = VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + updated = + VariableCU->addConstantValue(VariableDie, + DVInsn->getOperand(0).getCImm(), + isUnsignedDIType(DV->getType())); } else { VariableCU->addVariableAddress(DV, VariableDie, Asm->getDebugValueLocation(DVInsn)); @@ -913,22 +934,6 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const { return I->second; } -/// isUnsignedDIType - Return true if type encoding is unsigned. 
-static bool isUnsignedDIType(DIType Ty) { - DIDerivedType DTy(Ty); - if (DTy.Verify()) - return isUnsignedDIType(DTy.getTypeDerivedFrom()); - - DIBasicType BTy(Ty); - if (BTy.Verify()) { - unsigned Encoding = BTy.getEncoding(); - if (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char) - return true; - } - return false; -} - // Return const exprssion if value is a GEP to access merged global // constant. e.g. // i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) @@ -1017,7 +1022,7 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { } else { TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } - } else if (ConstantInt *CI = + } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { @@ -1310,7 +1315,6 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, void DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, SmallPtrSet<const MDNode *, 16> &Processed) { - const LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { @@ -1320,11 +1324,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, DIVariable DV(Var); const std::pair<unsigned, DebugLoc> &VP = VI->second; - DbgScope *Scope = 0; - if (const MDNode *IA = VP.second.getInlinedAt(Ctx)) - Scope = ConcreteScopes.lookup(IA); - if (Scope == 0) - Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx)); + DbgScope *Scope = findDbgScope(VP.second); // If variable scope is not found then skip this variable. if (Scope == 0) @@ -1351,6 +1351,34 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; } +/// getDebugLocEntry - Get .debug_loc entry for the instraction range starting +/// at MI. +static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, + const MCSymbol *FLabel, + const MCSymbol *SLabel, + const MachineInstr *MI) { + const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + + if (MI->getNumOperands() != 3) { + MachineLocation MLoc = Asm->getDebugValueLocation(MI); + return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); + } + if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { + MachineLocation MLoc; + MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); + } + if (MI->getOperand(0).isImm()) + return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm()); + if (MI->getOperand(0).isFPImm()) + return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm()); + if (MI->getOperand(0).isCImm()) + return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm()); + + assert (0 && "Unexpected 3 operand DBG_VALUE instruction!"); + return DotDebugLocEntry(); +} + /// collectVariableInfo - Populate DbgScope entries with variables' info. void DwarfDebug::collectVariableInfo(const MachineFunction *MF, @@ -1379,7 +1407,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DISubprogram(DV.getContext()).describes(MF->getFunction())) Scope = CurrentFnDbgScope; else - Scope = findDbgScope(MInsn); + Scope = findDbgScope(MInsn->getDebugLoc()); // If variable scope is not found then skip this variable. 
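
The isUnsignedDIType helper, relocated above so the new constructVariableDIE code can reach it, peels derived-type wrappers (typedefs, qualifiers) until it hits a basic type, then checks the DWARF encoding. The walk in isolation, with a hypothetical Type node standing in for DIType:

    // DWARF base-type encodings (values from the DWARF standard).
    enum { DW_ATE_unsigned = 0x07, DW_ATE_unsigned_char = 0x08 };

    struct Type {
      const Type *DerivedFrom = nullptr; // non-null for typedef/const/etc.
      unsigned Encoding = 0;             // DW_ATE_* for basic types
    };

    bool isUnsignedType(const Type &T) {
      if (T.DerivedFrom)                       // keep unwrapping wrappers
        return isUnsignedType(*T.DerivedFrom);
      return T.Encoding == DW_ATE_unsigned ||
             T.Encoding == DW_ATE_unsigned_char;
    }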
if (!Scope) continue; @@ -1424,6 +1452,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, SLabel = FunctionEndSym; else { const MachineInstr *End = HI[1]; + DEBUG(dbgs() << "DotDebugLoc Pair:\n" + << "\t" << *Begin << "\t" << *End << "\n"); if (End->isDebugValue()) SLabel = getLabelBeforeInsn(End); else { @@ -1435,25 +1465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. - MachineLocation MLoc; - if (Begin->getNumOperands() == 3) { - if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) { - MLoc.set(Begin->getOperand(0).getReg(), - Begin->getOperand(1).getImm()); - DotDebugLocEntries. - push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); - } - // FIXME: Handle isFPImm also. - else if (Begin->getOperand(0).isImm()) { - DotDebugLocEntries. - push_back(DotDebugLocEntry(FLabel, SLabel, - Begin->getOperand(0).getImm())); - } - } else { - MLoc = Asm->getDebugValueLocation(Begin); - DotDebugLocEntries. - push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); - } + DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin)); } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -1550,8 +1562,12 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) { } /// getOrCreateDbgScope - Create DbgScope for the scope. -DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, - const MDNode *InlinedAt) { +DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) { + LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; + DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx); + if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) @@ -1560,22 +1576,12 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, DbgScopeMap.insert(std::make_pair(Scope, WScope)); if (DIDescriptor(Scope).isLexicalBlock()) { DbgScope *Parent = - getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL); + getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope)); WScope->setParent(Parent); Parent->addScope(WScope); - } - - if (!WScope->getParent()) { - StringRef SPName = DISubprogram(Scope).getLinkageName(); - // We used to check only for a linkage name, but that fails - // since we began omitting the linkage name for private - // functions. The new way is to check for the name in metadata, - // but that's not supported in old .ll test cases. Ergo, we - // check both. - if (SPName == Asm->MF->getFunction()->getName() || - DISubprogram(Scope).getFunction() == Asm->MF->getFunction()) - CurrentFnDbgScope = WScope; - } + } else if (DIDescriptor(Scope).isSubprogram() + && DISubprogram(Scope).describes(Asm->MF->getFunction())) + CurrentFnDbgScope = WScope; return WScope; } @@ -1587,37 +1593,14 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt); DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); - DILocation DL(InlinedAt); + InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope; DbgScope *Parent = - getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation()); + getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt)); WScope->setParent(Parent); Parent->addScope(WScope); - - ConcreteScopes[InlinedAt] = WScope; - return WScope; } -/// hasValidLocation - Return true if debug location entry attached with -/// machine instruction encodes valid location info. 
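
The getOrCreateDbgScope rewrite above is a memoized recursive constructor: look the scope up by its DebugLoc-derived key, and if it is absent, create it, register it, then recurse to build and link the parent. The shape of that pattern, with strings standing in for DebugLocs:

    #include <map>
    #include <string>

    struct Scope { Scope *Parent = nullptr; };

    std::map<std::string, Scope *> ScopeMap;       // memo table
    std::map<std::string, std::string> ParentKeys; // enclosing-scope keys

    Scope *getOrCreateScope(const std::string &Key) {
      auto It = ScopeMap.find(Key);
      if (It != ScopeMap.end())
        return It->second;        // each scope is built exactly once
      Scope *S = new Scope();
      ScopeMap[Key] = S;          // register before recursing, as the real
                                  // code inserts into DbgScopeMap first
      auto P = ParentKeys.find(Key);
      if (P != ParentKeys.end())
        S->Parent = getOrCreateScope(P->second);
      return S;
    }

Switching the key from (MDNode, MDNode) pairs to DebugLoc is what lets the old ConcreteScopes map collapse into the single InlinedDbgScopeMap.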
-static bool hasValidLocation(LLVMContext &Ctx, - const MachineInstr *MInsn, - const MDNode *&Scope, const MDNode *&InlinedAt) { - DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) return false; - - const MDNode *S = DL.getScope(Ctx); - - // There is no need to create another DIE for compile unit. For all - // other scopes, create one DbgScope now. This will be translated - // into a scope DIE at the end. - if (DIScope(S).isCompileUnit()) return false; - - Scope = S; - InlinedAt = DL.getInlinedAt(Ctx); - return true; -} - /// calculateDominanceGraph - Calculate dominance graph for DbgScope /// hierarchy. static void calculateDominanceGraph(DbgScope *Scope) { @@ -1648,21 +1631,24 @@ static void calculateDominanceGraph(DbgScope *Scope) { /// printDbgScopeInfo - Print DbgScope info for each machine instruction. static -void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF, +void printDbgScopeInfo(const MachineFunction *MF, DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) { #ifndef NDEBUG + LLVMContext &Ctx = MF->getFunction()->getContext(); unsigned PrevDFSIn = 0; for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - const MDNode *Scope = NULL; - const MDNode *InlinedAt = NULL; + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; // Check if instruction has valid location information. - if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) { + DebugLoc MIDL = MInsn->getDebugLoc(); + if (!MIDL.isUnknown()) { + MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx); dbgs() << " [ "; if (InlinedAt) dbgs() << "*"; @@ -1692,11 +1678,9 @@ bool DwarfDebug::extractScopeInformation() { return false; // Scan each instruction and create scopes. First build working set of scopes. - LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); SmallVector<DbgRange, 4> MIRanges; DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap; - const MDNode *PrevScope = NULL; - const MDNode *PrevInlinedAt = NULL; + DebugLoc PrevDL; const MachineInstr *RangeBeginMI = NULL; const MachineInstr *PrevMI = NULL; for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); @@ -1704,17 +1688,16 @@ bool DwarfDebug::extractScopeInformation() { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - const MDNode *Scope = NULL; - const MDNode *InlinedAt = NULL; // Check if instruction has valid location information. - if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) { + const DebugLoc MIDL = MInsn->getDebugLoc(); + if (MIDL.isUnknown()) { PrevMI = MInsn; continue; } // If scope has not changed then skip this instruction. - if (Scope == PrevScope && PrevInlinedAt == InlinedAt) { + if (MIDL == PrevDL) { PrevMI = MInsn; continue; } @@ -1727,9 +1710,13 @@ bool DwarfDebug::extractScopeInformation() { // If we have alread seen a beginning of a instruction range and // current instruction scope does not match scope of first instruction // in this range then create a new instruction range. 
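
The extractScopeInformation loop above now splits instruction ranges purely on DebugLoc equality, where it previously compared (Scope, InlinedAt) pairs extracted by hasValidLocation. The underlying operation is run-length grouping; a compact sketch with ints standing in for locations:

    #include <utility>
    #include <vector>

    // Group consecutive equal locations into [begin, end] index ranges.
    std::vector<std::pair<size_t, size_t>>
    splitIntoRanges(const std::vector<int> &Locs) {
      std::vector<std::pair<size_t, size_t>> Ranges;
      size_t Begin = 0;
      for (size_t i = 1; i <= Locs.size(); ++i) {
        if (i == Locs.size() || Locs[i] != Locs[Begin]) {
          Ranges.push_back({Begin, i - 1}); // close the current run
          Begin = i;
        }
      }
      return Ranges;
    }

The real loop additionally skips instructions with unknown locations, so a range can span unlocated instructions without being split.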
+ DEBUG(dbgs() << "Creating new instruction range :\n"); + DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI); + DEBUG(dbgs() << "End Range at " << *PrevMI); + DEBUG(dbgs() << "Next Range starting at " << *MInsn); + DEBUG(dbgs() << "------------------------\n"); DbgRange R(RangeBeginMI, PrevMI); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, - PrevInlinedAt); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL); MIRanges.push_back(R); } @@ -1738,16 +1725,15 @@ bool DwarfDebug::extractScopeInformation() { // Reset previous markers. PrevMI = MInsn; - PrevScope = Scope; - PrevInlinedAt = InlinedAt; + PrevDL = MIDL; } } // Create last instruction range. - if (RangeBeginMI && PrevMI && PrevScope) { + if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { DbgRange R(RangeBeginMI, PrevMI); MIRanges.push_back(R); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL); } if (!CurrentFnDbgScope) @@ -1755,7 +1741,7 @@ bool DwarfDebug::extractScopeInformation() { calculateDominanceGraph(CurrentFnDbgScope); if (PrintDbgScope) - printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap); + printDbgScopeInfo(Asm->MF, MI2ScopeMap); // Find ranges of instructions covered by each DbgScope; DbgScope *PrevDbgScope = NULL; @@ -1842,8 +1828,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); - /// ProcessedArgs - Collection of arguments already processed. - SmallPtrSet<const MDNode *, 8> ProcessedArgs; const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); /// LiveUserVar - Map physreg numbers to the MDNode they contain. std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); @@ -1883,8 +1867,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Prev->isDebugValue()) { // Coalesce identical entries at the end of History. if (History.size() >= 2 && - Prev->isIdenticalTo(History[History.size() - 2])) + Prev->isIdenticalTo(History[History.size() - 2])) { + DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n" + << "\t" << *Prev + << "\t" << *History[History.size() - 2] << "\n"); History.pop_back(); + } // Terminate old register assignments that don't reach MI; MachineFunction::const_iterator PrevMBB = Prev->getParent(); @@ -1894,9 +1882,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // its basic block. MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr(); - if (LastMI == PrevMBB->end()) + if (LastMI == PrevMBB->end()) { // Drop DBG_VALUE for empty range. + DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n" + << "\t" << *Prev << "\n"); History.pop_back(); + } else { // Terminate after LastMI. History.push_back(LastMI); @@ -2053,10 +2044,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); + InlinedDbgScopeMap.clear(); DeleteContainerSeconds(DbgScopeMap); UserVariables.clear(); DbgValues.clear(); - ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); AbstractVariables.clear(); @@ -2083,22 +2074,17 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) { return true; } -/// findDbgScope - Find DbgScope for the debug loc attached with an -/// instruction. 
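
On the coalescing above: when the DBG_VALUE being terminated is identical to the history entry just before it, it never defined a distinct location range and can be dropped. Roughly, with strings standing in for instructions:

    #include <string>
    #include <vector>

    void coalesceTail(std::vector<std::string> &History) {
      // History.back() is the entry a new DBG_VALUE is about to terminate.
      if (History.size() >= 2 &&
          History.back() == History[History.size() - 2])
        History.pop_back(); // zero-length range, nothing to emit for it
    }

The same reasoning drops a DBG_VALUE whose range would end in an empty block: no instruction ever executes under that location entry.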
-DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { - DbgScope *Scope = NULL; - LLVMContext &Ctx = - MInsn->getParent()->getParent()->getFunction()->getContext(); - DebugLoc DL = MInsn->getDebugLoc(); - +/// findDbgScope - Find DbgScope for the debug loc. +DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) { if (DL.isUnknown()) - return Scope; + return NULL; - if (const MDNode *IA = DL.getInlinedAt(Ctx)) - Scope = ConcreteScopes.lookup(IA); - if (Scope == 0) + DbgScope *Scope = NULL; + LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); + if (MDNode *IA = DL.getInlinedAt(Ctx)) + Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA)); + else Scope = DbgScopeMap.lookup(DL.getScope(Ctx)); - return Scope; } @@ -2597,56 +2583,61 @@ void DwarfDebug::emitDebugLoc() { MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); Asm->EmitLabelDifference(end, begin, 2); Asm->OutStreamer.EmitLabel(begin); - if (Entry.isConstant()) { + if (Entry.isInt()) { DIBasicType BTy(DV.getType()); if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { Asm->OutStreamer.AddComment("DW_OP_consts"); Asm->EmitInt8(dwarf::DW_OP_consts); - Asm->EmitSLEB128(Entry.getConstant()); + Asm->EmitSLEB128(Entry.getInt()); } else { Asm->OutStreamer.AddComment("DW_OP_constu"); Asm->EmitInt8(dwarf::DW_OP_constu); - Asm->EmitULEB128(Entry.getConstant()); + Asm->EmitULEB128(Entry.getInt()); } - } else if (DV.hasComplexAddress()) { - unsigned N = DV.getNumAddrElements(); - unsigned i = 0; - if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Entry.Loc.getOffset()) { - i = 2; - Asm->EmitDwarfRegOp(Entry.Loc); - Asm->OutStreamer.AddComment("DW_OP_deref"); - Asm->EmitInt8(dwarf::DW_OP_deref); - Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); - Asm->EmitInt8(dwarf::DW_OP_plus_uconst); - Asm->EmitSLEB128(DV.getAddrElement(1)); + } else if (Entry.isLocation()) { + if (!DV.hasComplexAddress()) + // Regular entry. + Asm->EmitDwarfRegOp(Entry.Loc); + else { + // Complex address entry. + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + if (Entry.Loc.getOffset()) { + i = 2; + Asm->EmitDwarfRegOp(Entry.Loc); + Asm->OutStreamer.AddComment("DW_OP_deref"); + Asm->EmitInt8(dwarf::DW_OP_deref); + Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitSLEB128(DV.getAddrElement(1)); + } else { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(Loc); + i = 2; + } } else { - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(Loc); - i = 2; + Asm->EmitDwarfRegOp(Entry.Loc); + } + + // Emit remaining complex address elements. + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitULEB128(DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) + Asm->EmitInt8(dwarf::DW_OP_deref); + else llvm_unreachable("unknown Opcode found in complex address"); } - } else { - Asm->EmitDwarfRegOp(Entry.Loc); - } - - // Emit remaining complex address elements. 
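
The isInt branch above picks DW_OP_consts, whose payload is signed LEB128, for signed DWARF encodings, and DW_OP_constu with unsigned LEB128 otherwise. The two encoders, as defined by the DWARF standard:

    #include <cstdint>
    #include <vector>

    void emitULEB128(uint64_t V, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V) Byte |= 0x80;   // more bytes follow
        Out.push_back(Byte);
      } while (V);
    }

    void emitSLEB128(int64_t V, std::vector<uint8_t> &Out) {
      bool More;
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;               // arithmetic shift keeps the sign
                               // (true on mainstream compilers)
        More = !((V == 0 && !(Byte & 0x40)) ||
                 (V == -1 && (Byte & 0x40)));
        if (More) Byte |= 0x80;
        Out.push_back(Byte);
      } while (More);
    }

Using the wrong variant matters for negative constants: -1 is a single 0x7f byte in SLEB128 but ten bytes in ULEB128 after sign extension.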
- for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == DIBuilder::OpPlus) { - Asm->EmitInt8(dwarf::DW_OP_plus_uconst); - Asm->EmitULEB128(DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) - Asm->EmitInt8(dwarf::DW_OP_deref); - else llvm_unreachable("unknown Opcode found in complex address"); } - } else { - // Regular entry. - Asm->EmitDwarfRegOp(Entry.Loc); } + // else ... ignore constant fp. There is not any good way to + // to represent them here in dwarf. Asm->OutStreamer.EmitLabel(end); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index abda2e61d31e..b2450064e3d0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -69,17 +69,35 @@ typedef struct DotDebugLocEntry { const MDNode *Variable; bool Merged; bool Constant; - int64_t iConstant; + enum EntryType { + E_Location, + E_Integer, + E_ConstantFP, + E_ConstantInt + }; + enum EntryType EntryKind; + + union { + int64_t Int; + const ConstantFP *CFP; + const ConstantInt *CIP; + } Constants; DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false), - Constant(false), iConstant(0) {} + Constant(false) { Constants.Int = 0;} DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, const MDNode *V) : Begin(B), End(E), Loc(L), Variable(V), Merged(false), - Constant(false), iConstant(0) {} + Constant(false) { Constants.Int = 0; EntryKind = E_Location; } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) : Begin(B), End(E), Variable(0), Merged(false), - Constant(true), iConstant(i) {} + Constant(true) { Constants.Int = i; EntryKind = E_Integer; } + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) + : Begin(B), End(E), Variable(0), Merged(false), + Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; } + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr) + : Begin(B), End(E), Variable(0), Merged(false), + Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; } /// Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. @@ -91,8 +109,13 @@ typedef struct DotDebugLocEntry { Next->Begin = Begin; Merged = true; } - bool isConstant() { return Constant; } - int64_t getConstant() { return iConstant; } + bool isLocation() const { return EntryKind == E_Location; } + bool isInt() const { return EntryKind == E_Integer; } + bool isConstantFP() const { return EntryKind == E_ConstantFP; } + bool isConstantInt() const { return EntryKind == E_ConstantInt; } + int64_t getInt() { return Constants.Int; } + const ConstantFP *getConstantFP() { return Constants.CFP; } + const ConstantInt *getConstantInt() { return Constants.CIP; } } DotDebugLocEntry; //===----------------------------------------------------------------------===// @@ -178,12 +201,10 @@ class DwarfDebug { /// DbgScopeMap - Tracks the scopes in the current function. Owns the /// contained DbgScope*s. - /// DenseMap<const MDNode *, DbgScope *> DbgScopeMap; - /// ConcreteScopes - Tracks the concrete scopees in the current function. - /// These scopes are also included in DbgScopeMap. - DenseMap<const MDNode *, DbgScope *> ConcreteScopes; + /// InlinedDbgScopeMap - Tracks inlined function scopes in current function. + DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap; /// AbstractScopes - Tracks the abstract scopes a module. These scopes are /// not included DbgScopeMap. 
AbstractScopes owns its DbgScope*s. @@ -296,7 +317,7 @@ private: void assignAbbrevNumber(DIEAbbrev &Abbrev); /// getOrCreateDbgScope - Create DbgScope for the scope. - DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); + DbgScope *getOrCreateDbgScope(DebugLoc DL); DbgScope *getOrCreateAbstractScope(const MDNode *N); @@ -427,9 +448,8 @@ private: /// is found. Update FI to hold value of the index. bool findVariableFrameIndex(const DbgVariable *V, int *FI); - /// findDbgScope - Find DbgScope for the debug loc attached with an - /// instruction. - DbgScope *findDbgScope(const MachineInstr *MI); + /// findDbgScope - Find DbgScope for the debug loc. + DbgScope *findDbgScope(DebugLoc DL); /// identifyScopeMarkers() - Indentify instructions that are marking /// beginning of or end of a scope. diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 967a2783da14..1f992faaadb5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -512,6 +512,8 @@ void DwarfException::EmitExceptionTable() { SizeAlign = 0; } + bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + // SjLj Exception handling if (IsSJLJ) { Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); @@ -525,14 +527,30 @@ void DwarfException::EmitExceptionTable() { I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { const CallSiteEntry &S = *I; + if (VerboseAsm) { + // Emit comments that decode the call site. + Asm->OutStreamer.AddComment(Twine(">> Call Site ") + + llvm::utostr(idx) + " <<"); + Asm->OutStreamer.AddComment(Twine(" On exception at call site ") + + llvm::utostr(idx)); + + if (S.Action == 0) + Asm->OutStreamer.AddComment(" Action: cleanup"); + else + Asm->OutStreamer.AddComment(Twine(" Action: ") + + llvm::utostr((S.Action - 1) / 2 + 1)); + + Asm->OutStreamer.AddBlankLine(); + } + // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - Asm->EmitULEB128(idx, "Landing pad"); + Asm->EmitULEB128(idx); // Offset of the first associated action record, relative to the start of // the action table. This value is biased by 1 (1 indicates the start of // the action table), and 0 indicates that there are no actions. - Asm->EmitULEB128(S.Action, "Action"); + Asm->EmitULEB128(S.Action); } } else { // DWARF Exception handling @@ -562,6 +580,7 @@ void DwarfException::EmitExceptionTable() { // Add extra padding if it wasn't added to the TType base offset. Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); + unsigned Entry = 0; for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { const CallSiteEntry &S = *I; @@ -576,19 +595,38 @@ void DwarfException::EmitExceptionTable() { if (EndLabel == 0) EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); + if (VerboseAsm) { + // Emit comments that decode the call site. 
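
The DotDebugLocEntry rework a few hunks up is a classic tagged union: one discriminator enum, a union of payloads, and a matching query plus accessor per kind. The shape in miniature:

    #include <cassert>

    struct Entry {
      enum Kind { Int, FP } EntryKind;
      union { long I; double F; } Payload;

      static Entry makeInt(long V) {
        Entry E; E.EntryKind = Int; E.Payload.I = V; return E;
      }
      bool isInt() const { return EntryKind == Int; }
      long getInt() const { assert(isInt()); return Payload.I; }
    };

Note that the ConstantFP payload is stored but deliberately not emitted yet; as the comment in emitDebugLoc says, no good DWARF representation has been chosen for it at this point.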
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") + + llvm::utostr(++Entry) + " <<"); + Asm->OutStreamer.AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); + + if (!S.PadLabel) { + Asm->OutStreamer.AddComment(" has no landing pad"); + } else { + Asm->OutStreamer.AddComment(Twine(" jumps to ") + + S.PadLabel->getName()); + + if (S.Action == 0) + Asm->OutStreamer.AddComment(" On action: cleanup"); + else + Asm->OutStreamer.AddComment(Twine(" On action: ") + + llvm::utostr((S.Action - 1) / 2 + 1)); + } + + Asm->OutStreamer.AddBlankLine(); + } + // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. - Asm->OutStreamer.AddComment("Region start"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); - - Asm->OutStreamer.AddComment("Region length"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); - // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - Asm->OutStreamer.AddComment("Landing pad"); if (!S.PadLabel) Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); else @@ -597,45 +635,63 @@ void DwarfException::EmitExceptionTable() { // Offset of the first associated action record, relative to the start of // the action table. This value is biased by 1 (1 indicates the start of // the action table), and 0 indicates that there are no actions. - Asm->EmitULEB128(S.Action, "Action"); + Asm->EmitULEB128(S.Action); } } // Emit the Action Table. - if (Actions.size() != 0) { - Asm->OutStreamer.AddComment("-- Action Record Table --"); - Asm->OutStreamer.AddBlankLine(); - } - + int Entry = 0; for (SmallVectorImpl<ActionEntry>::const_iterator I = Actions.begin(), E = Actions.end(); I != E; ++I) { const ActionEntry &Action = *I; - Asm->OutStreamer.AddComment("Action Record"); - Asm->OutStreamer.AddBlankLine(); + + if (VerboseAsm) { + // Emit comments that decode the action table. + Asm->OutStreamer.AddComment(Twine(">> Action Record ") + + llvm::utostr(++Entry) + " <<"); + if (Action.ValueForTypeID >= 0) + Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") + + llvm::itostr(Action.ValueForTypeID)); + else + Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") + + llvm::itostr(Action.ValueForTypeID)); + + if (Action.NextAction == 0) { + Asm->OutStreamer.AddComment(" No further actions"); + } else { + unsigned NextAction = Entry + (Action.NextAction + 1) / 2; + Asm->OutStreamer.AddComment(Twine(" Continue to action ") + + llvm::utostr(NextAction)); + } + + Asm->OutStreamer.AddBlankLine(); + } // Type Filter // // Used by the runtime to match the type of the thrown exception to the // type of the catch clauses or the types in the exception specification. - Asm->EmitSLEB128(Action.ValueForTypeID, " TypeInfo index"); + Asm->EmitSLEB128(Action.ValueForTypeID); // Action Record // // Self-relative signed displacement in bytes of the next action record, // or 0 if there is no next action record. - Asm->EmitSLEB128(Action.NextAction, " Next action"); + Asm->EmitSLEB128(Action.NextAction); } // Emit the Catch TypeInfos. 
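
A note on the "Continue to action" comments above: Action.NextAction is a self-relative byte displacement to the next action record, and each record is two SLEB128 fields. Assuming each field encodes in one byte (true for the small type indices involved), a record occupies two bytes, the displacement to a record k entries ahead is 2k - 1 measured from the displacement field itself, and

    next record index = Entry + (NextAction + 1) / 2

recovers k. The conversion only holds under that one-byte-per-field assumption, which is fine here because it feeds assembly comments, not the encoded table itself.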
- if (!TypeInfos.empty()) { - Asm->OutStreamer.AddComment("-- Catch TypeInfos --"); + if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); Asm->OutStreamer.AddBlankLine(); + Entry = TypeInfos.size(); } + for (std::vector<const GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; - - Asm->OutStreamer.AddComment("TypeInfo"); + if (VerboseAsm) + Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--)); if (GV) Asm->EmitReference(GV, TTypeEncoding); else @@ -644,14 +700,21 @@ void DwarfException::EmitExceptionTable() { } // Emit the Exception Specifications. - if (!FilterIds.empty()) { - Asm->OutStreamer.AddComment("-- Filter IDs --"); + if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); Asm->OutStreamer.AddBlankLine(); + Entry = 0; } for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { unsigned TypeID = *I; - Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0); + if (VerboseAsm) { + --Entry; + if (TypeID != 0) + Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry)); + } + + Asm->EmitULEB128(TypeID); } Asm->EmitAlignment(2); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 719cd264f684..99090a8269d4 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -108,6 +108,9 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end()-1); + // Avoid matching if this pointer gets reused. + TriedMerging.erase(MBB); + // Remove the block. MF->erase(MBB); } @@ -171,6 +174,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MachineModuleInfo *mmi) { if (!tii) return false; + TriedMerging.clear(); + TII = tii; TRI = tri; MMI = mmi; @@ -361,11 +366,31 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, return TailLen; } +void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB) { + if (RS) { + RS->enterBasicBlock(CurMBB); + if (!CurMBB->empty()) + RS->forward(prior(CurMBB->end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } +} + /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { + MachineBasicBlock *CurMBB = OldInst->getParent(); + TII->ReplaceTailWithBranchTo(OldInst, NewDest); + + // For targets that use the register scavenger, we must maintain LiveIns. + MaintainLiveIns(CurMBB, NewDest); + ++NumTailMerge; } @@ -394,16 +419,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); // For targets that use the register scavenger, we must maintain LiveIns. 
- if (RS) { - RS->enterBasicBlock(&CurMBB); - if (!CurMBB.empty()) - RS->forward(prior(CurMBB.end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) - if (RegsLiveAtExit[i]) - NewMBB->addLiveIn(i); - } + MaintainLiveIns(&CurMBB, NewMBB); return NewMBB; } @@ -416,10 +432,10 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, for (; I != E; ++I) { if (I->isDebugValue()) continue; - const TargetInstrDesc &TID = I->getDesc(); - if (TID.isCall()) + const MCInstrDesc &MCID = I->getDesc(); + if (MCID.isCall()) Time += 10; - else if (TID.mayLoad() || TID.mayStore()) + else if (MCID.mayLoad() || MCID.mayStore()) Time += 2; else ++Time; @@ -799,14 +815,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E && MergePotentials.size() < TailMergeThreshold; ++I) { + if (TriedMerging.count(I)) + continue; if (I->succ_empty()) MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); // See if we can do any tail merging on those. - if (MergePotentials.size() < TailMergeThreshold && - MergePotentials.size() >= 2) + if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(NULL, NULL); // Look at blocks (IBB) with multiple predecessors (PBB). @@ -830,15 +853,17 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { - if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + if (I->pred_size() >= 2) { SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); - P != E2; ++P) { + P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { MachineBasicBlock *PBB = *P; + if (TriedMerging.count(PBB)) + continue; // Skip blocks that loop to themselves, can't tail merge these. if (PBB == IBB) continue; @@ -891,6 +916,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB); // Reinsert an unconditional branch if needed. 
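
The TriedMerging logic above bounds compile time on huge functions: candidate collection stops at TailMergeThreshold, and when the cap is hit, everything collected is memoized so later sweeps do not revisit the same blocks (RemoveDeadBlock also erases freed blocks from the set so a reused pointer cannot alias a stale entry). A generic sketch of the cap-and-memoize pattern:

    #include <set>
    #include <vector>

    template <typename Block>
    void collectCandidates(const std::vector<Block *> &All, unsigned Threshold,
                           std::set<Block *> &Tried,
                           std::vector<Block *> &Out) {
      for (Block *B : All) {
        if (Out.size() >= Threshold) break;  // cap the problem size
        if (Tried.count(B)) continue;        // already attempted earlier
        Out.push_back(B);
      }
      if (Out.size() == Threshold)           // only memoize when capped,
        Tried.insert(Out.begin(), Out.end()); // mirroring the code above
    }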
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 4daf4ecfe599..df795dfc248e 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -10,6 +10,7 @@ #ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP #define LLVM_CODEGEN_BRANCHFOLDING_HPP +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include <vector> @@ -47,6 +48,7 @@ namespace llvm { }; typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; + SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; class SameTailElt { MPIterator MPIter; @@ -93,6 +95,8 @@ namespace llvm { bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); + void MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index c726d924d281..06d2a959ac84 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -33,6 +33,8 @@ add_llvm_library(LLVMCodeGen LocalStackSlotAllocation.cpp LowerSubregs.cpp MachineBasicBlock.cpp + MachineBlockFrequency.cpp + MachineBranchProbabilityInfo.cpp MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp @@ -58,7 +60,6 @@ add_llvm_library(LLVMCodeGen Passes.cpp PeepholeOptimizer.cpp PostRASchedulerList.cpp - PreAllocSplitting.cpp ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp @@ -78,7 +79,6 @@ add_llvm_library(LLVMCodeGen ScoreboardHazardRecognizer.cpp ShadowStackGC.cpp ShrinkWrapping.cpp - SimpleRegisterCoalescing.cpp SjLjEHPrepare.cpp SlotIndexes.cpp Spiller.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 5d722ee34f7e..e6b3bbca2068 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -188,6 +188,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); const TargetRegisterClass *OldRC = MRI.getRegClass(reg); const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); @@ -202,8 +203,11 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { // TRI doesn't have accurate enough information to model this yet. if (I.getOperand().getSubReg()) return; + // Inline asm instuctions don't remember their constraints. 
+ if (I->isInlineAsm()) + return; const TargetRegisterClass *OpRC = - I->getDesc().getRegClass(I.getOperandNo(), TRI); + TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI); if (OpRC) NewRC = getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 515e6f9fde87..489746cf3c72 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -37,13 +37,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeOptimizePHIsPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); - initializePreAllocSplittingPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); initializeRALinScanPass(Registry); - initializeRegisterCoalescerAnalysisGroup(Registry); + initializeRegisterCoalescerPass(Registry); initializeRenderMachineFunctionPass(Registry); - initializeSimpleRegisterCoalescingPass(Registry); initializeSlotIndexesPass(Registry); initializeLoopSplitterPass(Registry); initializeStackProtectorPass(Registry); diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 51d984ffac0f..84c4d59c0e41 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -27,12 +27,12 @@ using namespace llvm; CriticalAntiDepBreaker:: -CriticalAntiDepBreaker(MachineFunction& MFi) : +CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), - AllocatableSet(TRI->getAllocatableSet(MF)), + RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0) {} @@ -207,7 +207,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { const TargetRegisterClass *NewRC = 0; if (i < MI->getDesc().getNumOperands()) - NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); + NewRC = TII->getRegClass(MI->getDesc(), i, TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -295,7 +295,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, const TargetRegisterClass *NewRC = 0; if (i < MI->getDesc().getNumOperands()) - NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); + NewRC = TII->getRegClass(MI->getDesc(), i, TRI); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -385,11 +385,9 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned LastNewReg, const TargetRegisterClass *RC) { - for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), - RE = RC->allocation_order_end(MF); R != RE; ++R) { - unsigned NewReg = *R; - // Don't consider non-allocatable registers - if (!AllocatableSet.test(NewReg)) continue; + ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned NewReg = Order[i]; // Don't replace a register with itself. 
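
RegisterClassInfo, which replaces the raw AllocatableSet in the anti-dependence breaker above, hands out a per-class allocation order with non-allocatable registers already filtered out, so the scan iterates a flat ArrayRef instead of re-testing a BitVector per candidate. A sketch of the caching idea, with hypothetical names:

    #include <vector>

    struct RegClassOrderCache {
      std::vector<std::vector<unsigned>> Orders; // one filtered order per class

      const std::vector<unsigned> &
      getOrder(unsigned RC, const std::vector<unsigned> &RawOrder,
               const std::vector<bool> &Allocatable) {
        if (Orders.size() <= RC)
          Orders.resize(RC + 1);
        std::vector<unsigned> &O = Orders[RC];
        if (O.empty())                  // compute once; an empty result
          for (unsigned R : RawOrder)   // just recomputes harmlessly
            if (Allocatable[R])
              O.push_back(R);
        return O;
      }
    };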
if (NewReg == AntiDepReg) continue; // Don't replace a register with one that was recently used to repair @@ -534,7 +532,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (Edge->getKind() == SDep::Anti) { AntiDepReg = Edge->getReg(); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); - if (!AllocatableSet.test(AntiDepReg)) + if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.count(AntiDepReg)) diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 5bbb8f525f54..07107802972d 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,6 +28,7 @@ #include <map> namespace llvm { +class RegisterClassInfo; class TargetInstrInfo; class TargetRegisterInfo; @@ -35,6 +37,7 @@ class TargetRegisterInfo; MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const RegisterClassInfo &RegClassInfo; /// AllocatableSet - The set of allocatable registers. /// We'll be ignoring anti-dependencies on non-allocatable registers, @@ -66,7 +69,7 @@ class TargetRegisterInfo; SmallSet<unsigned, 4> KeepRegs; public: - CriticalAntiDepBreaker(MachineFunction& MFi); + CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); ~CriticalAntiDepBreaker(); /// Start - Initialize anti-dep breaking for a new basic block. diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index fdc1d9142140..6de6c0cb81bd 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -110,9 +110,14 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { LivePhysRegs.set(Reg); } - // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs - // are not live across blocks, but some targets (x86) can have flags live - // out of a block. + // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not + // live across blocks, but some targets (x86) can have flags live out of a + // block. + for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(), + E = MBB->succ_end(); S != E; S++) + for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin(); + LI != (*S)->livein_end(); LI++) + LivePhysRegs.set(*LI); // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 22c5465bf9fa..03604b0a170f 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -336,8 +336,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. CallInst *NewSelector = - CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(), - "eh.sel.catch.all", II); + CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II); NewSelector->setTailCall(II->isTailCall()); NewSelector->setAttributes(II->getAttributes()); @@ -497,10 +496,8 @@ bool DwarfEHPrepare::LowerUnwindsAndResumes() { // Find the rewind function if we didn't already. 
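
The DeadMachineInstructionElim fix above seeds liveness with every successor's live-in set, since some physregs (x86 flags) can be live across block boundaries. The whole operation is a bitwise union per successor:

    #include <bitset>
    #include <vector>

    constexpr unsigned NumRegs = 64; // illustrative register count

    void addSuccessorLiveIns(
        std::bitset<NumRegs> &LivePhysRegs,
        const std::vector<std::bitset<NumRegs>> &SuccLiveIns) {
      for (const auto &LiveIns : SuccLiveIns)
        LivePhysRegs |= LiveIns; // live into any successor => live out here
    }

Before this, a def whose only use sat in a successor block could be deleted as dead.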
if (!RewindFunction) { LLVMContext &Ctx = ResumeInsts[0]->getContext(); - std::vector<const Type*> - Params(1, Type::getInt8PtrTy(Ctx)); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), - Params, false); + Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); } diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index fa2319bff704..d977651c32f7 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -659,11 +659,11 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the /// function pointers, ignoring the init priority. -void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { +void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) { // Should be an array of '{ i32, void ()* }' structs. The first value is the // init priority, which we ignore. if (List->isNullValue()) return; - ConstantArray *InitList = cast<ConstantArray>(List); + const ConstantArray *InitList = cast<ConstantArray>(List); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { if (InitList->getOperand(i)->isNullValue()) continue; diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index b8bac5598ecf..6f7fbace8aba 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -232,7 +232,7 @@ namespace llvm { void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, ELFSection &GblS, int64_t Offset = 0); bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - void EmitXXStructorList(Constant *List, ELFSection &Xtor); + void EmitXXStructorList(const Constant *List, ELFSection &Xtor); void EmitRelocations(); void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index 646e01407a4f..a7aba89b87f3 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -39,7 +39,7 @@ void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { MF = &mf; EC.clear(); - EC.grow(2 * MF->size()); + EC.grow(2 * MF->getNumBlockIDs()); for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index ebc2fc91efa3..a67140ece4a5 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -62,8 +62,8 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = MBBI++; // If MI is a pseudo, expand it. 
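
The EdgeBundles change is subtle: MF->size() counts blocks, but block numbers are IDs that can exceed the count once blocks have been deleted, so an equivalence-class array sized by count can be indexed out of range. Sizing by getNumBlockIDs() covers the highest ID ever assigned, with two classes per block for the ingoing and outgoing bundles. A sketch of the grow-by-max-ID discipline:

    #include <numeric>
    #include <vector>

    struct IntEqClasses {
      std::vector<unsigned> Leader;
      void grow(unsigned N) {  // N = 2 * number of block IDs, not 2 * count
        unsigned Old = Leader.size();
        if (N <= Old) return;
        Leader.resize(N);
        std::iota(Leader.begin() + Old, Leader.end(), Old); // singleton classes
      }
    };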
- const TargetInstrDesc &TID = MI->getDesc(); - if (TID.usesCustomInsertionHook()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.usesCustomInsertionHook()) { Changed = true; MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 8b2c98161644..6cb22778caf9 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -18,11 +18,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -173,10 +174,10 @@ namespace { private: bool ReverseBranchCondition(BBInfo &BBI); bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - float Prediction, float Confidence) const; + const BranchProbability &Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - float Prediction, float Confidence) const; + const BranchProbability &Prediction) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -203,19 +204,19 @@ namespace { bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Cycle, unsigned Extra, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, - Prediction, Confidence); + Prediction); } bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TCycle, unsigned TExtra, MachineBasicBlock &FBB, unsigned FCycle, unsigned FExtra, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { return TCycle > 0 && FCycle > 0 && TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, - Prediction, Confidence); + Prediction); } // blockAlwaysFallThrough - Block ends without a terminator. @@ -450,7 +451,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -461,7 +462,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, - Prediction, Confidence)) + Prediction)) return false; Dups = TrueBBI.NonPredSize; } @@ -477,7 +478,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, /// if performed in 'Dups'. 
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - float Prediction, float Confidence) const { + const BranchProbability &Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -499,8 +500,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, - Prediction, Confidence)) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction)) return false; Dups = Size; } @@ -651,12 +651,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (I->isDebugValue()) continue; - const TargetInstrDesc &TID = I->getDesc(); - if (TID.isNotDuplicable()) + const MCInstrDesc &MCID = I->getDesc(); + if (MCID.isNotDuplicable()) BBI.CannotBeCopied = true; bool isPredicated = TII->isPredicated(I); - bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); + bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch(); if (!isCondBr) { if (!isPredicated) { @@ -751,8 +751,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, ScanInstructions(BBI); - // Unanalyzable or ends with fallthrough or unconditional branch. - if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) { + // Unanalyzable or ends with fallthrough or unconditional branch, or if is not + // considered for ifcvt anymore. + if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) { BBI.IsBeingAnalyzed = false; BBI.IsAnalyzed = true; return BBI; @@ -795,21 +796,20 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, // - backedge -> 90% taken // - early exit -> 20% taken // - branch predictor confidence -> 90% - float Prediction = 0.5f; - float Confidence = 0.9f; + BranchProbability Prediction(5, 10); MachineLoop *Loop = MLI->getLoopFor(BB); if (Loop) { if (TrueBBI.BB == Loop->getHeader()) - Prediction = 0.9f; + Prediction = BranchProbability(9, 10); else if (FalseBBI.BB == Loop->getHeader()) - Prediction = 0.1f; + Prediction = BranchProbability(1, 10); MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB); MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB); if (!TrueLoop || TrueLoop->getParentLoop() == Loop) - Prediction = 0.2f; + Prediction = BranchProbability(2, 10); else if (!FalseLoop || FalseLoop->getParentLoop() == Loop) - Prediction = 0.8f; + Prediction = BranchProbability(8, 10); } if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && @@ -817,7 +817,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, TrueBBI.ExtraCost), TrueBBI.ExtraCost2, *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, - Prediction, Confidence) && + Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -833,9 +833,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - TrueBBI.ExtraCost2, Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -848,17 +848,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && + if (ValidTriangle(TrueBBI, FalseBBI, true, 
Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - TrueBBI.ExtraCost2, Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && + if (ValidSimple(TrueBBI, Dups, Prediction) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - TrueBBI.ExtraCost2, Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -874,29 +874,29 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, - 1.0-Prediction, Confidence) && + Prediction.getCompl()) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, - 1.0-Prediction, Confidence) && + Prediction.getCompl()) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) && + if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -1414,9 +1414,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { - const TargetInstrDesc &TID = I->getDesc(); + const MCInstrDesc &MCID = I->getDesc(); // Do not copy the end of the block branches. - if (IgnoreBr && TID.isBranch()) + if (IgnoreBr && MCID.isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 19ae333115c0..5547f735ba5e 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -180,11 +180,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass, /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, /// otherwise return 0. static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { - if (!MI->isCopy()) - return 0; - if (MI->getOperand(0).getSubReg() != 0) - return 0; - if (MI->getOperand(1).getSubReg() != 0) + if (!MI->isFullCopy()) return 0; if (MI->getOperand(0).getReg() == Reg) return MI->getOperand(1).getReg(); @@ -307,7 +303,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Best spill candidate seen so far. This must dominate UseVNI. 
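
The through-line of the IfConversion changes above is replacing the float Prediction/Confidence pair with BranchProbability, an exact numerator/denominator value, so 9/10 stays 9/10 and the complement (getCompl) loses nothing to rounding. A minimal version of the interface the pass relies on; the comparison operator here is an assumed detail, not taken from the diff:

    #include <cstdint>

    class BranchProbability {
      uint32_t N, D; // probability = N / D, with N <= D
    public:
      BranchProbability(uint32_t Num, uint32_t Den) : N(Num), D(Den) {}
      BranchProbability getCompl() const { return BranchProbability(D - N, D); }
      bool operator>(const BranchProbability &O) const {
        return uint64_t(N) * O.D > uint64_t(O.N) * D; // cross-multiply, no floats
      }
    };

So the old 1.0f - Prediction becomes Prediction.getCompl(), and the separate Confidence knob simply disappears from the interface.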
SibValueInfo SVI(UseReg, UseVNI); MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def); - unsigned SpillDepth = Loops.getLoopDepth(UseMBB); + MachineBasicBlock *SpillMBB = UseMBB; + unsigned SpillDepth = Loops.getLoopDepth(SpillMBB); bool SeenOrigPHI = false; // Original PHI met. do { @@ -320,7 +317,30 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Is this value a better spill candidate? if (!isRegToSpill(Reg)) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); - if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { + if (MBB == SpillMBB) { + // This is an alternative def earlier in the same MBB. + // Hoist the spill as far as possible in SpillMBB. This can ease + // register pressure: + // + // x = def + // y = use x + // s = copy x + // + // Hoisting the spill of s to immediately after the def removes the + // interference between x and y: + // + // x = def + // spill x + // y = use x<kill> + // + if (VNI->def < SVI.SpillVNI->def) { + DEBUG(dbgs() << " hoist in BB#" << MBB->getNumber() << ": " + << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def + << '\n'); + SVI.SpillReg = Reg; + SVI.SpillVNI = VNI; + } + } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { // This is a valid spill location dominating UseVNI. // Prefer to spill at a smaller loop depth. unsigned Depth = Loops.getLoopDepth(MBB); @@ -329,6 +349,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, << ':' << VNI->id << '@' << VNI->def << '\n'); SVI.SpillReg = Reg; SVI.SpillVNI = VNI; + SpillMBB = MBB; SpillDepth = Depth; } } @@ -429,6 +450,7 @@ void InlineSpiller::analyzeSiblingValues() { // Check possible sibling copies. if (VNI->isPHIDef() || VNI->getCopy()) { VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + assert(OrigVNI && "Def outside original live range"); if (OrigVNI->def != VNI->def) DefMI = traceSiblingValue(Reg, VNI, OrigVNI); } diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index b1014a97fa03..a09bb39f8336 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "InterferenceCache.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -40,9 +41,18 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { E = RoundRobin; if (++RoundRobin == CacheEntries) RoundRobin = 0; - Entries[E].reset(PhysReg, LIUArray, TRI, MF); - PhysRegEntries[PhysReg] = E; - return &Entries[E]; + for (unsigned i = 0; i != CacheEntries; ++i) { + // Skip entries that are in use. + if (Entries[E].hasRefs()) { + if (++E == CacheEntries) + E = 0; + continue; + } + Entries[E].reset(PhysReg, LIUArray, TRI, MF); + PhysRegEntries[PhysReg] = E; + return &Entries[E]; + } + llvm_unreachable("Ran out of interference cache entries."); } /// revalidate - LIU contents have changed, update tags. @@ -59,6 +69,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI, const MachineFunction *MF) { + assert(!hasRefs() && "Cannot reset cache entry with references"); // LIU's changed, invalidate cache. ++Tag; PhysReg = physReg; diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 6c36fa4021fb..7f0a27a41baa 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -43,6 +43,9 @@ class InterferenceCache { /// change. 
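
The InterferenceCache::get change above makes round-robin eviction skip entries that live cursors still reference; resetting a pinned entry would invalidate the data a cursor is walking. The pattern in isolation:

    #include <cassert>
    #include <vector>

    struct CacheEntry {
      unsigned RefCount = 0;
      bool hasRefs() const { return RefCount > 0; }
    };

    CacheEntry *pickVictim(std::vector<CacheEntry> &Entries,
                           unsigned &RoundRobin) {
      unsigned E = RoundRobin;
      if (++RoundRobin == Entries.size())
        RoundRobin = 0;
      for (unsigned i = 0; i != Entries.size(); ++i) {
        if (Entries[E].hasRefs()) {          // pinned: try the next slot
          if (++E == Entries.size()) E = 0;
          continue;
        }
        return &Entries[E];
      }
      assert(false && "all cache entries are pinned");
      return nullptr;
    }

The companion getMaxCursors() below lets callers bound how many cursors they hold, so the unreachable branch stays unreachable.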
unsigned Tag; + /// RefCount - The total number of Cursor instances referring to this Entry. + unsigned RefCount; + /// MF - The current function. MachineFunction *MF; @@ -68,9 +71,10 @@ class InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), Indexes(0) {} + Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {} void clear(MachineFunction *mf, SlotIndexes *indexes) { + assert(!hasRefs() && "Cannot clear cache entry with references"); PhysReg = 0; MF = mf; Indexes = indexes; @@ -78,6 +82,10 @@ class InterferenceCache { unsigned getPhysReg() const { return PhysReg; } + void addRef(int Delta) { RefCount += Delta; } + + bool hasRefs() const { return RefCount > 0; } + void revalidate(); /// valid - Return true if this is a valid entry for physReg. @@ -122,15 +130,48 @@ public: void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, const TargetRegisterInfo *); + /// getMaxCursors - Return the maximum number of concurrent cursors that can + /// be supported. + unsigned getMaxCursors() const { return CacheEntries; } + /// Cursor - The primary query interface for the block interference cache. class Cursor { Entry *CacheEntry; BlockInterference *Current; + + void setEntry(Entry *E) { + Current = 0; + // Update reference counts. Nothing happens when RefCount reaches 0, so + // we don't have to check for E == CacheEntry etc. + if (CacheEntry) + CacheEntry->addRef(-1); + CacheEntry = E; + if (CacheEntry) + CacheEntry->addRef(+1); + } + public: - /// Cursor - Create a cursor for the interference allocated to PhysReg and - /// all its aliases. - Cursor(InterferenceCache &Cache, unsigned PhysReg) - : CacheEntry(Cache.get(PhysReg)), Current(0) {} + /// Cursor - Create a dangling cursor. + Cursor() : CacheEntry(0), Current(0) {} + ~Cursor() { setEntry(0); } + + Cursor(const Cursor &O) : CacheEntry(0), Current(0) { + setEntry(O.CacheEntry); + } + + Cursor &operator=(const Cursor &O) { + setEntry(O.CacheEntry); + return *this; + } + + /// setPhysReg - Point this cursor to PhysReg's interference. + void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) { + // Release reference before getting a new one. That guarantees we can + // actually have CacheEntries live cursors. + setEntry(0); + if (PhysReg) + setEntry(Cache.get(PhysReg)); + } /// moveTo - Move cursor to basic block MBBNum. void moveToBlock(unsigned MBBNum) { diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 3861ddadf655..611886ff16a1 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -29,7 +29,7 @@ static void EnsureFunctionExists(Module &M, const char *Name, ArgIt ArgBegin, ArgIt ArgEnd, const Type *RetTy) { // Insert a correctly-typed definition now. - std::vector<const Type *> ParamTys; + std::vector<Type *> ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) ParamTys.push_back(I->getType()); M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); @@ -69,7 +69,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, // program already contains a function with this name. Module *M = CI->getParent()->getParent()->getParent(); // Get or insert the definition now. 
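// Aside: the Cursor above is a counted handle. Construction, copy,
// assignment and destruction all funnel through setEntry(), so
// Entry::RefCount always equals the number of live cursors pointing at the
// entry. The same idiom reduced to a standalone sketch (hypothetical
// Slot/Handle names, not the in-tree classes):
class Slot {
  unsigned RefCount;
public:
  Slot() : RefCount(0) {}
  void addRef(int Delta) { RefCount += Delta; }
  bool hasRefs() const { return RefCount > 0; }
};

class Handle {
  Slot *S;
  void set(Slot *NewS) {
    if (S) S->addRef(-1);   // drop the old reference first; a count of 0
    S = NewS;               // has no side effects, so self-assignment is
    if (S) S->addRef(+1);   // safe, mirroring the comment in setEntry()
  }
public:
  Handle() : S(0) {}
  Handle(const Handle &O) : S(0) { set(O.S); }
  Handle &operator=(const Handle &O) { set(O.S); return *this; }
  ~Handle() { set(0); }
};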
- std::vector<const Type *> ParamTys; + std::vector<Type *> ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) ParamTys.push_back((*I)->getType()); Constant* FCache = M->getOrInsertFunction(NewFn, @@ -77,7 +77,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, IRBuilder<> Builder(CI->getParent(), CI); SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); - CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end()); + CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); if (!CI->use_empty()) CI->replaceAllUsesWith(NewCI); @@ -353,6 +353,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { report_fatal_error("Code generator does not support intrinsic function '"+ Callee->getName()+"'!"); + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + Value *V = CI->getArgOperand(0); + CI->replaceAllUsesWith(V); + break; + } + // The setjmp/longjmp intrinsics should only exist in the code if it was // never optimized (ie, right out of the CFE), or if it has been hacked on // by the lowerinvoke pass. In both cases, the right thing to do is to @@ -546,14 +553,13 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { !CI->getType()->isIntegerTy()) return false; - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); if (!Ty) return false; // Okay, we can do this xform, do so now. - const Type *Tys[] = { Ty }; Module *M = CI->getParent()->getParent()->getParent(); - Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); Value *Op = CI->getArgOperand(0); Op = CallInst::Create(Int, Op, CI->getName(), CI); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 589d0a9a671d..f985af8ba83e 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -24,10 +24,14 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" @@ -98,10 +102,10 @@ static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -LLVMTargetMachine::LLVMTargetMachine(const Target &T, - const std::string &Triple) - : TargetMachine(T), TargetTriple(Triple) { - AsmInfo = T.createAsmInfo(TargetTriple); +LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, + StringRef CPU, StringRef FS) + : TargetMachine(T, Triple, CPU, FS) { + AsmInfo = T.createMCAsmInfo(Triple); } // Set the default code model for the JIT for a generic target. @@ -136,14 +140,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI); + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); // Create a code emitter if asked to show the encoding. 
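// Aside: the new Intrinsic::expect case above is a pure pass-through. By
// the time the generic lowering sees the call, the expectation hint can no
// longer be used, so every use of the call is rewritten to its first
// argument. Sketch of the before/after IR shape (illustrative, not taken
// from a real test case):
//
//   %e = call i32 @llvm.expect.i32(i32 %cond, i32 1)
//   %t = icmp eq i32 %e, 1
//
// becomes, after replaceAllUsesWith() and deletion of the call:
//
//   %t = icmp eq i32 %cond, 1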
MCCodeEmitter *MCE = 0; TargetAsmBackend *TAB = 0; if (ShowMCEncoding) { - MCE = getTarget().createCodeEmitter(*this, *Context); - TAB = getTarget().createAsmBackend(TargetTriple); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context); + TAB = getTarget().createAsmBackend(getTargetTriple()); } MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, @@ -159,13 +164,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); - TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, + *Context); + TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple()); if (MCE == 0 || TAB == 0) return true; - AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context, - *TAB, Out, MCE, + AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), + *Context, *TAB, Out, MCE, hasMCRelaxAll(), hasMCNoExecStack())); AsmStreamer.get()->InitSections(); @@ -240,13 +247,14 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx); - TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx); + TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple()); if (MCE == 0 || TAB == 0) return true; OwningPtr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx, + AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx, *TAB, Out, MCE, hasMCRelaxAll(), hasMCNoExecStack())); @@ -384,6 +392,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Expand pseudo-instructions emitted by ISel. PM.add(createExpandISelPseudosPass()); + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. if (OptLevel != CodeGenOpt::None) @@ -412,12 +426,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After codegen peephole optimization pass"); } - // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { - PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); - } - // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) printAndVerify(PM, "After PreRegAlloc passes"); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 292928f8e787..5d38c83b49c2 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -123,7 +123,7 @@ public: /// getNext - Return the next UserValue in the equivalence class. UserValue *getNext() const { return next; } - /// match - Does this UserValue match the aprameters? + /// match - Does this UserValue match the parameters? 
bool match(const MDNode *Var, unsigned Offset) const { return Var == variable && Offset == offset; } diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index b67f96667bfd..70003e7cc86a 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -244,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // // For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { +collectInterferingVRegs(unsigned MaxInterferingRegs) { InterferenceResult IR = firstInterference(); LiveInterval::iterator VirtRegEnd = VirtReg->end(); LiveInterval *RecentInterferingVReg = NULL; @@ -287,10 +287,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { RecentInterferingVReg = IR.LiveUnionI.value(); ++IR.LiveUnionI; - // Stop collecting when the max weight is exceeded. - if (RecentInterferingVReg->weight >= MaxWeight) - return InterferingVRegs.size(); - continue; } // VirtRegI may have advanced far beyond LiveUnionI, diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index c83578e99c6c..5e78d5e85029 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -229,8 +229,7 @@ public: // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX, - float MaxWeight = HUGE_VALF); + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); // Was this virtual register visited during collectInterferingVRegs? bool isSeenInterference(LiveInterval *VReg) const; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 052abad57e16..b385fb36bbf1 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -298,10 +298,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (NumComp <= 1) continue; ++NumFracRanges; + bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + // If LI is an original interval that hasn't been split yet, make the new + // intervals their own originals instead of referring to LI. The original + // interval must contain all the split products, and LI doesn't. 
+ if (IsOriginal) + VRM.setIsSplitFromReg(Dups.back()->reg, 0); if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 68946a2c9d13..8f0fb46879ac 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -22,7 +22,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Assembly/Writer.h" @@ -61,7 +60,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { return OS; } -/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the /// parent pointer of the MBB, the MBB numbering, and any instructions in the /// MBB to be on the right operand list for registers. /// @@ -93,7 +92,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { assert(N->getParent() == 0 && "machine instruction already in a basic block"); N->setParent(Parent); - + // Add the instruction's register operands to their corresponding // use/def lists. MachineFunction *MF = Parent->getParent(); @@ -110,7 +109,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { // Remove from the use/def lists. N->RemoveRegOperandsFromUseLists(); - + N->setParent(0); LeakDetector::addGarbageObject(N); @@ -339,25 +338,64 @@ void MachineBasicBlock::updateTerminator() { } } -void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { - Successors.push_back(succ); - succ->addPredecessor(this); -} +void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) { + + // If we see non-zero value for the first time it means we actually use Weight + // list, so we fill all Weights with 0's. + if (weight != 0 && Weights.empty()) + Weights.resize(Successors.size()); + + if (weight != 0 || !Weights.empty()) + Weights.push_back(weight); + + Successors.push_back(succ); + succ->addPredecessor(this); + } void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { succ->removePredecessor(this); succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + Successors.erase(I); } -MachineBasicBlock::succ_iterator +MachineBasicBlock::succ_iterator MachineBasicBlock::removeSuccessor(succ_iterator I) { assert(I != Successors.end() && "Not a current successor!"); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(I); + Weights.erase(WI); + } + (*I)->removePredecessor(this); return Successors.erase(I); } +void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, + MachineBasicBlock *New) { + uint32_t weight = 0; + succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!Weights.empty()) { + weight_iterator WI = getWeightIterator(SI); + weight = *WI; + } + + // Update the successor information. 
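// Aside: the successor-weight bookkeeping above is deliberately lazy.
// Weights stays empty (optimization disabled) until the first non-zero
// weight arrives, at which point it is backfilled with zeros so it stays
// index-parallel with Successors. The idiom reduced to a self-contained
// sketch (assumed Node type; not the MachineBasicBlock code itself):
#include <vector>
#include <stdint.h>

struct Node;

static void addSucc(std::vector<Node*> &Succs,
                    std::vector<uint32_t> &Weights,
                    Node *S, uint32_t W) {
  if (W != 0 && Weights.empty())
    Weights.resize(Succs.size());   // zero-fill for existing successors
  if (W != 0 || !Weights.empty())
    Weights.push_back(W);           // keep the arrays index-parallel
  Succs.push_back(S);
}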
+ removeSuccessor(SI); + addSuccessor(New, weight); +} + void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { Predecessors.push_back(pred); } @@ -371,10 +409,17 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - + while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t weight = 0; + + + // If Weight list is empty it means we don't use it (disabled optimization). + if (!fromMBB->Weights.empty()) + weight = *fromMBB->Weights.begin(); + + addSuccessor(Succ, weight); fromMBB->removeSuccessor(Succ); } } @@ -383,7 +428,7 @@ void MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - + while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); addSuccessor(Succ); @@ -637,15 +682,14 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, } // Update the successor information. - removeSuccessor(Old); - addSuccessor(New); + replaceSuccessor(Old, New); } /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the /// CFG to be inserted. If we have proven that MBB can only branch to DestA and /// DestB, remove any other MBB successors from the CFG. DestA and DestB can be /// null. -/// +/// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. @@ -720,6 +764,26 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { return DL; } +/// getSuccWeight - Return weight of the edge from this block to MBB. +/// +uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) { + if (Weights.empty()) + return 0; + + succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + return *getWeightIterator(I); +} + +/// getWeightIterator - Return wight iterator corresonding to the I successor +/// iterator +MachineBasicBlock::weight_iterator MachineBasicBlock:: +getWeightIterator(MachineBasicBlock::succ_iterator I) { + assert(Weights.size() == Successors.size() && "Async weight list!"); + size_t index = std::distance(Successors.begin(), I); + assert(index < Weights.size() && "Not a current successor!"); + return Weights.begin() + index; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/lib/CodeGen/MachineBlockFrequency.cpp b/lib/CodeGen/MachineBlockFrequency.cpp new file mode 100644 index 000000000000..893a320a6a63 --- /dev/null +++ b/lib/CodeGen/MachineBlockFrequency.cpp @@ -0,0 +1,59 @@ +//====----- MachineBlockFrequency.cpp - Machine Block Frequency Analysis ----====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/CodeGen/MachineBlockFrequency.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq", + "Machine Block Frequency Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq", + "Machine Block Frequency Analysis", true, true) + +char MachineBlockFrequency::ID = 0; + + +MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) { + initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry()); + MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction, + MachineBranchProbabilityInfo>(); +} + +MachineBlockFrequency::~MachineBlockFrequency() { + delete MBFI; +} + +void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.setPreservesAll(); +} + +bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) { + MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>(); + MBFI->doFunction(&F, &MBPI); + return false; +} + +/// getblockFreq - Return block frequency. Never return 0, value must be +/// positive. Please note that initial frequency is equal to 1024. It means that +/// we should not rely on the value itself, but only on the comparison to the +/// other block frequencies. We do this to avoid using of floating points. +/// +uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) { + return MBFI->getBlockFreq(MBB); +} diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp new file mode 100644 index 000000000000..c13fa6bc5333 --- /dev/null +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -0,0 +1,113 @@ +//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis uses probability info stored in Machine Basic Blocks. 
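// Aside: getBlockFreq() above reports frequencies as plain uint32_t with
// the entry block pinned at 1024, precisely so that clients compare
// frequencies instead of trusting absolute values. A worked example of the
// integer scaling this implies (assumed helper name; the real arithmetic
// lives in BlockFrequencyImpl):
static uint32_t scaleByProb(uint32_t Freq, uint32_t Weight, uint32_t Sum) {
  // Freq * (Weight / Sum), with a 64-bit intermediate to avoid overflow.
  return (uint64_t)Freq * Weight / Sum;
}
// For a two-way branch out of the entry block with weights 3 and 1:
//   scaleByProb(1024, 3, 4) == 768   // likely edge
//   scaleByProb(1024, 1, 4) == 256   // unlikely edge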
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) +INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) + +char MachineBranchProbabilityInfo::ID = 0; + +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +uint32_t +MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t Weight = Src->getSuccWeight(Dst); + if (!Weight) + return DEFAULT_WEIGHT; + return Weight; +} + +bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + uint32_t Weight = getEdgeWeight(Src, Dst); + uint32_t Sum = getSumForBlock(Src); + + // FIXME: Implement BranchProbability::compare then change this code to + // compare this BranchProbability against a static "hot" BranchProbability. + return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; +} + +MachineBasicBlock * +MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + MachineBasicBlock *MaxSucc = 0; + + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + MachineBasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(MBB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // FIXME: Use BranchProbability::compare. + if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + return MaxSucc; + + return 0; +} + +BranchProbability +MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream &MachineBranchProbabilityInfo:: +printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src, + MachineBasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index f97ccf65790f..3a60a37af443 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -260,12 +260,12 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { return false; // Ignore stuff that we obviously can't move. 
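// Aside: isEdgeHot() above avoids both floating point and division.
// Rather than testing Weight/Sum >= 0.8 it cross-multiplies in 64 bits:
// Weight * 5 > Sum * 4. The general form of the trick (hypothetical
// helper; the FIXMEs above want this folded into BranchProbability):
static bool exceedsRatio(uint32_t Num, uint32_t Denom,
                         uint32_t N, uint32_t D) {
  // Num/Denom > N/D  <=>  Num*D > N*Denom, for non-negative operands;
  // the uint64_t products cannot overflow for 32-bit inputs.
  return (uint64_t)Num * D > (uint64_t)N * Denom;
}
// e.g. exceedsRatio(Weight, Sum, 4, 5) reproduces the 80% hot-edge test.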
- const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; - if (TID.mayLoad()) { + if (MCID.mayLoad()) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 50750a50ab89..cd2515652831 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -152,10 +152,10 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { /// of `new MachineInstr'. /// MachineInstr * -MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID, +MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp) { return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) - MachineInstr(TID, DL, NoImp); + MachineInstr(MCID, DL, NoImp); } /// CloneMachineInstr - Create a new MachineInstr which is a copy of the diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 36b0b8330a86..143a29b08a1e 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -15,19 +15,22 @@ #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" +#include "llvm/LLVMContext.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Value.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" @@ -194,6 +197,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { getSubReg() == Other.getSubReg(); case MachineOperand::MO_Immediate: return getImm() == Other.getImm(); + case MachineOperand::MO_CImmediate: + return getCImm() == Other.getCImm(); case MachineOperand::MO_FPImmediate: return getFPImm() == Other.getFPImm(); case MachineOperand::MO_MachineBasicBlock: @@ -267,6 +272,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_Immediate: OS << getImm(); break; + case MachineOperand::MO_CImmediate: + getCImm()->getValue().print(OS, false); + break; case MachineOperand::MO_FPImmediate: if (getFPImm()->getType()->isFloatTy()) OS << getFPImm()->getValueAPF().convertToFloat(); @@ -454,9 +462,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { //===----------------------------------------------------------------------===// /// MachineInstr ctor - This constructor creates a dummy MachineInstr with -/// TID NULL and no operands. +/// MCID NULL and no operands. 
MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { // Make sure that we get added to a machine basicblock @@ -464,23 +472,23 @@ MachineInstr::MachineInstr() } void MachineInstr::addImplicitDefUseOperands() { - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs) + if (MCID->ImplicitDefs) + for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs) addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses) + if (MCID->ImplicitUses) + for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses) addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } /// MachineInstr ctor - This constructor creates a MachineInstr and adds the /// implicit operands. It reserves space for the number of operands specified by -/// the TargetInstrDesc. -MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), +/// the MCInstrDesc. +MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { if (!NoImp) - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -488,13 +496,13 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) } /// MachineInstr ctor - As above, but with a DebugLoc. -MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, +MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { if (!NoImp) - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -504,12 +512,12 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// MachineInstr ctor - Work exactly the same as the ctor two above, except /// that the MachineInstr is created and added to the end of the specified /// basic block. 
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), +MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -519,12 +527,12 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr ctor - As above, but with a DebugLoc. /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, - const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + const MCInstrDesc &tid) + : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + TID->getNumOperands()); + NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + MCID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -534,7 +542,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -621,7 +629,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { Operands.back().AddRegOperandToRegInfo(RegInfo); // If the register operand is flagged as early, mark the operand as such unsigned OpNo = Operands.size() - 1; - if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } return; @@ -643,7 +651,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { Operands[OpNo].AddRegOperandToRegInfo(0); // If the register operand is flagged as early, mark the operand as such - if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } @@ -668,7 +676,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { Operands[OpNo].AddRegOperandToRegInfo(RegInfo); // If the register operand is flagged as early, mark the operand as such - if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } @@ -691,7 +699,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { // If the register operand is flagged as early, 
mark the operand as such if (Operands[OpNo].isReg() - && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); } } @@ -794,6 +802,11 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, return false; } } + // If DebugLoc does not match then two dbg.values are not identical. + if (isDebugValue()) + if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown() + && getDebugLoc() != Other->getDebugLoc()) + return false; return true; } @@ -817,8 +830,8 @@ void MachineInstr::eraseFromParent() { /// OperandComplete - Return true if it's illegal to add a new operand /// bool MachineInstr::OperandsComplete() const { - unsigned short NumOperands = TID->getNumOperands(); - if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands) + unsigned short NumOperands = MCID->getNumOperands(); + if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands) return true; // Broken: we have all the operands of this instruction! return false; } @@ -826,8 +839,8 @@ bool MachineInstr::OperandsComplete() const { /// getNumExplicitOperands - Returns the number of non-implicit operands. /// unsigned MachineInstr::getNumExplicitOperands() const { - unsigned NumOperands = TID->getNumOperands(); - if (!TID->isVariadic()) + unsigned NumOperands = MCID->getNumOperands(); + if (!MCID->isVariadic()) return NumOperands; for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) { @@ -928,10 +941,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, /// operand list that is used to represent the predicate. It returns -1 if /// none is found. int MachineInstr::findFirstPredOperandIdx() const { - const TargetInstrDesc &TID = getDesc(); - if (TID.isPredicable()) { + const MCInstrDesc &MCID = getDesc(); + if (MCID.isPredicable()) { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (TID.OpInfo[i].isPredicate()) + if (MCID.OpInfo[i].isPredicate()) return i; } @@ -987,11 +1000,11 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { } assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); - const TargetInstrDesc &TID = getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MCInstrDesc &MCID = getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (MO.isReg() && MO.isUse() && - TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) { + MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) { if (UseOpIdx) *UseOpIdx = (unsigned)i; return true; @@ -1047,13 +1060,13 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { return false; } - const TargetInstrDesc &TID = getDesc(); - if (UseOpIdx >= TID.getNumOperands()) + const MCInstrDesc &MCID = getDesc(); + if (UseOpIdx >= MCID.getNumOperands()) return false; const MachineOperand &MO = getOperand(UseOpIdx); if (!MO.isReg() || !MO.isUse()) return false; - int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO); + int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO); if (DefIdx == -1) return false; if (DefOpIdx) @@ -1093,11 +1106,11 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { /// copyPredicates - Copies predicate operand(s) from MI. 
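// Aside: the tied-operand queries above are table-driven. Whether use
// operand i is tied to def D is recorded statically in the MCInstrDesc as
// getOperandConstraint(i, MCOI::TIED_TO) == D, with -1 meaning "not
// tied". For a classic two-address instruction the table reads like this
// (illustrative operand indices, not a specific target's opcode):
//
//   %dst = ADDrr %dst<tied>, %src
//   getOperandConstraint(1, MCOI::TIED_TO) == 0   // source 1 tied to def 0
//   getOperandConstraint(2, MCOI::TIED_TO) == -1  // source 2 is free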
void MachineInstr::copyPredicates(const MachineInstr *MI) { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isPredicable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isPredicable()) return; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (TID.OpInfo[i].isPredicate()) { + if (MCID.OpInfo[i].isPredicate()) { // Predicated operands must be last operands. addOperand(MI->getOperand(i)); } @@ -1134,13 +1147,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (TID->mayStore() || TID->isCall()) { + if (MCID->mayStore() || MCID->isCall()) { SawStore = true; return false; } if (isLabel() || isDebugValue() || - TID->isTerminator() || hasUnmodeledSideEffects()) + MCID->isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1148,7 +1161,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (TID->mayLoad() && !isInvariantLoad(AA)) + if (MCID->mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); @@ -1188,9 +1201,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, /// have no volatile memory references. bool MachineInstr::hasVolatileMemoryRef() const { // An instruction known never to access memory won't have a volatile access. - if (!TID->mayStore() && - !TID->mayLoad() && - !TID->isCall() && + if (!MCID->mayStore() && + !MCID->mayLoad() && + !MCID->isCall() && !hasUnmodeledSideEffects()) return false; @@ -1214,7 +1227,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// *all* loads the instruction does are invariant (if it does multiple loads). bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. - if (!TID->mayLoad()) + if (!MCID->mayLoad()) return false; // If the instruction has lost its memoperands, conservatively assume that @@ -1364,6 +1377,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // Print the rest of the operands. bool OmittedAnyCallClobbers = false; bool FirstOp = true; + unsigned AsmDescOp = ~0u; + unsigned AsmOpCount = 0; if (isInlineAsm()) { // Print asm string. 
@@ -1377,7 +1392,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; - StartOp = InlineAsm::MIOp_FirstOperand; + StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; FirstOp = false; } @@ -1416,10 +1431,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (FirstOp) FirstOp = false; else OS << ","; OS << " "; if (i < getDesc().NumOperands) { - const TargetOperandInfo &TOI = getDesc().OpInfo[i]; - if (TOI.isPredicate()) + const MCOperandInfo &MCOI = getDesc().OpInfo[i]; + if (MCOI.isPredicate()) OS << "pred:"; - if (TOI.isOptionalDef()) + if (MCOI.isOptionalDef()) OS << "opt:"; } if (isDebugValue() && MO.isMetadata()) { @@ -1431,6 +1446,26 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { MO.print(OS, TM); } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); + } else if (i == AsmDescOp && MO.isImm()) { + // Pretty print the inline asm operand descriptor. + OS << '$' << AsmOpCount++; + unsigned Flag = MO.getImm(); + switch (InlineAsm::getKind(Flag)) { + case InlineAsm::Kind_RegUse: OS << ":[reguse]"; break; + case InlineAsm::Kind_RegDef: OS << ":[regdef]"; break; + case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break; + case InlineAsm::Kind_Clobber: OS << ":[clobber]"; break; + case InlineAsm::Kind_Imm: OS << ":[imm]"; break; + case InlineAsm::Kind_Mem: OS << ":[mem]"; break; + default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break; + } + + unsigned TiedTo = 0; + if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo)) + OS << " [tiedto:$" << TiedTo << ']'; + + // Compute the index of the next operand descriptor. + AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); } else MO.print(OS, TM); } @@ -1685,3 +1720,24 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { } return Hash; } + +void MachineInstr::emitError(StringRef Msg) const { + // Find the source location cookie. 
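// Aside: emitError() below recovers the !srcloc "cookie" that the front
// end attaches to inline-asm calls: a metadata operand whose first element
// is a ConstantInt identifying the original source location. The IR shape
// being matched looks roughly like this (illustrative, not from a real
// test case):
//
//   call void asm sideeffect "bogus", ""(), !srcloc !0
//   !0 = metadata !{i32 47}        ; 47 is the LocCookie handed to
//                                  ; LLVMContext::emitError()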
+ unsigned LocCookie = 0; + const MDNode *LocMD = 0; + for (unsigned i = getNumOperands(); i != 0; --i) { + if (getOperand(i-1).isMetadata() && + (LocMD = getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + if (const MachineBasicBlock *MBB = getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg); + report_fatal_error(Msg); +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index b315702eef8f..722ceb202439 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,10 +28,10 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" @@ -1018,9 +1018,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /*UnfoldStore=*/false, &LoadRegIndex); if (NewOpc == 0) return 0; - const TargetInstrDesc &TID = TII->get(NewOpc); - if (TID.getNumDefs() != 1) return 0; - const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); + const MCInstrDesc &MID = TII->get(NewOpc); + if (MID.getNumDefs() != 1) return 0; + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = MRI->createVirtualRegister(RC); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 08ff5bb71521..4b3e64c25f60 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -20,7 +20,6 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()]; UsedPhysRegs.resize(TRI.getNumRegs()); // Create the physreg use/def lists. @@ -38,25 +37,13 @@ MachineRegisterInfo::~MachineRegisterInfo() { "PhysRegUseDefLists has entries after all instructions are deleted"); #endif delete [] PhysRegUseDefLists; - delete [] RegClass2VRegMap; } /// setRegClass - Set the register class of the specified virtual register. /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { - const TargetRegisterClass *OldRC = VRegInfo[Reg].first; VRegInfo[Reg].first = RC; - - // Remove from old register class's vregs list. This may be slow but - // fortunately this operation is rarely needed. - std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; - std::vector<unsigned>::iterator I = - std::find(VRegs.begin(), VRegs.end(), Reg); - VRegs.erase(I); - - // Add to new register class's vregs list. - RegClass2VRegMap[RC->getID()].push_back(Reg); } const TargetRegisterClass * @@ -95,7 +82,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) // The vector reallocated, handle this now. 
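// Aside: the MachineLICM change above routes operand register-class lookup
// through TargetInstrInfo, which (presumably) lets the target resolve
// operand classes that need extra context beyond the static operand info.
// The resulting unfold pattern, condensed (names as in the surrounding
// code; a sketch only):
//
//   const MCInstrDesc &Desc = TII->get(NewOpc);
//   const TargetRegisterClass *RC = TII->getRegClass(Desc, OpIdx, TRI);
//   unsigned VReg = MRI->createVirtualRegister(RC);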
HandleVRegListReallocation(); - RegClass2VRegMap[RegClass->getID()].push_back(Reg); return Reg; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 471463b46f5b..7a55852a1315 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -62,6 +62,7 @@ namespace { raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; @@ -255,6 +256,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; TM = &MF.getTarget(); + TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -387,8 +389,6 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i, void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - // Count the number of landing pad successors. SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), @@ -541,19 +541,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { - const TargetInstrDesc &TI = MI->getDesc(); - if (MI->getNumOperands() < TI.getNumOperands()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - *OS << TI.getNumOperands() << " operands expected, but " + *OS << MCID.getNumOperands() << " operands expected, but " << MI->getNumExplicitOperands() << " given.\n"; } // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { - if ((*I)->isLoad() && !TI.mayLoad()) + if ((*I)->isLoad() && !MCID.mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !TI.mayStore()) + if ((*I)->isStore() && !MCID.mayStore()) report("Missing mayStore flag", MI); } @@ -575,29 +575,30 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); - const TargetInstrDesc &TI = MI->getDesc(); - const TargetOperandInfo &TOI = TI.OpInfo[MONum]; + const MCInstrDesc &MCID = MI->getDesc(); + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; - // The first TI.NumDefs operands must be explicit register defines - if (MONum < TI.getNumDefs()) { + // The first MCID.NumDefs operands must be explicit register defines + if (MONum < MCID.getNumDefs()) { if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); else if (!MO->isDef()) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); - } else if (MONum < TI.getNumOperands()) { + } else if (MONum < MCID.getNumOperands()) { // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. 
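// Aside: the verifier's visitMachineOperand here splits an instruction's
// operand list into three zones keyed off the MCInstrDesc: explicit defs,
// the remaining fixed operands, and a tail that is only legal on variadic
// instructions. Classification sketch (hypothetical helper mirroring the
// checks; assumes llvm/MC/MCInstrDesc.h):
enum OperandZone { ExplicitDef, FixedOperand, VariadicTail };

static OperandZone classifyOperand(const MCInstrDesc &MCID, unsigned OpNo) {
  if (OpNo < MCID.getNumDefs())
    return ExplicitDef;        // must be a non-implicit register def
  if (OpNo < MCID.getNumOperands())
    return FixedOperand;       // checked against the OpInfo table
  return VariadicTail;         // extra operands, variadic opcodes only
}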
- if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) { - if (MO->isDef() && !TOI.isOptionalDef()) + if (MO->isReg() && + !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) { + if (MO->isDef() && !MCOI.isOptionalDef()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. - if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg()) + if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg()) report("Extra explicit operand on non-variadic instruction", MO, MONum); } @@ -709,7 +710,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } // Check register classes. - if (MONum < TI.getNumOperands() && !MO->isImplicit()) { + if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { unsigned SubIdx = MO->getSubReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -723,7 +724,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } sr = s; } - if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { if (!DRC->contains(sr)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(sr) << " is not a " @@ -743,7 +744,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } RC = SRC; } - if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { + if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " @@ -765,11 +766,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index c105bb06ebe5..c523e39bc258 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -353,10 +353,10 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isMoveImmediate()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isMoveImmediate()) return false; - if (TID.getNumDefs() != 1) + if (MCID.getNumDefs() != 1) return false; unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -429,16 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); - if (TID.isBitcast()) { + if (MCID.isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. 
Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; } - } else if (TID.isCompare()) { + } else if (MCID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. Changed = true; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index ba8501ff7233..c73e87733cb4 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,6 +22,7 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" +#include "RegisterClassInfo.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" @@ -37,7 +38,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -52,7 +53,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls"); STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); // Post-RA scheduling is enabled with -// TargetSubtarget.enablePostRAScheduler(). This flag can be used to +// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to // override the target. static cl::opt<bool> EnablePostRAScheduler("post-RA-scheduler", @@ -80,6 +81,7 @@ namespace { class PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; const TargetInstrInfo *TII; + RegisterClassInfo RegClassInfo; CodeGenOpt::Level OptLevel; public: @@ -135,7 +137,8 @@ namespace { public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo&, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); ~SchedulePostRATDList(); @@ -179,7 +182,8 @@ namespace { SchedulePostRATDList::SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + AliasAnalysis *AA, const RegisterClassInfo &RCI, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), KillIndices(TRI->getNumRegs()) @@ -189,10 +193,10 @@ SchedulePostRATDList::SchedulePostRATDList( HazardRec = TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); AntiDepBreak = - ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL)); + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL)); } SchedulePostRATDList::~SchedulePostRATDList() { @@ -205,9 +209,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); + RegClassInfo.runOnMachineFunction(Fn); // Check for explicit enable/disable of post-ra scheduling. 
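// Aside: below, the anti-dependency breaking mode is resolved in layers.
// The post-RA-scheduler option, if given, forces scheduling on or off;
// otherwise the target's enablePostRAScheduler() decides and may upgrade
// the mode; the EnableAntiDepBreaking string option is applied last as an
// override. Its string-to-mode mapping, pulled out as a sketch
// (hypothetical helper; assumes <string>):
static TargetSubtargetInfo::AntiDepBreakMode
parseAntiDepMode(const std::string &S) {
  if (S == "all")
    return TargetSubtargetInfo::ANTIDEP_ALL;
  if (S == "critical")
    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
  return TargetSubtargetInfo::ANTIDEP_NONE;  // anything else: disabled
}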
- TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE; SmallVector<TargetRegisterClass*, 4> CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) @@ -215,22 +220,23 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } else { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode. - const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); + const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>(); if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; } // Check for antidep breaking override... if (EnableAntiDepBreaking.getPosition() > 0) { - AntiDepMode = (EnableAntiDepBreaking == "all") ? - TargetSubtarget::ANTIDEP_ALL : - (EnableAntiDepBreaking == "critical") - ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE; + AntiDepMode = (EnableAntiDepBreaking == "all") + ? TargetSubtargetInfo::ANTIDEP_ALL + : ((EnableAntiDepBreaking == "critical") + ? TargetSubtargetInfo::ANTIDEP_CRITICAL + : TargetSubtargetInfo::ANTIDEP_NONE); } DEBUG(dbgs() << "PostRAScheduler\n"); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode, + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode, CriticalPathRCs); // Loop over all of the basic blocks diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp deleted file mode 100644 index d6e31dae9d13..000000000000 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ /dev/null @@ -1,1430 +0,0 @@ -//===-- PreAllocSplitting.cpp - Pre-allocation Interval Spltting Pass. ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the machine instruction level pre-register allocation -// live interval splitting pass. It finds live interval barriers, i.e. -// instructions which will kill all physical registers in certain register -// classes, and split all live intervals which cross the barrier. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-alloc-split" -#include "VirtRegMap.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden); -static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), - cl::Hidden); -static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), - cl::Hidden); - -STATISTIC(NumSplits, "Number of intervals split"); -STATISTIC(NumRemats, "Number of intervals split by rematerialization"); -STATISTIC(NumFolds, "Number of intervals split with spill folding"); -STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding"); -STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers"); -STATISTIC(NumDeadSpills, "Number of dead spills removed"); - -namespace { - class PreAllocSplitting : public MachineFunctionPass { - MachineFunction *CurrMF; - const TargetMachine *TM; - const TargetInstrInfo *TII; - const TargetRegisterInfo* TRI; - MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; - SlotIndexes *SIs; - LiveIntervals *LIs; - LiveStacks *LSs; - VirtRegMap *VRM; - - // Barrier - Current barrier being processed. - MachineInstr *Barrier; - - // BarrierMBB - Basic block where the barrier resides in. - MachineBasicBlock *BarrierMBB; - - // Barrier - Current barrier index. - SlotIndex BarrierIdx; - - // CurrLI - Current live interval being split. - LiveInterval *CurrLI; - - // CurrSLI - Current stack slot live interval. - LiveInterval *CurrSLI; - - // CurrSValNo - Current val# for the stack slot live interval. - VNInfo *CurrSValNo; - - // IntervalSSMap - A map from live interval to spill slots. - DenseMap<unsigned, int> IntervalSSMap; - - // Def2SpillMap - A map from a def instruction index to spill index. 
- DenseMap<SlotIndex, SlotIndex> Def2SpillMap; - - public: - static char ID; - PreAllocSplitting() : MachineFunctionPass(ID) { - initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); - AU.addPreserved<RegisterCoalescer>(); - AU.addPreserved<CalculateSpillWeights>(); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<VirtRegMap>(); - AU.addPreserved<MachineDominatorTree>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreserved<VirtRegMap>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual void releaseMemory() { - IntervalSSMap.clear(); - Def2SpillMap.clear(); - } - - virtual const char *getPassName() const { - return "Pre-Register Allocaton Live Interval Splitting"; - } - - /// print - Implement the dump method. - virtual void print(raw_ostream &O, const Module* M = 0) const { - LIs->print(O, M); - } - - - private: - - MachineBasicBlock::iterator - findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, - SmallPtrSet<MachineInstr*, 4>&); - - MachineBasicBlock::iterator - findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex, - SmallPtrSet<MachineInstr*, 4>&); - - int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); - - bool IsAvailableInStack(MachineBasicBlock*, unsigned, - SlotIndex, SlotIndex, - SlotIndex&, int&) const; - - void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex); - - bool SplitRegLiveInterval(LiveInterval*); - - bool SplitRegLiveIntervals(const TargetRegisterClass **, - SmallPtrSet<LiveInterval*, 8>&); - - bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB, - MachineBasicBlock* BarrierMBB); - bool Rematerialize(unsigned vreg, VNInfo* ValNo, - MachineInstr* DefMI, - MachineBasicBlock::iterator RestorePt, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, - MachineInstr* DefMI, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int& SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - MachineInstr* FoldRestore(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB); - void RenumberValno(VNInfo* VN); - void ReconstructLiveInterval(LiveInterval* LI); - bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split); - unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs, - unsigned Reg, int FrameIndex, bool& TwoAddr); - VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock); - VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - 
DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock); -}; -} // end anonymous namespace - -char PreAllocSplitting::ID = 0; - -INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting", - "Pre-Register Allocation Live Interval Splitting", - false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting", - "Pre-Register Allocation Live Interval Splitting", - false, false) - -char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; - -/// findSpillPoint - Find a gap as far away from the given MI that's suitable -/// for spilling the current live interval. The index must be before any -/// defs and uses of the live interval register in the mbb. Return begin() if -/// none is found. -MachineBasicBlock::iterator -PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, - MachineInstr *DefMI, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { - MachineBasicBlock::iterator Pt = MBB->begin(); - - MachineBasicBlock::iterator MII = MI; - MachineBasicBlock::iterator EndPt = DefMI - ? MachineBasicBlock::iterator(DefMI) : MBB->begin(); - - while (MII != EndPt && !RefsInMBB.count(MII) && - MII->getOpcode() != TRI->getCallFrameSetupOpcode()) - --MII; - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - - while (MII != EndPt && !RefsInMBB.count(MII)) { - // We can't insert the spill between the barrier (a call), and its - // corresponding call frame setup. - if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) { - while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) { - --MII; - if (MII == EndPt) { - return Pt; - } - } - continue; - } else { - Pt = MII; - } - - if (RefsInMBB.count(MII)) - return Pt; - - - --MII; - } - - return Pt; -} - -/// findRestorePoint - Find a gap in the instruction index map that's suitable -/// for restoring the current live interval value. The index must be before any -/// uses of the live interval register in the mbb. Return end() if none is -/// found. -MachineBasicBlock::iterator -PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, - SlotIndex LastIdx, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { - // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb - // begin index accordingly. - MachineBasicBlock::iterator Pt = MBB->end(); - MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator(); - - // We start at the call, so walk forward until we find the call frame teardown - // since we can't insert restores before that. Bail if we encounter a use - // during this time. - MachineBasicBlock::iterator MII = MI; - if (MII == EndPt) return Pt; - - while (MII != EndPt && !RefsInMBB.count(MII) && - MII->getOpcode() != TRI->getCallFrameDestroyOpcode()) - ++MII; - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - ++MII; - - // FIXME: Limit the number of instructions to examine to reduce - // compile time? 
- while (MII != EndPt) { - SlotIndex Index = LIs->getInstructionIndex(MII); - if (Index > LastIdx) - break; - - // We can't insert a restore between the barrier (a call) and its - // corresponding call frame teardown. - if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) { - do { - if (MII == EndPt || RefsInMBB.count(MII)) return Pt; - ++MII; - } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); - } else { - Pt = MII; - } - - if (RefsInMBB.count(MII)) - return Pt; - - ++MII; - } - - return Pt; -} - -/// CreateSpillStackSlot - Create a stack slot for the live interval being -/// split. If the live interval was previously split, just reuse the same -/// slot. -int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, - const TargetRegisterClass *RC) { - int SS; - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg); - if (I != IntervalSSMap.end()) { - SS = I->second; - } else { - SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - IntervalSSMap[Reg] = SS; - } - - // Create live interval for stack slot. - CurrSLI = &LSs->getOrCreateInterval(SS, RC); - if (CurrSLI->hasAtLeastOneValue()) - CurrSValNo = CurrSLI->getValNumInfo(0); - else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, - LSs->getVNInfoAllocator()); - return SS; -} - -/// IsAvailableInStack - Return true if register is available in a split stack -/// slot at the specified index. -bool -PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, - unsigned Reg, SlotIndex DefIndex, - SlotIndex RestoreIndex, - SlotIndex &SpillIndex, - int& SS) const { - if (!DefMBB) - return false; - - DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg); - if (I == IntervalSSMap.end()) - return false; - DenseMap<SlotIndex, SlotIndex>::const_iterator - II = Def2SpillMap.find(DefIndex); - if (II == Def2SpillMap.end()) - return false; - - // If last spill of def is in the same mbb as barrier mbb (where restore will - // be), make sure it's not below the intended restore index. - // FIXME: Undo the previous spill? - assert(LIs->getMBBFromIndex(II->second) == DefMBB); - if (DefMBB == BarrierMBB && II->second >= RestoreIndex) - return false; - - SS = I->second; - SpillIndex = II->second; - return true; -} - -/// UpdateSpillSlotInterval - Given the specified val# of the register live -/// interval being split, and the spill and restore indicies, update the live -/// interval of the spill stack slot. -void -PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex, - SlotIndex RestoreIndex) { - assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB && - "Expect restore in the barrier mbb"); - - MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex); - if (MBB == BarrierMBB) { - // Intra-block spill + restore. We are done. - LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo); - CurrSLI->addRange(SLR); - return; - } - - SmallPtrSet<MachineBasicBlock*, 4> Processed; - SlotIndex EndIdx = LIs->getMBBEndIdx(MBB); - LiveRange SLR(SpillIndex, EndIdx, CurrSValNo); - CurrSLI->addRange(SLR); - Processed.insert(MBB); - - // Start from the spill mbb, figure out the extend of the spill slot's - // live interval. - SmallVector<MachineBasicBlock*, 4> WorkList; - const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex); - if (LR->end > EndIdx) - // If live range extend beyond end of mbb, add successors to work list. 
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - WorkList.push_back(*SI); - - while (!WorkList.empty()) { - MachineBasicBlock *MBB = WorkList.back(); - WorkList.pop_back(); - if (Processed.count(MBB)) - continue; - SlotIndex Idx = LIs->getMBBStartIdx(MBB); - LR = CurrLI->getLiveRangeContaining(Idx); - if (LR && LR->valno == ValNo) { - EndIdx = LIs->getMBBEndIdx(MBB); - if (Idx <= RestoreIndex && RestoreIndex < EndIdx) { - // Spill slot live interval stops at the restore. - LiveRange SLR(Idx, RestoreIndex, CurrSValNo); - CurrSLI->addRange(SLR); - } else if (LR->end > EndIdx) { - // Live range extends beyond end of mbb, process successors. - LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo); - CurrSLI->addRange(SLR); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - WorkList.push_back(*SI); - } else { - LiveRange SLR(Idx, LR->end, CurrSValNo); - CurrSLI->addRange(SLR); - } - Processed.insert(MBB); - } - } -} - -/// PerformPHIConstruction - From properly set up use and def lists, use a PHI -/// construction algorithm to compute the ranges and valnos for an interval. -VNInfo* -PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock) { - // Return memoized result if it's available. - if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI)) - return NewVNs[UseI]; - else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI)) - return NewVNs[UseI]; - else if (!IsIntraBlock && LiveOut.count(MBB)) - return LiveOut[MBB]; - - // Check if our block contains any uses or defs. - bool ContainsDefs = Defs.count(MBB); - bool ContainsUses = Uses.count(MBB); - - VNInfo* RetVNI = 0; - - // Enumerate the cases of use/def contaning blocks. - if (!ContainsDefs && !ContainsUses) { - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - } else if (ContainsDefs && !ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; - - // Search for the def in this block. If we don't find it before the - // instruction we care about, go to the fallback case. Note that that - // should never happen: this cannot be intrablock, so use should - // always be an end() iterator. - assert(UseI == MBB->end() && "No use marked in intrablock"); - - MachineBasicBlock::iterator Walker = UseI; - --Walker; - while (Walker != MBB->begin()) { - if (BlockDefs.count(Walker)) - break; - --Walker; - } - - // Once we've found it, extend its VNInfo to our instruction. - SlotIndex DefIndex = LIs->getInstructionIndex(Walker); - DefIndex = DefIndex.getDefIndex(); - SlotIndex EndIndex = LIs->getMBBEndIdx(MBB); - - RetVNI = NewVNs[Walker]; - LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI)); - } else if (!ContainsDefs && ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; - - // Search for the use in this block that precedes the instruction we care - // about, going to the fallback case if we don't find it. 
- MachineBasicBlock::iterator Walker = UseI; - bool found = false; - while (Walker != MBB->begin()) { - --Walker; - if (BlockUses.count(Walker)) { - found = true; - break; - } - } - - if (!found) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - - SlotIndex UseIndex = LIs->getInstructionIndex(Walker); - UseIndex = UseIndex.getUseIndex(); - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - - // Now, recursively phi construct the VNInfo for the use we found, - // and then extend it to include the instruction we care about - RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, false, true); - - LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI)); - - // FIXME: Need to set kills properly for inter-block stuff. - } else if (ContainsDefs && ContainsUses) { - SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB]; - SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB]; - - // This case is basically a merging of the two preceding case, with the - // special note that checking for defs must take precedence over checking - // for uses, because of two-address instructions. - MachineBasicBlock::iterator Walker = UseI; - bool foundDef = false; - bool foundUse = false; - while (Walker != MBB->begin()) { - --Walker; - if (BlockDefs.count(Walker)) { - foundDef = true; - break; - } else if (BlockUses.count(Walker)) { - foundUse = true; - break; - } - } - - if (!foundDef && !foundUse) - return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, - IsTopLevel, IsIntraBlock); - - SlotIndex StartIndex = LIs->getInstructionIndex(Walker); - StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex(); - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - - if (foundDef) - RetVNI = NewVNs[Walker]; - else - RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, - NewVNs, LiveOut, Phis, false, true); - - LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - } - - // Memoize results so we don't have to recompute them. - if (!IsIntraBlock) LiveOut[MBB] = RetVNI; - else { - if (!NewVNs.count(UseI)) - NewVNs[UseI] = RetVNI; - Visited.insert(UseI); - } - - return RetVNI; -} - -/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path. -/// -VNInfo* -PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI, - MachineBasicBlock* MBB, LiveInterval* LI, - SmallPtrSet<MachineInstr*, 4>& Visited, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs, - DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses, - DenseMap<MachineInstr*, VNInfo*>& NewVNs, - DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut, - DenseMap<MachineBasicBlock*, VNInfo*>& Phis, - bool IsTopLevel, bool IsIntraBlock) { - // NOTE: Because this is the fallback case from other cases, we do NOT - // assume that we are not intrablock here. 
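The deleted PerformPHIConstruction and its fallback resolve each use by walking predecessor blocks, memoizing per-block answers in LiveOut and parking a provisional phi value in Phis so cycles terminate. Below is a compilable toy of that memoized reaching-value search; plain ints stand in for VNInfo, and all of the interval bookkeeping is omitted.

#include <iostream>
#include <map>
#include <vector>

// Toy CFG block: Id is the block number, Def a value number defined
// locally (0 means none).
struct Block {
  int Id;
  int Def;
  std::vector<Block *> Preds;
};

// Memoized reaching-value search: each block is solved at most once via
// LiveOut, and a provisional negative "phi" value is recorded before
// recursing so loops terminate, echoing the Phis map in the deleted pass.
int liveOutValue(Block *B, std::map<Block *, int> &LiveOut) {
  std::map<Block *, int>::iterator It = LiveOut.find(B);
  if (It != LiveOut.end())
    return It->second;                  // memoized answer
  if (B->Def)
    return LiveOut[B] = B->Def;         // a local def wins outright
  LiveOut[B] = -B->Id;                  // provisional phi breaks cycles
  int Merged = 0;
  for (Block *P : B->Preds) {
    int V = liveOutValue(P, LiveOut);
    Merged = Merged == 0 ? V : (Merged == V ? Merged : -B->Id);
  }
  return LiveOut[B] = Merged;           // identical inputs collapse the phi
}

int main() {
  Block Entry = {0, 7, {}};             // value 7 defined in the entry block
  Block L = {1, 0, {&Entry}}, R = {2, 0, {&Entry}};
  Block Join = {3, 0, {&L, &R}};
  std::map<Block *, int> LiveOut;
  std::cout << liveOutValue(&Join, LiveOut) << "\n";  // 7: no phi needed
}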
- if (Phis.count(MBB)) return Phis[MBB]; - - SlotIndex StartIndex = LIs->getMBBStartIdx(MBB); - VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(SlotIndex(), /*FIXME*/ 0, - LIs->getVNInfoAllocator()); - - if (!IsIntraBlock) LiveOut[MBB] = RetVNI; - - // If there are no uses or defs between our starting point and the - // beginning of the block, then recursive perform phi construction - // on our predecessors. - DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI, - Visited, Defs, Uses, NewVNs, - LiveOut, Phis, false, false); - if (Incoming != 0) - IncomingVNs[*PI] = Incoming; - } - - if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) { - VNInfo* OldVN = RetVNI; - VNInfo* NewVN = IncomingVNs.begin()->second; - VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN); - if (MergedVN == OldVN) std::swap(OldVN, NewVN); - - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(), - LOE = LiveOut.end(); LOI != LOE; ++LOI) - if (LOI->second == OldVN) - LOI->second = MergedVN; - for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(), - NVE = NewVNs.end(); NVI != NVE; ++NVI) - if (NVI->second == OldVN) - NVI->second = MergedVN; - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(), - PE = Phis.end(); PI != PE; ++PI) - if (PI->second == OldVN) - PI->second = MergedVN; - RetVNI = MergedVN; - } else { - // Otherwise, merge the incoming VNInfos with a phi join. Create a new - // VNInfo to represent the joined value. - for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = - IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { - I->second->setHasPHIKill(true); - } - } - - SlotIndex EndIndex; - if (IsIntraBlock) { - EndIndex = LIs->getInstructionIndex(UseI).getDefIndex(); - } else - EndIndex = LIs->getMBBEndIdx(MBB); - LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - - // Memoize results so we don't have to recompute them. - if (!IsIntraBlock) - LiveOut[MBB] = RetVNI; - else { - if (!NewVNs.count(UseI)) - NewVNs[UseI] = RetVNI; - Visited.insert(UseI); - } - - return RetVNI; -} - -/// ReconstructLiveInterval - Recompute a live interval from scratch. -void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { - VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator(); - - // Clear the old ranges and valnos; - LI->clear(); - - // Cache the uses and defs of the register - typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap; - RegMap Defs, Uses; - - // Keep track of the new VNs we're creating. - DenseMap<MachineInstr*, VNInfo*> NewVNs; - SmallPtrSet<VNInfo*, 2> PhiVNs; - - // Cache defs, and create a new VNInfo for each def. - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - Defs[(*DI).getParent()].insert(&*DI); - - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = DefIdx.getDefIndex(); - - assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting."); - VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); - - // If the def is a move, set the copy field. - if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) - NewVN->setCopy(&*DI); - - NewVNs[&*DI] = NewVN; - } - - // Cache uses as a separate pass from actually processing them. 
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), - UE = MRI->use_end(); UI != UE; ++UI) - Uses[(*UI).getParent()].insert(&*UI); - - // Now, actually process every use and use a phi construction algorithm - // to walk from it to its reaching definitions, building VNInfos along - // the way. - DenseMap<MachineBasicBlock*, VNInfo*> LiveOut; - DenseMap<MachineBasicBlock*, VNInfo*> Phis; - SmallPtrSet<MachineInstr*, 4> Visited; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg), - UE = MRI->use_end(); UI != UE; ++UI) { - PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs, - Uses, NewVNs, LiveOut, Phis, true, true); - } - - // Add ranges for dead defs - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = DefIdx.getDefIndex(); - - if (LI->liveAt(DefIdx)) continue; - - VNInfo* DeadVN = NewVNs[&*DI]; - LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN)); - } -} - -/// RenumberValno - Split the given valno out into a new vreg, allowing it to -/// be allocated to a different register. This function creates a new vreg, -/// copies the valno and its live ranges over to the new vreg's interval, -/// removes them from the old interval, and rewrites all uses and defs of -/// the original reg to the new vreg within those ranges. -void PreAllocSplitting::RenumberValno(VNInfo* VN) { - SmallVector<VNInfo*, 4> Stack; - SmallVector<VNInfo*, 4> VNsToCopy; - Stack.push_back(VN); - - // Walk through and copy the valno we care about, and any other valnos - // that are two-address redefinitions of the one we care about. These - // will need to be rewritten as well. We also check for safety of the - // renumbering here, by making sure that none of the valno involved has - // phi kills. - while (!Stack.empty()) { - VNInfo* OldVN = Stack.back(); - Stack.pop_back(); - - // Bail out if we ever encounter a valno that has a PHI kill. We can't - // renumber these. - if (OldVN->hasPHIKill()) return; - - VNsToCopy.push_back(OldVN); - - // Locate two-address redefinitions - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg), - DE = MRI->def_end(); DI != DE; ++DI) { - if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue; - SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex(); - VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx); - if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) != - VNsToCopy.end()) - Stack.push_back(NextVN); - } - } - - // Create the new vreg - unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg)); - - // Create the new live interval - LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg); - - for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE = - VNsToCopy.end(); OI != OE; ++OI) { - VNInfo* OldVN = *OI; - - // Copy the valno over - VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator()); - NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN); - - // Remove the valno from the old interval - CurrLI->removeValNo(OldVN); - } - - // Rewrite defs and uses. This is done in two stages to avoid invalidating - // the reg_iterator. 
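RenumberValno's rewrite loop above is deliberately two-phase: operands are collected into OpsToChange first and mutated second, because in LLVM retargeting an operand relinks it into another register's use list and would invalidate the reg_iterator mid-walk. A minimal standalone illustration of the same collect-then-mutate shape, with toy data rather than MachineOperands:

#include <iostream>
#include <list>
#include <utility>
#include <vector>

// Toy operand: (instruction id, register number). This toy only mirrors
// the two-stage shape; mutating a pair would not actually invalidate a
// std::list walk, but MO.setReg() does invalidate reg_iterator.
int main() {
  std::list<std::pair<int, int> > Ops = {{1, 5}, {2, 5}, {3, 5}};

  // Stage 1: collect the operands to change, touching nothing yet
  // (the deleted pass gathers them into OpsToChange).
  std::vector<std::pair<int, int> *> OpsToChange;
  for (std::pair<int, int> &Op : Ops)
    if (Op.second == 5)                 // stand-in for the liveAt() checks
      OpsToChange.push_back(&Op);

  // Stage 2: rewrite, now that no traversal is in flight.
  for (std::pair<int, int> *Op : OpsToChange)
    Op->second = 7;                     // stand-in for MO.setReg(NewVReg)

  for (std::pair<int, int> &Op : Ops)
    std::cout << "instr " << Op.first << " -> reg " << Op.second << "\n";
}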
- SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange; - - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand& MO = I.getOperand(); - SlotIndex InstrIdx = LIs->getInstructionIndex(&*I); - - if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) || - (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex()))) - OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo())); - } - - for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I = - OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) { - MachineInstr* Inst = I->first; - unsigned OpIdx = I->second; - MachineOperand& MO = Inst->getOperand(OpIdx); - MO.setReg(NewVReg); - } - - // Grow the VirtRegMap, since we've created a new vreg. - VRM->grow(); - - // The renumbered vreg shares a stack slot with the old register. - if (IntervalSSMap.count(CurrLI->reg)) - IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg]; - - ++NumRenumbers; -} - -bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, - MachineInstr* DefMI, - MachineBasicBlock::iterator RestorePt, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - MachineBasicBlock& MBB = *RestorePt->getParent(); - - MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - if (!DefMI || DefMI->getParent() == BarrierMBB) - KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); - else - KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); - - if (KillPt == DefMI->getParent()->end()) - return false; - - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI); - SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); - - ReconstructLiveInterval(CurrLI); - RematIdx = RematIdx.getDefIndex(); - RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); - - ++NumSplits; - ++NumRemats; - return true; -} - -MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* DefMI, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int& SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - // Go top down if RefsInMBB is empty. - if (RefsInMBB.empty()) - return 0; - - MachineBasicBlock::iterator FoldPt = Barrier; - while (&*FoldPt != DefMI && FoldPt != MBB->begin() && - !RefsInMBB.count(FoldPt)) - --FoldPt; - - int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg); - if (OpIdx == -1) - return 0; - - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - - if (!TII->canFoldMemoryOperand(FoldPt, Ops)) - return 0; - - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg); - if (I != IntervalSSMap.end()) { - SS = I->second; - } else { - SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - } - - MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); - - if (FMI) { - LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FoldPt->eraseFromParent(); - ++NumFolds; - - IntervalSSMap[vreg] = SS; - CurrSLI = &LSs->getOrCreateInterval(SS, RC); - if (CurrSLI->hasAtLeastOneValue()) - CurrSValNo = CurrSLI->getValNumInfo(0); - else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, - LSs->getVNInfoAllocator()); - } - - return FMI; -} - -MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, - const TargetRegisterClass* RC, - MachineInstr* Barrier, - MachineBasicBlock* MBB, - int SS, - SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { - if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds) - return 0; - - // Go top down if RefsInMBB is empty. 
- if (RefsInMBB.empty()) - return 0; - - // Can't fold a restore between a call stack setup and teardown. - MachineBasicBlock::iterator FoldPt = Barrier; - - // Advance from barrier to call frame teardown. - while (FoldPt != MBB->getFirstTerminator() && - FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { - if (RefsInMBB.count(FoldPt)) - return 0; - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - else - ++FoldPt; - - // Now find the restore point. - while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) { - if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) { - while (FoldPt != MBB->getFirstTerminator() && - FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) { - if (RefsInMBB.count(FoldPt)) - return 0; - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - } - - ++FoldPt; - } - - if (FoldPt == MBB->getFirstTerminator()) - return 0; - - int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true); - if (OpIdx == -1) - return 0; - - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - - if (!TII->canFoldMemoryOperand(FoldPt, Ops)) - return 0; - - MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); - - if (FMI) { - LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FoldPt->eraseFromParent(); - ++NumRestoreFolds; - } - - return FMI; -} - -/// SplitRegLiveInterval - Split (spill and restore) the given live interval -/// so it would not cross the barrier that's being processed. Shrink wrap -/// (minimize) the live interval to the last uses. -bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { - DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier - << " result: "); - - CurrLI = LI; - - // Find live range where current interval cross the barrier. - LiveInterval::iterator LR = - CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex()); - VNInfo *ValNo = LR->valno; - - assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); - - MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def); - - // If this would create a new join point, do not split. - if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) { - DEBUG(dbgs() << "FAILED (would create a new join point).\n"); - return false; - } - - // Find all references in the barrier mbb. - SmallPtrSet<MachineInstr*, 4> RefsInMBB; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), - E = MRI->reg_end(); I != E; ++I) { - MachineInstr *RefMI = &*I; - if (RefMI->getParent() == BarrierMBB) - RefsInMBB.insert(RefMI); - } - - // Find a point to restore the value after the barrier. - MachineBasicBlock::iterator RestorePt = - findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB); - if (RestorePt == BarrierMBB->end()) { - DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n"); - return false; - } - - if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) - if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) { - DEBUG(dbgs() << "success (remat).\n"); - return true; - } - - // Add a spill either before the barrier or after the definition. - MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL; - const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg); - SlotIndex SpillIndex; - MachineInstr *SpillMI = NULL; - int SS = -1; - if (!DefMI) { - // If we don't know where the def is we must split just before the barrier. 
- if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier, - BarrierMBB, SS, RefsInMBB))) { - SpillIndex = LIs->getInstructionIndex(SpillMI); - } else { - MachineBasicBlock::iterator SpillPt = - findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); - if (SpillPt == BarrierMBB->begin()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - // Add spill. - - SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC, - TRI); - SpillMI = prior(SpillPt); - SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); - } - } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, - LIs->getZeroIndex(), SpillIndex, SS)) { - // If it's already split, just restore the value. There is no need to spill - // the def again. - if (!DefMI) { - DEBUG(dbgs() << "FAILED (def is dead).\n"); - return false; // Def is dead. Do nothing. - } - - if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, - BarrierMBB, SS, RefsInMBB))) { - SpillIndex = LIs->getInstructionIndex(SpillMI); - } else { - // Check if it's possible to insert a spill after the def MI. - MachineBasicBlock::iterator SpillPt; - if (DefMBB == BarrierMBB) { - // Add spill after the def and the last use before the barrier. - SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, - RefsInMBB); - if (SpillPt == DefMBB->begin()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - } else { - SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); - if (SpillPt == DefMBB->end()) { - DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n"); - return false; // No gap to insert spill. - } - } - // Add spill. - SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC, - TRI); - SpillMI = prior(SpillPt); - SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); - } - } - - // Remember def instruction index to spill index mapping. - if (DefMI && SpillMI) - Def2SpillMap[ValNo->def] = SpillIndex; - - // Add restore. - bool FoldedRestore = false; - SlotIndex RestoreIndex; - if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, - BarrierMBB, SS, RefsInMBB)) { - RestorePt = LMI; - RestoreIndex = LIs->getInstructionIndex(RestorePt); - FoldedRestore = true; - } else { - TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI); - MachineInstr *LoadMI = prior(RestorePt); - RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI); - } - - // Update spill stack slot live interval. - UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(), - RestoreIndex.getDefIndex()); - - ReconstructLiveInterval(CurrLI); - - if (!FoldedRestore) { - SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); - RestoreIdx = RestoreIdx.getDefIndex(); - RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx)); - } - - ++NumSplits; - DEBUG(dbgs() << "success.\n"); - return true; -} - -/// SplitRegLiveIntervals - Split all register live intervals that cross the -/// barrier that's being processed. -bool -PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, - SmallPtrSet<LiveInterval*, 8>& Split) { - // First find all the virtual registers whose live intervals are intercepted - // by the current barrier. 
- SmallVector<LiveInterval*, 8> Intervals; - for (const TargetRegisterClass **RC = RCs; *RC; ++RC) { - // FIXME: If it's not safe to move any instruction that defines the barrier - // register class, then it means there are some special dependencies which - // codegen is not modelling. Ignore these barriers for now. - if (!TII->isSafeToMoveRegClassDefs(*RC)) - continue; - const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC); - for (unsigned i = 0, e = VRs.size(); i != e; ++i) { - unsigned Reg = VRs[i]; - if (!LIs->hasInterval(Reg)) - continue; - LiveInterval *LI = &LIs->getInterval(Reg); - if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg)) - // Virtual register live interval is intercepted by the barrier. We - // should split and shrink wrap its interval if possible. - Intervals.push_back(LI); - } - } - - // Process the affected live intervals. - bool Change = false; - while (!Intervals.empty()) { - if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) - break; - LiveInterval *LI = Intervals.back(); - Intervals.pop_back(); - bool result = SplitRegLiveInterval(LI); - if (result) Split.insert(LI); - Change |= result; - } - - return Change; -} - -unsigned PreAllocSplitting::getNumberOfNonSpills( - SmallPtrSet<MachineInstr*, 4>& MIs, - unsigned Reg, int FrameIndex, - bool& FeedsTwoAddr) { - unsigned NonSpills = 0; - for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end(); - UI != UE; ++UI) { - int StoreFrameIndex; - unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); - if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) - ++NonSpills; - - int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) - FeedsTwoAddr = true; - } - - return NonSpills; -} - -/// removeDeadSpills - After doing splitting, filter through all intervals we've -/// split, and see if any of the spills are unnecessary. If so, remove them. -bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { - bool changed = false; - - // Walk over all of the live intervals that were touched by the splitter, - // and see if we can do any DCE and/or folding. - for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(), - LE = split.end(); LI != LE; ++LI) { - DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount; - - // First, collect all the uses of the vreg, and sort them by their - // reaching definition (VNInfo). - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), - UE = MRI->use_end(); UI != UE; ++UI) { - SlotIndex index = LIs->getInstructionIndex(&*UI); - index = index.getUseIndex(); - - const LiveRange* LR = (*LI)->getLiveRangeContaining(index); - VNUseCount[LR->valno].insert(&*UI); - } - - // Now, take the definitions (VNInfo's) one at a time and try to DCE - // and/or fold them away. - for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(), - VE = (*LI)->vni_end(); VI != VE; ++VI) { - - if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit) - return changed; - - VNInfo* CurrVN = *VI; - - // We don't currently try to handle definitions with PHI kills, because - // it would involve processing more than one VNInfo at once. - if (CurrVN->hasPHIKill()) continue; - - // We also don't try to handle the results of PHI joins, since there's - // no defining instruction to analyze. 
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); - if (!DefMI || CurrVN->isUnused()) continue; - - // We're only interested in eliminating cruft introduced by the splitter, - // is of the form load-use or load-use-store. First, check that the - // definition is a load, and remember what stack slot we loaded it from. - int FrameIndex; - if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue; - - // If the definition has no uses at all, just DCE it. - if (VNUseCount[CurrVN].size() == 0) { - LIs->RemoveMachineInstrFromMaps(DefMI); - (*LI)->removeValNo(CurrVN); - DefMI->eraseFromParent(); - VNUseCount.erase(CurrVN); - ++NumDeadSpills; - changed = true; - continue; - } - - // Second, get the number of non-store uses of the definition, as well as - // a flag indicating whether it feeds into a later two-address definition. - bool FeedsTwoAddr = false; - unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN], - (*LI)->reg, FrameIndex, - FeedsTwoAddr); - - // If there's one non-store use and it doesn't feed a two-addr, then - // this is a load-use-store case that we can try to fold. - if (NonSpillCount == 1 && !FeedsTwoAddr) { - // Start by finding the non-store use MachineInstr. - SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin(); - int StoreFrameIndex; - unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); - while (UI != VNUseCount[CurrVN].end() && - (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) { - ++UI; - if (UI != VNUseCount[CurrVN].end()) - StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); - } - if (UI == VNUseCount[CurrVN].end()) continue; - - MachineInstr* use = *UI; - - // Attempt to fold it away! - int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false); - if (OpIdx == -1) continue; - SmallVector<unsigned, 1> Ops; - Ops.push_back(OpIdx); - if (!TII->canFoldMemoryOperand(use, Ops)) continue; - - MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex); - - if (!NewMI) continue; - - // Update relevant analyses. - LIs->RemoveMachineInstrFromMaps(DefMI); - LIs->ReplaceMachineInstrInMaps(use, NewMI); - (*LI)->removeValNo(CurrVN); - - DefMI->eraseFromParent(); - use->eraseFromParent(); - VNUseCount[CurrVN].erase(use); - - // Remove deleted instructions. Note that we need to remove them from - // the VNInfo->use map as well, just to be safe. - for (SmallPtrSet<MachineInstr*, 4>::iterator II = - VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end(); - II != IE; ++II) { - for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator - VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE; - ++VNI) - if (VNI->first != CurrVN) - VNI->second.erase(*II); - LIs->RemoveMachineInstrFromMaps(*II); - (*II)->eraseFromParent(); - } - - VNUseCount.erase(CurrVN); - - for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator - VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI) - if (VI->second.erase(use)) - VI->second.insert(NewMI); - - ++NumDeadSpills; - changed = true; - continue; - } - - // If there's more than one non-store instruction, we can't profitably - // fold it, so bail. - if (NonSpillCount) continue; - - // Otherwise, this is a load-store case, so DCE them. 
- for (SmallPtrSet<MachineInstr*, 4>::iterator UI = - VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end(); - UI != UE; ++UI) { - LIs->RemoveMachineInstrFromMaps(*UI); - (*UI)->eraseFromParent(); - } - - VNUseCount.erase(CurrVN); - - LIs->RemoveMachineInstrFromMaps(DefMI); - (*LI)->removeValNo(CurrVN); - DefMI->eraseFromParent(); - ++NumDeadSpills; - changed = true; - } - } - - return changed; -} - -bool PreAllocSplitting::createsNewJoin(LiveRange* LR, - MachineBasicBlock* DefMBB, - MachineBasicBlock* BarrierMBB) { - if (DefMBB == BarrierMBB) - return false; - - if (LR->valno->hasPHIKill()) - return false; - - SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB); - if (LR->end < MBBEnd) - return false; - - MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>(); - if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB)) - return true; - - MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); - SmallPtrSet<MachineBasicBlock*, 4> Visited; - typedef std::pair<MachineBasicBlock*, - MachineBasicBlock::succ_iterator> ItPair; - SmallVector<ItPair, 4> Stack; - Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin())); - - while (!Stack.empty()) { - ItPair P = Stack.back(); - Stack.pop_back(); - - MachineBasicBlock* PredMBB = P.first; - MachineBasicBlock::succ_iterator S = P.second; - - if (S == PredMBB->succ_end()) - continue; - else if (Visited.count(*S)) { - Stack.push_back(std::make_pair(PredMBB, ++S)); - continue; - } else - Stack.push_back(std::make_pair(PredMBB, S+1)); - - MachineBasicBlock* MBB = *S; - Visited.insert(MBB); - - if (MBB == BarrierMBB) - return true; - - MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB); - MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB); - MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom(); - while (MDTN) { - if (MDTN == DefMDTN) - return true; - else if (MDTN == BarrierMDTN) - break; - MDTN = MDTN->getIDom(); - } - - MBBEnd = LIs->getMBBEndIdx(MBB); - if (LR->end > MBBEnd) - Stack.push_back(std::make_pair(MBB, MBB->succ_begin())); - } - - return false; -} - - -bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) { - CurrMF = &MF; - TM = &MF.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); - MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); - SIs = &getAnalysis<SlotIndexes>(); - LIs = &getAnalysis<LiveIntervals>(); - LSs = &getAnalysis<LiveStacks>(); - VRM = &getAnalysis<VirtRegMap>(); - - bool MadeChange = false; - - // Make sure blocks are numbered in order. 
- MF.RenumberBlocks(); - - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet<MachineBasicBlock*,16> Visited; - - SmallPtrSet<LiveInterval*, 8> Split; - - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - BarrierMBB = *DFI; - for (MachineBasicBlock::iterator I = BarrierMBB->begin(), - E = BarrierMBB->end(); I != E; ++I) { - Barrier = &*I; - const TargetRegisterClass **BarrierRCs = - Barrier->getDesc().getRegClassBarriers(); - if (!BarrierRCs) - continue; - BarrierIdx = LIs->getInstructionIndex(Barrier); - MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split); - } - } - - MadeChange |= removeDeadSpills(Split); - - return MadeChange; -} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index f1f3c9969cc8..a901c5fefa3e 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -145,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -152,8 +153,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode - int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); - int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); // Early exit for targets which have no call frame setup/destroy pseudo // instructions. 
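The PrologEpilogInserter change moves the call-frame pseudo opcodes from TargetRegisterInfo to TargetInstrInfo accessors (the replaceFrameIndices hunk below makes the same switch). The sketch here models the scan those opcodes feed in calculateCallsInformation, recording whether the function adjusts the stack and the largest call-frame size; the opcodes and instruction stream are invented for the example.

#include <algorithm>
#include <iostream>
#include <vector>

// Invented opcodes and a flat instruction stream; in the patch the two
// pseudo opcodes now come from TII.getCallFrameSetupOpcode() and
// TII.getCallFrameDestroyOpcode() instead of the TargetRegisterInfo pair.
enum { FrameSetup = 1, FrameDestroy = 2, Other = 0 };
struct MI { int Opcode; unsigned Amount; };

int main() {
  const int FrameSetupOpcode = FrameSetup;
  const int FrameDestroyOpcode = FrameDestroy;
  std::vector<MI> Block = {{Other, 0},        {FrameSetup, 16},
                           {Other, 0},        {FrameDestroy, 16},
                           {FrameSetup, 32},  {FrameDestroy, 32}};
  bool AdjustsStack = false;
  unsigned MaxCallFrameSize = 0;
  // Same shape as calculateCallsInformation: every call-frame pseudo
  // marks the function as adjusting the stack and carries the frame size.
  for (const MI &I : Block)
    if (I.Opcode == FrameSetupOpcode || I.Opcode == FrameDestroyOpcode) {
      AdjustsStack = true;
      MaxCallFrameSize = std::max(MaxCallFrameSize, I.Amount);
    }
  std::cout << "adjusts stack: " << AdjustsStack
            << ", max call frame: " << MaxCallFrameSize << "\n";
}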
@@ -705,12 +706,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; - int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); - int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 1d77b29e2a2e..5ea26adc7644 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -20,6 +20,7 @@ #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" +#include "RegisterCoalescer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -34,7 +35,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -141,7 +141,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -324,19 +324,21 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!" - "\nCannot allocate: " << *VirtReg; + const char *Msg = "ran out of registers during register allocation"; + // Probably caused by an inline asm. + MachineInstr *MI; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); - MachineInstr *MI = I.skipInstruction();) { - if (!MI->isInlineAsm()) - continue; - Msg << "\nPlease check your inline asm statement for " - "invalid constraints:\n"; - MI->print(Msg, &VRM->getMachineFunction().getTarget()); - } - report_fatal_error(Msg.str()); + (MI = I.skipInstruction());) + if (MI->isInlineAsm()) + break; + if (MI) + MI->emitError(Msg); + else + report_fatal_error(Msg); + // Keep going after reporting the error. + VRM->assignVirt2Phys(VirtReg->reg, + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + continue; } if (AvailablePhysReg) diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 97652036f988..b36a445291b7 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -86,7 +86,7 @@ namespace { // that is currently available in a physical register. 
LiveRegMap LiveVirtRegs; - DenseMap<unsigned, MachineInstr *> LiveDbgValueMap; + DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap; // RegState - Track the state of a physical register. enum RegState { @@ -118,7 +118,7 @@ namespace { // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. It is still necessary to // mark all the clobbered registers as used by the function. - SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs; + SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs; // isBulkSpilling - This flag is set when LiveRegMap will be cleared // completely after spilling all live registers. LiveRegMap entries should @@ -272,7 +272,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) { + SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first]; + for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { + MachineInstr *DBG = LRIDbgValues[li]; const MDNode *MDPtr = DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); int64_t Offset = 0; @@ -291,9 +293,11 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = DBG->getParent(); MBB->insert(MI, NewDV); DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); - LiveDbgValueMap[LRI->first] = NewDV; } } + // Now this register is spilled there is should not be any DBG_VALUE pointing + // to this register because they are all pointing to spilled value now. + LRIDbgValues.clear(); if (SpillKill) LR.LastUse = 0; // Don't kill register again } @@ -419,7 +423,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { if (UsedInInstr.test(PhysReg)) { - DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n"); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } switch (unsigned VirtReg = PhysRegState[PhysReg]) { @@ -428,15 +432,15 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { case regFree: return 0; case regReserved: - DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: " - << PhysReg << " is reserved already.\n"); + DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " + << PrintReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; } // This is a disabled register, add up cost of aliases. - DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n"); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { @@ -487,14 +491,12 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // Take hint when possible. if (Hint) { - switch(calcSpillCost(Hint)) { - default: - definePhysReg(MI, Hint, regFree); - // Fall through. - case 0: + // Ignore the hint if we would have to spill a dirty register. 
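The RegAllocFast change above turns LiveDbgValueMap into a map from virtual register to a vector of DBG_VALUE instructions, so a spill can retarget every debug value naming the register and then drop the whole list. A small standalone model of that one-to-many bookkeeping, with std::map and strings standing in for DenseMap and MachineInstr:

#include <iostream>
#include <map>
#include <string>
#include <vector>

// One virtual register can be named by several DBG_VALUEs; keeping them
// all lets a spill rewrite every one, and the list is cleared afterwards
// because no register-based debug value survives the spill.
int main() {
  std::map<unsigned, std::vector<std::string> > LiveDbgValueMap;
  LiveDbgValueMap[42].push_back("DBG_VALUE !x, %vreg42");
  LiveDbgValueMap[42].push_back("DBG_VALUE !y, %vreg42");

  std::vector<std::string> &DbgValues = LiveDbgValueMap[42];
  for (std::string &DV : DbgValues) {
    DV = DV.substr(0, DV.find('%')) + "fi#3";  // retarget at the stack slot
    std::cout << DV << "\n";
  }
  DbgValues.clear();  // mirrors LRIDbgValues.clear() in the hunk above
}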
+ unsigned Cost = calcSpillCost(Hint); + if (Cost < spillDirty) { + if (Cost) + definePhysReg(MI, Hint, regFree); return assignVirtToPhysReg(LRE, Hint); - case spillImpossible: - break; } } @@ -513,7 +515,7 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { unsigned BestReg = 0, BestCost = spillImpossible; for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned Cost = calcSpillCost(*I); - DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. @@ -528,16 +530,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { return assignVirtToPhysReg(LRE, BestReg); } - // Nothing we can do. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for " - << "invalid constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); + // Nothing we can do. Report an error and keep going with a bad allocation. + MI->emitError("ran out of registers during register allocation"); + definePhysReg(MI, *AO.begin(), regFree); + assignVirtToPhysReg(LRE, *AO.begin()); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. @@ -724,7 +720,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); + DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) + << " as used in instr\n"); UsedInInstr.set(Reg); } @@ -774,7 +771,7 @@ void RAFast::AllocateBasicBlock() { // Otherwise, sequentially allocate each instruction in the MBB. while (MII != MBB->end()) { MachineInstr *MI = MII++; - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); DEBUG({ dbgs() << "\n>> " << *MI << "Regs:"; for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { @@ -818,7 +815,7 @@ void RAFast::AllocateBasicBlock() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LiveDbgValueMap[Reg] = MI; + LiveDbgValueMap[Reg].push_back(MI); LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); @@ -887,7 +884,7 @@ void RAFast::AllocateBasicBlock() { VirtOpEnd = i+1; if (MO.isUse()) { hasTiedOps = hasTiedOps || - TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; } else { if (MO.isEarlyClobber()) hasEarlyClobbers = true; @@ -917,7 +914,7 @@ void RAFast::AllocateBasicBlock() { // We didn't detect inline asm tied operands above, so just make this extra // pass for all inline asm. if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) { + (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. CopyDst = 0; @@ -962,7 +959,7 @@ void RAFast::AllocateBasicBlock() { } unsigned DefOpEnd = MI->getNumOperands(); - if (TID.isCall()) { + if (MCID.isCall()) { // Spill all virtregs before a call. 
This serves two purposes: 1. If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots, and 2. we don't have to wade through @@ -973,7 +970,7 @@ void RAFast::AllocateBasicBlock() { // The imp-defs are skipped below, but we still need to mark those // registers as used by the function. - SkippedInstrs.insert(&TID); + SkippedInstrs.insert(&MCID); } // Third scan. @@ -1059,7 +1056,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { MRI->closePhysRegsUsed(*TRI); // Add the clobber lists for all the instructions we skipped earlier. - for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator + for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) if (const unsigned *Defs = (*I)->getImplicitDefs()) while (*Defs) diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 8d0632567bb1..e235e87b54f3 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -22,6 +22,7 @@ #include "SpillPlacement.h" #include "SplitKit.h" #include "VirtRegMap.h" +#include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" @@ -33,11 +34,9 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineLoopRanges.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -68,7 +67,6 @@ class RAGreedy : public MachineFunctionPass, LiveStacks *LS; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; - MachineLoopRanges *LoopRanges; EdgeBundles *Bundles; SpillPlacement *SpillPlacer; LiveDebugVariables *DebugVars; @@ -76,6 +74,7 @@ class RAGreedy : public MachineFunctionPass, // state std::auto_ptr<Spiller> SpillerInstance; std::priority_queue<std::pair<unsigned, unsigned> > Queue; + unsigned NextCascade; // Live ranges pass through a number of stages as we try to allocate them. // Some of the stages may also create new live ranges: @@ -101,29 +100,49 @@ class RAGreedy : public MachineFunctionPass, static const char *const StageName[]; - IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage; + // RegInfo - Keep additional information about each live range. + struct RegInfo { + LiveRangeStage Stage; + + // Cascade - Eviction loop prevention. See canEvictInterference(). + unsigned Cascade; + + RegInfo() : Stage(RS_New), Cascade(0) {} + }; + + IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return LiveRangeStage(LRStage[VirtReg.reg]); + return ExtraRegInfo[VirtReg.reg].Stage; + } + + void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + ExtraRegInfo[VirtReg.reg].Stage = Stage; } template<typename Iterator> void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { - LRStage.resize(MRI->getNumVirtRegs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); for (;Begin != End; ++Begin) { unsigned Reg = (*Begin)->reg; - if (LRStage[Reg] == RS_New) - LRStage[Reg] = NewStage; + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = NewStage; } } - // Eviction. 
Sometimes an assigned live range can be evicted without - // conditions, but other times it must be split after being evicted to avoid - // infinite loops. - enum CanEvict { - CE_Never, ///< Can never evict. - CE_Always, ///< Can always evict. - CE_WithSplit ///< Can evict only if range is also split or spilled. + /// Cost of evicting interference. + struct EvictionCost { + unsigned BrokenHints; ///< Total number of broken hints. + float MaxWeight; ///< Maximum spill weight evicted. + + EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + + bool operator<(const EvictionCost &O) const { + if (BrokenHints != O.BrokenHints) + return BrokenHints < O.BrokenHints; + return MaxWeight < O.MaxWeight; + } }; // splitting state. @@ -139,11 +158,13 @@ class RAGreedy : public MachineFunctionPass, /// Global live range splitting candidate info. struct GlobalSplitCandidate { unsigned PhysReg; + InterferenceCache::Cursor Intf; BitVector LiveBundles; SmallVector<unsigned, 8> ActiveBlocks; - void reset(unsigned Reg) { + void reset(InterferenceCache &Cache, unsigned Reg) { PhysReg = Reg; + Intf.setPhysReg(Cache, Reg); LiveBundles.clear(); ActiveBlocks.clear(); } @@ -185,13 +206,15 @@ private: float calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, float&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); - void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor); - float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor); + void growRegion(GlobalSplitCandidate &Cand); + float calcGlobalSplitCost(GlobalSplitCandidate&); void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, SmallVectorImpl<LiveInterval*>&); void calcGapWeights(unsigned, SmallVectorImpl<float>&); - CanEvict canEvict(LiveInterval &A, LiveInterval &B); - bool canEvictInterference(LiveInterval&, unsigned, float&); + bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); + bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + void evictInterference(LiveInterval&, unsigned, + SmallVectorImpl<LiveInterval*>&); unsigned tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); @@ -228,18 +251,17 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) { +RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); @@ -264,8 +286,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineDominatorTree>(); 
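The EvictionCost comparison defined above is lexicographic, so preserving hints always dominates spill weight. A small illustration with made-up weights:

EvictionCost Quiet;       // BrokenHints defaults to 0
Quiet.MaxWeight = 9.0f;   // would evict a heavy live range
EvictionCost Noisy(1);    // would break one hint
Noisy.MaxWeight = 0.5f;   // but only evict a light range
assert(Quiet < Noisy && "one broken hint outweighs any weight difference");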
AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); - AU.addRequired<MachineLoopRanges>(); - AU.addPreserved<MachineLoopRanges>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); AU.addRequired<EdgeBundles>(); @@ -308,13 +328,13 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { // LRE may clone a virtual register because dead code elimination causes it to // be split into connected components. Ensure that the new register gets the // same stage as the parent. - LRStage.grow(New); - LRStage[New] = LRStage[Old]; + ExtraRegInfo.grow(New); + ExtraRegInfo[New] = ExtraRegInfo[Old]; } void RAGreedy::releaseMemory() { SpillerInstance.reset(0); - LRStage.clear(); + ExtraRegInfo.clear(); GlobalCand.clear(); RegAllocBase::releaseMemory(); } @@ -328,11 +348,11 @@ void RAGreedy::enqueue(LiveInterval *LI) { "Can only enqueue virtual registers"); unsigned Prio; - LRStage.grow(Reg); - if (LRStage[Reg] == RS_New) - LRStage[Reg] = RS_First; + ExtraRegInfo.grow(Reg); + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = RS_First; - if (LRStage[Reg] == RS_Second) + if (ExtraRegInfo[Reg].Stage == RS_Second) // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. Long ranges are allocated last so // they are split against realistic interference. @@ -375,7 +395,21 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; - // PhysReg is available. Try to evict interference from a cheaper alternative. + // PhysReg is available, but there may be a better choice. + + // If we missed a simple hint, try to cheaply evict interference from the + // preferred register. + if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Order.isHint(Hint)) { + DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); + EvictionCost MaxCost(1); + if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { + evictInterference(VirtReg, Hint, NewVRegs); + return Hint; + } + } + + // Try to evict interference from a cheaper alternative. unsigned Cost = TRI->getCostPerUse(PhysReg); // Most registers have 0 additional cost. @@ -393,31 +427,58 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -/// canEvict - determine if A can evict the assigned live range B. The eviction -/// policy defined by this function together with the allocation order defined -/// by enqueue() decides which registers ultimately end up being split and -/// spilled. +/// shouldEvict - determine if A should evict the assigned live range B. The +/// eviction policy defined by this function together with the allocation order +/// defined by enqueue() decides which registers ultimately end up being split +/// and spilled. +/// +/// Cascade numbers are used to prevent infinite loops if this function is a +/// cyclic relation. /// -/// This function must define a non-circular relation when it returns CE_Always, -/// otherwise infinite eviction loops are possible. When evicting a <= RS_Second -/// range, it is possible to return CE_WithSplit which forces the evicted -/// register to be split or spilled before it can evict anything again. That -/// guarantees progress. -RAGreedy::CanEvict RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) { - return A.weight > B.weight ? CE_Always : CE_Never; +/// @param A The live range to be assigned. 
+/// @param IsHint True when A is about to be assigned to its preferred +/// register. +/// @param B The live range to be evicted. +/// @param BreaksHint True when B is already assigned to its preferred register. +bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, + LiveInterval &B, bool BreaksHint) { + bool CanSplit = getStage(B) <= RS_Second; + + // Be fairly aggressive about following hints as long as the evictee can be + // split. + if (CanSplit && IsHint && !BreaksHint) + return true; + + return A.weight > B.weight; } -/// canEvict - Return true if all interferences between VirtReg and PhysReg can -/// be evicted. -/// Return false if any interference is heavier than MaxWeight. -/// On return, set MaxWeight to the maximal spill weight of an interference. +/// canEvictInterference - Return true if all interferences between VirtReg and +/// PhysReg can be evicted for a cost below MaxCost. +/// +/// @param VirtReg Live range that is about to be assigned. +/// @param PhysReg Desired register for assignment. +/// @param IsHint True when PhysReg is VirtReg's preferred register. +/// @param MaxCost Only look for cheaper candidates and update with new cost +/// when returning true. +/// @returns True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, - float &MaxWeight) { - float Weight = 0; + bool IsHint, EvictionCost &MaxCost) { + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never + // involved in an eviction before. If a cascade number was assigned, deny + // evicting anything with the same or a newer cascade number. This prevents + // infinite eviction loops. + // + // This works out so a register without a cascade number is allowed to evict + // anything, and it can be evicted by anything. + unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + if (!Cascade) + Cascade = NextCascade; + + EvictionCost Cost; for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); // If there is 10 or more interferences, chances are one is heavier. - if (Q.collectInterferingVRegs(10, MaxWeight) >= 10) + if (Q.collectInterferingVRegs(10) >= 10) return false; // Check if any interfering live range is heavier than MaxWeight. @@ -425,25 +486,69 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, LiveInterval *Intf = Q.interferingVRegs()[i - 1]; if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) return false; - if (Intf->weight >= MaxWeight) - return false; - switch (canEvict(VirtReg, *Intf)) { - case CE_Always: - break; - case CE_Never: + // Never evict spill products. They cannot split or spill. + if (getStage(*Intf) == RS_Spill) return false; - case CE_WithSplit: - if (getStage(*Intf) > RS_Second) + // Once a live range becomes small enough, it is urgent that we find a + // register for it. This is indicated by an infinite spill weight. These + // urgent live ranges get to evict almost anything. + bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable(); + // Only evict older cascades or live ranges without a cascade. + unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; + if (Cascade <= IntfCascade) { + if (!Urgent) return false; - break; + // We permit breaking cascades for urgent evictions. It should be the + // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10; } - Weight = std::max(Weight, Intf->weight); + // Would this break a satisfied hint? + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + // Update eviction cost. + Cost.BrokenHints += BreaksHint; + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + // Abort if this would be too expensive. + if (!(Cost < MaxCost)) + return false; + // Finally, apply the eviction policy for non-urgent evictions. + if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + return false; } } - MaxWeight = Weight; + MaxCost = Cost; return true; } +/// evictInterference - Evict any interfering registers that prevent VirtReg +/// from being assigned to PhysReg. This assumes that canEvictInterference +/// returned true. +void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + // Make sure that VirtReg has a cascade number, and assign that cascade + // number to every evicted register. These live ranges can then only be + // evicted by a newer cascade, preventing infinite loops. + unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + if (!Cascade) + Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; + + DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) + << " interference: Cascade " << Cascade << '\n'); + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + assert(Q.seenAllInterferences() && "Didn't check all interferences."); + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *Intf = Q.interferingVRegs()[i]; + unassign(*Intf, VRM->getPhys(Intf->reg)); + assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + VirtReg.isSpillable() < Intf->isSpillable()) && + "Cannot decrease cascade number, illegal eviction"); + ExtraRegInfo[Intf->reg].Cascade = Cascade; + ++NumEvicted; + NewVRegs.push_back(Intf); + } + } +} + /// tryEvict - Try to evict all interferences for a physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. @@ -454,31 +559,37 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); - // Keep track of the lightest single interference seen so far. - float BestWeight = HUGE_VALF; + // Keep track of the cheapest interference seen so far. + EvictionCost BestCost(~0u); unsigned BestPhys = 0; + // When we are just looking for a reduced cost per use, don't break any + // hints, and only evict smaller spill weights. + if (CostPerUseLimit < ~0u) { + BestCost.BrokenHints = 0; + BestCost.MaxWeight = VirtReg.weight; + } + Order.rewind(); while (unsigned PhysReg = Order.next()) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; - // The first use of a register in a function has cost 1. - if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg)) - continue; - - float Weight = BestWeight; - if (!canEvictInterference(VirtReg, PhysReg, Weight)) - continue; - - // This is an eviction candidate. - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = " - << Weight << '\n'); - if (BestPhys && Weight >= BestWeight) + // The first use of a callee-saved register in a function has cost 1. + // Don't start using a CSR when the CostPerUseLimit is low.
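Stripped of the urgent-eviction escape hatch, the cascade test in canEvictInterference reduces to a strict ordering. A hypothetical helper (not in the patch) showing just the invariant:

bool mayEvict(unsigned EvictorCascade, unsigned VictimCascade,
              unsigned NextCascade) {
  if (!EvictorCascade)
    EvictorCascade = NextCascade;   // never evicted anything yet: acts newest
  // Only strictly older cascades may be displaced, and every victim is
  // restamped with the evictor's number, so eviction chains cannot cycle.
  return EvictorCascade > VictimCascade;
}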
+ if (CostPerUseLimit == 1) + if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) + if (!MRI->isPhysRegUsed(CSR)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(CSR, TRI) << '\n'); + continue; + } + + if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) continue; // Best so far. BestPhys = PhysReg; - BestWeight = Weight; + // Stop if the hint can be used. if (Order.isHint(PhysReg)) break; @@ -487,22 +598,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, if (!BestPhys) return 0; - DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n"); - for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - ++NumEvicted; - NewVRegs.push_back(Intf); - // Prevent looping by forcing the evicted ranges to be split before they - // can evict anything else. - if (getStage(*Intf) < RS_Second && - canEvict(VirtReg, *Intf) == CE_WithSplit) - LRStage[Intf->reg] = RS_Second; - } - } + evictInterference(VirtReg, BestPhys, NewVRegs); return BestPhys; } @@ -621,8 +717,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); } -void RAGreedy::growRegion(GlobalSplitCandidate &Cand, - InterferenceCache::Cursor Intf) { +void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Keep track of through blocks that have not been added to SpillPlacer. BitVector Todo = SA->getThroughBlocks(); SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; @@ -633,8 +728,6 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand, for (;;) { ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); - if (NewBundles.empty()) - break; // Find new through blocks in the periphery of PrefRegBundles. for (int i = 0, e = NewBundles.size(); i != e; ++i) { unsigned Bundle = NewBundles[i]; @@ -654,12 +747,12 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand, } } // Any new blocks to add? - if (ActiveBlocks.size() > AddedTo) { - ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo], - ActiveBlocks.size() - AddedTo); - addThroughConstraints(Intf, Add); - AddedTo = ActiveBlocks.size(); - } + if (ActiveBlocks.size() == AddedTo) + break; + addThroughConstraints(Cand.Intf, + ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo)); + AddedTo = ActiveBlocks.size(); + // Perhaps iterating can enable more bundles? SpillPlacer->iterate(); } @@ -697,8 +790,7 @@ float RAGreedy::calcSpillCost() { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, - InterferenceCache::Cursor Intf) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { float GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); @@ -725,8 +817,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, continue; if (RegIn && RegOut) { // We need double spill code if this block has interference. 
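The scan in tryEvict keeps the cheapest candidate by threading BestCost through canEvictInterference, which tightens the bound every time it succeeds. The loop in miniature (illustrative):

unsigned Best = 0;
while (unsigned PhysReg = Order.next()) {
  // On success, BestCost is lowered to this candidate's eviction cost, so
  // every later candidate must strictly beat the best seen so far.
  if (canEvictInterference(VirtReg, PhysReg, false, BestCost)) {
    Best = PhysReg;
    if (Order.isHint(PhysReg))
      break;                        // a workable hint is taken immediately
  }
}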
- Intf.moveToBlock(Number); - if (Intf.hasInterference()) + Cand.Intf.moveToBlock(Number); + if (Cand.Intf.hasInterference()) GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); continue; } @@ -756,188 +848,42 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, dbgs() << ".\n"; }); - InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); + InterferenceCache::Cursor &Intf = Cand.Intf; LiveRangeEdit LREdit(VirtReg, NewVRegs, this); SE->reset(LREdit); // Create the main cross-block interval. const unsigned MainIntv = SE->openIntv(); - // First add all defs that are live out of a block. + // First handle all the blocks with uses. ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + bool RegIn = BI.LiveIn && + LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; + bool RegOut = BI.LiveOut && + LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; // Create separate intervals for isolated blocks with multiple uses. - if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + if (!RegIn && !RegOut) { DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); - SE->splitSingleBlock(BI); - SE->selectIntv(MainIntv); - continue; - } - - // Should the register be live out? - if (!BI.LiveOut || !RegOut) - continue; - - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#" - << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); - - // The interference interval should either be invalid or overlap MBB. - assert((!Intf.hasInterference() || Intf.first() < Stop) - && "Bad interference"); - assert((!Intf.hasInterference() || Intf.last() > Start) - && "Bad interference"); - - // Check interference leaving the block. - if (!Intf.hasInterference()) { - // Block is interference-free. - DEBUG(dbgs() << ", no interference"); - if (!BI.LiveThrough) { - DEBUG(dbgs() << ", not live-through.\n"); - SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); - continue; + if (!BI.isOneInstr()) { + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); } - if (!RegIn) { - // Block is live-through, but entry bundle is on the stack. - // Reload just before the first use. - DEBUG(dbgs() << ", not live-in, enter before first use.\n"); - SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); - continue; - } - DEBUG(dbgs() << ", live-through.\n"); continue; } - // Block has interference. - DEBUG(dbgs() << ", interference to " << Intf.last()); - - if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) { - // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n'); - SE->useIntv(BI.FirstUse, Stop); - continue; - } - - SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - if (Intf.last().getBoundaryIndex() < BI.LastUse) { - // There are interference-free uses at the end of the block. - // Find the first use that can get the live-out register. 
- SmallVectorImpl<SlotIndex>::const_iterator UI = - std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - Intf.last().getBoundaryIndex()); - assert(UI != SA->UseSlots.end() && "Couldn't find last use"); - SlotIndex Use = *UI; - assert(Use <= BI.LastUse && "Couldn't find last use"); - // Only attempt a split befroe the last split point. - if (Use.getBaseIndex() <= LastSplitPoint) { - DEBUG(dbgs() << ", free use at " << Use << ".\n"); - SlotIndex SegStart = SE->enterIntvBefore(Use); - assert(SegStart >= Intf.last() && "Couldn't avoid interference"); - assert(SegStart < LastSplitPoint && "Impossible split point"); - SE->useIntv(SegStart, Stop); - continue; - } - } - - // Interference is after the last use. - DEBUG(dbgs() << " after last use.\n"); - SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); - assert(SegStart >= Intf.last() && "Couldn't avoid interference"); - } - - // Now all defs leading to live bundles are handled, do everything else. - for (unsigned i = 0; i != UseBlocks.size(); ++i) { - const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; - - // Is the register live-in? - if (!BI.LiveIn || !RegIn) - continue; - - // We have an incoming register. Check for interference. - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); Intf.moveToBlock(BI.MBB->getNumber()); - DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';' - << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop - << ')'); - // Check interference entering the block. - if (!Intf.hasInterference()) { - // Block is interference-free. - DEBUG(dbgs() << ", no interference"); - if (!BI.LiveThrough) { - DEBUG(dbgs() << ", killed in block.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); - continue; - } - if (!RegOut) { - SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); - // Block is live-through, but exit bundle is on the stack. - // Spill immediately after the last use. - if (BI.LastUse < LastSplitPoint) { - DEBUG(dbgs() << ", uses, stack-out.\n"); - SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); - continue; - } - // The last use is after the last split point, it is probably an - // indirect jump. - DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " - << LastSplitPoint << ", stack-out.\n"); - SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint); - SE->useIntv(Start, SegEnd); - // Run a double interval from the split to the last use. - // This makes it possible to spill the complement without affecting the - // indirect branch. - SE->overlapIntv(SegEnd, BI.LastUse); - continue; - } - // Register is live-through. - DEBUG(dbgs() << ", uses, live-through.\n"); - SE->useIntv(Start, Stop); - continue; - } - - // Block has interference. - DEBUG(dbgs() << ", interference from " << Intf.first()); - - if (!BI.LiveThrough && Intf.first() >= BI.LastUse) { - // The interference doesn't reach the outgoing segment. - DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n'); - SE->useIntv(Start, BI.LastUse); - continue; - } - - if (Intf.first().getBaseIndex() > BI.FirstUse) { - // There are interference-free uses at the beginning of the block. - // Find the last use that can get the register. 
- SmallVectorImpl<SlotIndex>::const_iterator UI = - std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - Intf.first().getBaseIndex()); - assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); - SlotIndex Use = (--UI)->getBoundaryIndex(); - DEBUG(dbgs() << ", free use at " << *UI << ".\n"); - SlotIndex SegEnd = SE->leaveIntvAfter(Use); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); - SE->useIntv(Start, SegEnd); - continue; - } - - // Interference is before the first use. - DEBUG(dbgs() << " before first use.\n"); - SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); + if (RegIn && RegOut) + SE->splitLiveThroughBlock(BI.MBB->getNumber(), + MainIntv, Intf.first(), + MainIntv, Intf.last()); + else if (RegIn) + SE->splitRegInBlock(BI, MainIntv, Intf.first()); + else + SE->splitRegOutBlock(BI, MainIntv, Intf.last()); } // Handle live-through blocks. @@ -945,20 +891,11 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned Number = Cand.ActiveBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; - DEBUG(dbgs() << "Live through BB#" << Number << '\n'); - if (RegIn && RegOut) { - Intf.moveToBlock(Number); - if (!Intf.hasInterference()) { - SE->useIntv(Indexes->getMBBStartIdx(Number), - Indexes->getMBBEndIdx(Number)); - continue; - } - } - MachineBasicBlock *MBB = MF->getBlockNumbered(Number); - if (RegIn) - SE->leaveIntvAtTop(*MBB); - if (RegOut) - SE->enterIntvAtEnd(*MBB); + if (!RegIn && !RegOut) + continue; + Intf.moveToBlock(Number); + SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(), + RegOut ? MainIntv : 0, Intf.last()); } ++NumGlobalSplits; @@ -967,7 +904,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, SE->finish(&IntvMap); DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); - LRStage.resize(MRI->getNumVirtRegs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); // Sort out the new intervals created by splitting. We get four kinds: @@ -976,27 +913,27 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - unsigned Reg = LREdit.get(i)->reg; + LiveInterval &Reg = *LREdit.get(i); // Ignore old intervals from DCE. - if (LRStage[Reg] != RS_New) + if (getStage(Reg) != RS_New) continue; // Remainder interval. Don't try splitting again, spill if it doesn't // allocate. if (IntvMap[i] == 0) { - LRStage[Reg] = RS_Global; + setStage(Reg, RS_Global); continue; } // Main interval. Allow repeated splitting as long as the number of live // blocks is strictly decreasing. if (IntvMap[i] == MainIntv) { - if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) { + if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks << " blocks as original.\n"); // Don't allow repeated splitting as a safe guard against looping. 
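With the two hand-written passes above deleted, the per-block work collapses to a three-way dispatch: the edge-bundle solution fixes whether the value is in a register on entry and exit, and the interference bounds pick the split points. In outline, as in the patch:

if (RegIn && RegOut)        // live through the block in a register
  SE->splitLiveThroughBlock(BI.MBB->getNumber(), MainIntv, Intf.first(),
                            MainIntv, Intf.last());
else if (RegIn)             // register on entry only: leave it before Intf
  SE->splitRegInBlock(BI, MainIntv, Intf.first());
else                        // register on exit only: enter it after Intf
  SE->splitRegOutBlock(BI, MainIntv, Intf.last());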
- LRStage[Reg] = RS_Global; + setStage(Reg, RS_Global); } continue; } @@ -1015,17 +952,34 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); const unsigned NoCand = ~0u; unsigned BestCand = NoCand; + unsigned NumCands = 0; Order.rewind(); - for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { - if (GlobalCand.size() <= Cand) - GlobalCand.resize(Cand+1); - GlobalCand[Cand].reset(PhysReg); + while (unsigned PhysReg = Order.next()) { + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). + if (NumCands == IntfCache.getMaxCursors()) { + unsigned WorstCount = ~0u; + unsigned Worst = 0; + for (unsigned i = 0; i != NumCands; ++i) { + if (i == BestCand) + continue; + unsigned Count = GlobalCand[i].LiveBundles.count(); + if (Count < WorstCount) + Worst = i, WorstCount = Count; + } + --NumCands; + GlobalCand[Worst] = GlobalCand[NumCands]; + } + + if (GlobalCand.size() <= NumCands) + GlobalCand.resize(NumCands+1); + GlobalSplitCandidate &Cand = GlobalCand[NumCands]; + Cand.reset(IntfCache, PhysReg); - SpillPlacer->prepare(GlobalCand[Cand].LiveBundles); + SpillPlacer->prepare(Cand.LiveBundles); float Cost; - InterferenceCache::Cursor Intf(IntfCache, PhysReg); - if (!addSplitConstraints(Intf, Cost)) { + if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } @@ -1040,28 +994,29 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, }); continue; } - growRegion(GlobalCand[Cand], Intf); + growRegion(Cand); SpillPlacer->finish(); // No live bundles, defer to splitSingleBlocks(). - if (!GlobalCand[Cand].LiveBundles.any()) { + if (!Cand.LiveBundles.any()) { DEBUG(dbgs() << " no bundles.\n"); continue; } - Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); + Cost += calcGlobalSplitCost(Cand); DEBUG({ dbgs() << ", total = " << Cost << " with bundles"; - for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; - i = GlobalCand[Cand].LiveBundles.find_next(i)) + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); if (Cost < BestCost) { - BestCand = Cand; + BestCand = NumCands; BestCost = Hysteresis * Cost; // Prevent rounding effects. } + ++NumCands; } if (BestCand == NoCand) @@ -1302,10 +1257,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (NewGaps >= NumGaps) { DEBUG(dbgs() << "Tagging non-progress ranges: "); assert(!ProgressRequired && "Didn't make progress when it was required."); - LRStage.resize(MRI->getNumVirtRegs()); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { - LRStage[LREdit.get(i)->reg] = RS_Local; + setStage(*LREdit.get(i), RS_Local); DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); } DEBUG(dbgs() << '\n'); @@ -1384,7 +1338,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, return PhysReg; LiveRangeStage Stage = getStage(VirtReg); - DEBUG(dbgs() << StageName[Stage] << '\n'); + DEBUG(dbgs() << StageName[Stage] + << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. 
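tryRegionSplit now caps live candidates at the interference cache's cursor count and recycles the weakest slot when the cap is hit. That step, reformatted for readability:

// Discard the candidate whose placement liked the fewest edge bundles;
// the current best candidate is never discarded.
unsigned WorstCount = ~0u, Worst = 0;
for (unsigned i = 0; i != NumCands; ++i) {
  if (i == BestCand)
    continue;
  unsigned Count = GlobalCand[i].LiveBundles.count();
  if (Count < WorstCount) {
    Worst = i;
    WorstCount = Count;
  }
}
--NumCands;
GlobalCand[Worst] = GlobalCand[NumCands];   // overwrite and shrink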
The RS_Second ranges already failed to do this, and they should not @@ -1399,7 +1354,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. if (Stage == RS_First) { - LRStage[VirtReg.reg] = RS_Second; + setStage(VirtReg, RS_Second); DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(&VirtReg); return 0; @@ -1407,7 +1362,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class wil report it. - if (Stage >= RS_Spill) + if (Stage >= RS_Spill || !VirtReg.isSpillable()) return ~0u; // Try splitting VirtReg or interferences. @@ -1443,15 +1398,15 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DomTree = &getAnalysis<MachineDominatorTree>(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis<MachineLoopInfo>(); - LoopRanges = &getAnalysis<MachineLoopRanges>(); Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); - LRStage.clear(); - LRStage.resize(MRI->getNumVirtRegs()); + ExtraRegInfo.clear(); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + NextCascade = 1; IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); allocatePhysRegs(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 5ef88cb74ba5..0dd3c598c154 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -16,7 +16,9 @@ #include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "RegisterClassInfo.h" #include "Spiller.h" +#include "RegisterCoalescer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -27,7 +29,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -57,11 +58,6 @@ NewHeuristic("new-spilling-heuristic", cl::init(false), cl::Hidden); static cl::opt<bool> -PreSplitIntervals("pre-alloc-split", - cl::desc("Pre-register allocation live interval splitting"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); @@ -100,10 +96,9 @@ namespace { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup( + initializeRegisterCoalescerPass( *PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); - initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -148,6 +143,7 @@ namespace { BitVector reservedRegs_; LiveIntervals* li_; MachineLoopInfo *loopInfo; + 
RegisterClassInfo RegClassInfo; /// handled_ - Intervals are added to the handled_ set in the order of their /// start value. This is uses for backtracking. @@ -215,8 +211,6 @@ namespace { // to coalescing and which analyses coalescing invalidates. AU.addRequiredTransitive<RegisterCoalescer>(); AU.addRequired<CalculateSpillWeights>(); - if (PreSplitIntervals) - AU.addRequiredID(PreAllocSplittingID); AU.addRequiredID(LiveStacksID); AU.addPreservedID(LiveStacksID); AU.addRequired<MachineLoopInfo>(); @@ -366,13 +360,10 @@ namespace { /// getFirstNonReservedPhysReg - return the first non-reserved physical /// register in the register class. unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); - TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); - while (i != aoe && reservedRegs_.test(*i)) - ++i; - assert(i != aoe && "All registers reserved?!"); - return *i; - } + ArrayRef<unsigned> O = RegClassInfo.getOrder(RC); + assert(!O.empty() && "All registers reserved?!"); + return O.front(); + } void ComputeRelatedRegClasses(); @@ -402,11 +393,10 @@ INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) -INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) +INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", "Linear Scan Register Allocator", false, false) @@ -524,6 +514,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis<LiveIntervals>(); loopInfo = &getAnalysis<MachineLoopInfo>(); + RegClassInfo.runOnMachineFunction(fn); // We don't run the coalescer here because we have no reason to // interact with it. If the coalescer requires interaction, it @@ -1166,14 +1157,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { bool Found = false; std::vector<std::pair<unsigned,float> > RegsWeights; + ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC); if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), - e = RC->allocation_order_end(*mf_); i != e; ++i) { - unsigned reg = *i; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned reg = Order[i]; float regWeight = SpillWeights[reg]; - // Don't even consider reserved regs. - if (reservedRegs_.test(reg)) - continue; // Skip recently allocated registers and reserved registers. if (minWeight > regWeight && !isRecentlyUsed(reg)) Found = true; @@ -1182,11 +1170,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // If we didn't find a register that is spillable, try aliases? if (!Found) { - for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), - e = RC->allocation_order_end(*mf_); i != e; ++i) { - unsigned reg = *i; - if (reservedRegs_.test(reg)) - continue; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned reg = Order[i]; // No need to worry about if the alias register size < regsize of RC. // We are going to spill all registers that alias it anyway. 
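The linear-scan changes here all follow one pattern: the raw target order plus a per-register reservedRegs_ test is replaced by RegisterClassInfo's cached, already-filtered order. getFirstNonReservedPhysReg shows the payoff, annotated:

unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
  // getOrder() returns the allocation order with reserved registers
  // filtered out once per function, so no skip loop is needed here.
  ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
  assert(!O.empty() && "All registers reserved?!");
  return O.front();
}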
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) @@ -1446,13 +1431,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) physReg = vrm_->getPhys(physReg); - TargetRegisterClass::iterator I, E; - tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_); - assert(I != E && "No allocatable register in this register class!"); + ArrayRef<unsigned> Order; + if (Hint.first) + Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_); + else + Order = RegClassInfo.getOrder(RC); + + assert(!Order.empty() && "No allocatable register in this register class!"); // Scan for the first available register. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; @@ -1482,8 +1471,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // inactive count. Alkis found that this reduced register pressure very // slightly on X86 (in rev 1.94 of this file), though this should probably be // reevaluated now. - for (; I != E; ++I) { - unsigned Reg = *I; + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned Reg = Order[i]; // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 1e1f1e0d3470..72230d4b0c5c 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -35,6 +35,7 @@ #include "Splitter.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "RegisterCoalescer.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -46,7 +47,6 @@ #include "llvm/CodeGen/PBQP/Graph.h" #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -84,11 +84,11 @@ public: static char ID; /// Construct a PBQP register allocator. - RegAllocPBQP(std::auto_ptr<PBQPBuilder> b) - : MachineFunctionPass(ID), builder(b) { + RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0) + : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -122,6 +122,8 @@ private: std::auto_ptr<PBQPBuilder> builder; + char *customPassID; + MachineFunction *mf; const TargetMachine *tm; const TargetRegisterInfo *tri; @@ -222,10 +224,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, // Compute an initial allowed set for the current vreg. 
typedef std::vector<unsigned> VRAllowed; VRAllowed vrAllowed; - for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf), - aoEnd = trc->allocation_order_end(*mf); - aoItr != aoEnd; ++aoItr) { - unsigned preg = *aoItr; + ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf); + for (unsigned i = 0; i != rawOrder.size(); ++i) { + unsigned preg = rawOrder[i]; if (!reservedRegs.test(preg)) { vrAllowed.push_back(preg); } @@ -450,6 +451,8 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); au.addRequired<RegisterCoalescer>(); + if (customPassID) + au.addRequiredID(*customPassID); au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); @@ -581,7 +584,7 @@ void RegAllocPBQP::finalizeAlloc() const { if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); - physReg = *liRC->allocation_order_begin(*mf); + physReg = liRC->getRawAllocationOrder(*mf).front(); } vrm->assignVirt2Phys(li->reg, physReg); @@ -703,8 +706,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { } FunctionPass* llvm::createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder> builder) { - return new RegAllocPBQP(builder); + std::auto_ptr<PBQPBuilder> builder, + char *customPassID) { + return new RegAllocPBQP(builder, customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 75b0c90be8fc..5a77e47bc591 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -81,11 +81,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. - TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); - - for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { - unsigned PhysReg = *I; + ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF); + for (unsigned i = 0; i != RawOrder.size(); ++i) { + unsigned PhysReg = RawOrder[i]; // Remove reserved registers from the allocation order. if (Reserved.test(PhysReg)) continue; @@ -103,7 +101,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { DEBUG({ dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; - for (unsigned I = 0; I != N; ++I) + for (unsigned I = 0; I != RCI.NumRegs; ++I) dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); dbgs() << " ]\n"; }); diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h index 6f7d9c94969c..d21fd67efe8b 100644 --- a/lib/CodeGen/RegisterClassInfo.h +++ b/lib/CodeGen/RegisterClassInfo.h @@ -112,7 +112,7 @@ public: /// register, so a register allocator needs to track its liveness and /// availability. 
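The PBQP builder derives each vreg's allowed set with the same filtering idea; in isolation, as in the patch:

// Every non-reserved register in the class's raw allocation order becomes
// an option for this vreg's PBQP node; reserved registers never enter the
// graph at all.
std::vector<unsigned> vrAllowed;
ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i)
  if (!reservedRegs.test(rawOrder[i]))
    vrAllowed.push_back(rawOrder[i]);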
bool isAllocatable(unsigned PhysReg) const { - return TRI->get(PhysReg).inAllocatableClass && !isReserved(PhysReg); + return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg); } }; } // end namespace llvm diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 407559a211a0..b91f92c6aa5a 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -13,38 +13,92 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/RegisterCoalescer.h" +#define DEBUG_TYPE "regcoalescing" +#include "RegisterCoalescer.h" +#include "VirtRegMap.h" +#include "LiveDebugVariables.h" + +#include "llvm/Pass.h" +#include "llvm/Value.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Pass.h" - +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <cmath> using namespace llvm; -// Register the RegisterCoalescer interface, providing a nice name to refer to. -INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", - SimpleRegisterCoalescing) -char RegisterCoalescer::ID = 0; +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); +STATISTIC(numAborts , "Number of times interval joining aborted"); -// RegisterCoalescer destructor: DO NOT move this to the header file -// for RegisterCoalescer or else clients of the RegisterCoalescer -// class may not depend on the RegisterCoalescer.o file in the current -// .a file, causing alias analysis support to not be included in the -// tool correctly! 
-// -RegisterCoalescer::~RegisterCoalescer() {} +static cl::opt<bool> +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt<bool> +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), + cl::init(false), cl::Hidden); -unsigned CoalescerPair::compose(unsigned a, unsigned b) const { +static cl::opt<bool> +EnablePhysicalJoin("join-physregs", + cl::desc("Join physical register copies"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + +INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) + +char RegisterCoalescer::ID = 0; + +static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { if (!a) return b; if (!b) return a; - return tri_.composeSubRegIndices(a, b); + return tri.composeSubRegIndices(a, b); } -bool CoalescerPair::isMoveInstr(const MachineInstr *MI, - unsigned &Src, unsigned &Dst, - unsigned &SrcSub, unsigned &DstSub) const { +static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) { if (MI->isCopy()) { Dst = MI->getOperand(0).getReg(); DstSub = MI->getOperand(0).getSubReg(); @@ -52,7 +106,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI, SrcSub = MI->getOperand(1).getSubReg(); } else if (MI->isSubregToReg()) { Dst = MI->getOperand(0).getReg(); - DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + DstSub = compose(tri, MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); } else @@ -66,7 +121,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { flipped_ = crossClass_ = false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) return false; partial_ = SrcSub || DstSub; @@ -156,7 +211,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (!MI) return false; unsigned Src, Dst, SrcSub, DstSub; - if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub)) return false; // Find the virtual register that is srcReg_. @@ -185,13 +240,1558 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (dstReg_ != Dst) return false; // Registers match, do the subregisters line up? 
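The static compose above treats 0 as the identity subregister index, which lets plain COPYs and SUBREG_TO_REGs share one classification path in isMoveInstr. Illustrative checks with made-up index values:

assert(compose(tri, 0, 5) == 5);   // no outer index: inner passes through
assert(compose(tri, 7, 0) == 7);   // no inner index: outer is unchanged
// Otherwise the target decides: tri.composeSubRegIndices(a, b).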
- return compose(subIdx_, SrcSub) == DstSub; + return compose(tri_, subIdx_, SrcSub) == DstSub; + } +} + +void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { + /// Joined copies are not deleted immediately, but kept in JoinedCopies. + JoinedCopies.insert(CopyMI); + + /// Mark all register operands of CopyMI as <undef> so they won't affect dead + /// code elimination. + for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), + E = CopyMI->operands_end(); I != E; ++I) + if (I->isReg()) + I->setIsUndef(true); +} + +/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// being the source and IntB being the dest, thus this defines a value number +/// in IntB. If the source value number (in IntA) is defined by a copy from B, +/// see if we can merge these two pieces of B into a single value number, +/// eliminating a copy. For example: +/// +/// A3 = B0 +/// ... +/// B1 = A3 <- this copy +/// +/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 +/// value number to be replaced with B0 (which simplifies the B liveinterval). +/// +/// This returns true if an interval was modified. +/// +bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // the example above. + LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); + if (BLR == IntB.end()) return false; + VNInfo *BValNo = BLR->valno; + + // Get the location that B is defined at. Two options: either this value has + // an unknown definition point or it is defined at CopyIdx. If unknown, we + // can't process it. + if (!BValNo->isDefByCopy()) return false; + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; + VNInfo *AValNo = ALR->valno; + // If it's re-defined by an early clobber somewhere in the live range, then + // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. 
+ // See PR3149: + // 172 %ECX<def> = MOV32rr %reg1039<kill> + // 180 INLINEASM <es:subl $5,$1 + // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, + // %EAX<kill>, + // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 + // 188 %EAX<def> = MOV32rr %EAX<kill> + // 196 %ECX<def> = MOV32rr %ECX<kill> + // 204 %ECX<def> = MOV32rr %ECX<kill> + // 212 %EAX<def> = MOV32rr %EAX<kill> + // 220 %EAX<def> = MOV32rr %EAX + // 228 %reg1039<def> = MOV32rr %ECX<kill> + // The early clobber operand ties ECX input to the ECX def. + // + // The live interval of ECX is represented as this: + // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) + // The coalescer has no idea there was a def in the middle of [174,230]. + if (AValNo->hasRedefByEC()) + return false; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + if (!CP.isCoalescable(AValNo->getCopy())) + return false; + + // Get the LiveRange in IntB that this value number starts with. + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); + if (ValLR == IntB.end()) + return false; + + // Make sure that the end of the live range is inside the same block as + // CopyMI. + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; + + // Okay, we now know that ValLR ends in the same block that the CopyMI + // live-range starts. If there are no intervening live ranges between them in + // IntB, we can merge them. + if (ValLR+1 != BLR) return false; + + // If a live interval is a physical register, conservatively check if any + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { + DEBUG({ + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); + }); + return false; + } + } + + DEBUG({ + dbgs() << "Extending: "; + IntB.print(dbgs(), tri_); + }); + + SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + // We are about to delete CopyMI, so need to remove it as the 'instruction + // that defines this value #'. Update the valnum with the new defining + // instruction #. + BValNo->def = FillerStart; + BValNo->setCopy(0); + + // Okay, we can merge them. We need to insert a new liverange: + // [ValLR.end, BLR.begin) of either value number, then we merge the + // two value numbers. + IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + + // If the IntB live range is assigned to a physical register, and if that + // physreg has sub-registers, update their live intervals as well. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &SRLI = li_->getInterval(*SR); + SRLI.addRange(LiveRange(FillerStart, FillerEnd, + SRLI.getNextValue(FillerStart, 0, + li_->getVNInfoAllocator()))); + } + } + + // Okay, merge "B1" into the same value number as "B0". + if (BValNo != ValLR->valno) { + // If B1 is killed by a PHI, then the merged live range must also be killed + // by the same PHI, as B0 and B1 can not overlap. 
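The heart of AdjustCopiesBackFrom is two statements (names as in the patch, indices from the function's own A3/B0/B1 example): B was dead between its last use at 'A3 = B0' (ValLR->end) and the copy's def at 'B1 = A3' (BLR->start), so the hole is filled and the two B values become one.

IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // [ValLR->end, BLR->start)
IntB.MergeValueNumberInto(BValNo, ValLR->valno);          // B1 joins B0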
+ bool HasPHIKill = BValNo->hasPHIKill(); + IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (HasPHIKill) + ValLR->valno->setHasPHIKill(true); + } + DEBUG({ + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + // If the source instruction was killing the source register before the + // merge, unset the isKill marker given the live range has been extended. + int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + if (UIdx != -1) { + ValLREndInst->getOperand(UIdx).setIsKill(false); + } + + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. + if (ALR->end == CopyIdx) + li_->shrinkToUses(&IntA); + + ++numExtends; + return true; +} + +/// HasOtherReachingDefs - Return true if there are definitions of IntB +/// other than BValNo val# that can reach uses of AValNo val# of IntA. +bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, + LiveInterval &IntB, + VNInfo *AValNo, + VNInfo *BValNo) { + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); + AI != AE; ++AI) { + if (AI->valno != AValNo) continue; + LiveInterval::Ranges::iterator BI = + std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); + if (BI != IntB.ranges.begin()) + --BI; + for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { + if (BI->valno == BValNo) + continue; + if (BI->start <= AI->start && BI->end > AI->start) + return true; + if (BI->start > AI->start && BI->start < AI->end) + return true; + } } + return false; } -// Because of the way .a files work, we must force the SimpleRC -// implementation to be pulled in if the RegisterCoalescer classes are -// pulled in. Otherwise we run the risk of RegisterCoalescer being -// used, but the default implementation not being linked into the tool -// that uses it. -DEFINING_FILE_FOR(RegisterCoalescer) +/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// IntA being the source and IntB being the dest, thus this defines a value +/// number in IntB. If the source value number (in IntA) is defined by a +/// commutable instruction and its other operand is coalesced to the copy dest +/// register, see if we can transform the copy into a noop by commuting the +/// definition. For example, +/// +/// A3 = op A2 B0<kill> +/// ... +/// B1 = A3 <- this copy +/// ... +/// = op A3 <- more uses +/// +/// ==> +/// +/// B2 = op B0 A2<kill> +/// ... +/// B1 = B2 <- now an identity copy +/// ... +/// = op B2 <- more uses +/// +/// This returns true if an interval was modified. +/// +bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // FIXME: For now, only eliminate the copy by commuting its def when the + // source register is a virtual register. We want to guard against cases + // where the copy is a back edge copy and commuting the def lengthens the + // live interval of the source register to the entire loop. + if (CP.isPhys() && CP.isFlipped()) + return false; + + // Bail if there is no dst interval. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + + // BValNo is a value number in B that is defined by a copy from A. 'B1' in + // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); + if (!BValNo || !BValNo->isDefByCopy()) + return false; + + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + assert(AValNo && "COPY source not live"); + + // If other defs can reach uses of this def, then it's not safe to perform + // the optimization. + if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + return false; + MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); + if (!DefMI) + return false; + const MCInstrDesc &MCID = DefMI->getDesc(); + if (!MCID.isCommutable()) + return false; + // If DefMI is a two-address instruction then commuting it will change the + // destination register. + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + unsigned Op1, Op2, NewDstIdx; + if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + return false; + if (Op1 == UseOpIdx) + NewDstIdx = Op2; + else if (Op2 == UseOpIdx) + NewDstIdx = Op1; + else + return false; + + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (NewReg != IntB.reg || !NewDstMO.isKill()) + return false; + + // Make sure there are no other definitions of IntB that would reach the + // uses which the new definition can reach. + if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + return false; + + // Abort if the aliases of IntB.reg have values that are not simply the + // clobbers from the superreg. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && + HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) + return false; + + // If some of the uses of IntA.reg are already coalesced away, return false. + // It's not possible to determine whether it's safe to perform the coalescing. + for (MachineRegisterInfo::use_nodbg_iterator UI = + mri_->use_nodbg_begin(IntA.reg), + UE = mri_->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + SlotIndex UseIdx = li_->getInstructionIndex(UseMI); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end()) + continue; + if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) + return false; + } + + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); + + // At this point we have decided that it is legal to do this + // transformation. Start by commuting the instruction. + MachineBasicBlock *MBB = DefMI->getParent(); + MachineInstr *NewMI = tii_->commuteInstruction(DefMI); + if (!NewMI) + return false; + if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && + TargetRegisterInfo::isVirtualRegister(IntB.reg) && + !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) + return false; + if (NewMI != DefMI) { + li_->ReplaceMachineInstrInMaps(DefMI, NewMI); + MBB->insert(DefMI, NewMI); + MBB->erase(DefMI); + } + unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); + NewMI->getOperand(OpIdx).setIsKill(); + + // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, + // e.g. + // A = or A, B + // ... + // B = A + // ... + // C = A<kill> + // ... + // = B + + // Update uses of IntA of the specific Val# with IntB.
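An aside on the operand-selection step just above: once the def's tied use operand is known, the commuted destination must be whichever of the two commutable operands is not that tied use. A minimal standalone restatement of that decision, with plain unsigned indices standing in for machine operands (the names here are illustrative, not from this commit):

    #include <cassert>

    // Given the two commutable operand indices (Op1, Op2) and the use operand
    // tied to the def (UseOpIdx), pick the operand whose value becomes the new
    // destination. Fails when the tied use is not one of the commutable pair.
    static bool pickNewDst(unsigned Op1, unsigned Op2, unsigned UseOpIdx,
                           unsigned &NewDstIdx) {
      if (Op1 == UseOpIdx)
        NewDstIdx = Op2;
      else if (Op2 == UseOpIdx)
        NewDstIdx = Op1;
      else
        return false;
      return true;
    }

    int main() {
      unsigned NewDstIdx = 0;
      // An ADD-like instruction where operands 1 and 2 commute and operand 1
      // is tied to the def: the new destination value is operand 2.
      assert(pickNewDst(1, 2, 1, NewDstIdx) && NewDstIdx == 2);
      assert(!pickNewDst(1, 2, 3, NewDstIdx)); // tied use is not commutable
      return 0;
    }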
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), + UE = mri_->use_end(); UI != UE;) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (JoinedCopies.count(UseMI)) + continue; + if (UseMI->isDebugValue()) { + // FIXME These don't have an instruction index. Not clear we have enough + // info to decide whether to do this replacement or not. For now do it. + UseMO.setReg(NewReg); + continue; + } + SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end() || ULR->valno != AValNo) + continue; + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *tri_); + else + UseMO.setReg(NewReg); + if (UseMI == CopyMI) + continue; + if (!UseMI->isCopy()) + continue; + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + + // This copy will become a noop. If it's defining a new val#, merge it into + // BValNo. + SlotIndex DefIdx = UseIdx.getDefIndex(); + VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); + if (!DVNI) + continue; + DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + assert(DVNI->def == DefIdx); + BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + markAsJoined(UseMI); + } + + // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition + // is updated. + VNInfo *ValNo = BValNo; + ValNo->def = AValNo->def; + ValNo->setCopy(0); + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); + AI != AE; ++AI) { + if (AI->valno != AValNo) continue; + IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); + } + DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); + + IntA.removeValNo(AValNo); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); + ++numCommutes; + return true; +} + +/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial +/// computation, replace the copy by rematerializing the definition. +bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, + bool preserveSrcInt, + unsigned DstReg, + unsigned DstSubIdx, + MachineInstr *CopyMI) { + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); + LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); + assert(SrcLR != SrcInt.end() && "Live range not found!"); + VNInfo *ValNo = SrcLR->valno; + // If other defs can reach uses of this def, then it's not safe to perform + // the optimization. + if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) + return false; + MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + if (!DefMI) + return false; + assert(DefMI && "Defining instruction disappeared"); + const MCInstrDesc &MCID = DefMI->getDesc(); + if (!MCID.isAsCheapAsAMove()) + return false; + if (!tii_->isTriviallyReMaterializable(DefMI, AA)) + return false; + bool SawStore = false; + if (!DefMI->isSafeToMove(tii_, AA, SawStore)) + return false; + if (MCID.getNumDefs() != 1) + return false; + if (!DefMI->isImplicitDef()) { + // Make sure the copy destination register class fits the instruction + // definition register class. The mismatch can happen as a result of earlier + // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (mri_->getRegClass(DstReg) != RC) + return false; + } else if (!RC->contains(DstReg)) + return false; + } + + // If destination register has a sub-register index on it, make sure it + // matches the instruction register class. + if (DstSubIdx) { + const MCInstrDesc &MCID = DefMI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); + const TargetRegisterClass *DstSubRC = + DstRC->getSubRegisterRegClass(DstSubIdx); + const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_); + if (DefRC == DstRC) + DstSubIdx = 0; + else if (DefRC != DstSubRC) + return false; + } + + RemoveCopyFlag(DstReg, CopyMI); + + MachineBasicBlock *MBB = CopyMI->getParent(); + MachineBasicBlock::iterator MII = + llvm::next(MachineBasicBlock::iterator(CopyMI)); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); + MachineInstr *NewMI = prior(MII); + + // CopyMI may have implicit operands, transfer them over to the newly + // rematerialized instruction. And update implicit def interval valnos. + for (unsigned i = CopyMI->getDesc().getNumOperands(), + e = CopyMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = CopyMI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + if (MO.isDef()) + RemoveCopyFlag(MO.getReg(), CopyMI); + } + + NewMI->copyImplicitOps(CopyMI); + li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); + CopyMI->eraseFromParent(); + ReMatCopies.insert(CopyMI); + ReMatDefs.insert(DefMI); + DEBUG(dbgs() << "Remat: " << *NewMI); + ++NumReMats; + + // The source interval can become smaller because we removed a use. + if (preserveSrcInt) + li_->shrinkToUses(&SrcInt); + + return true; +} + +/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and +/// update the subregister number if it is not zero. If DstReg is a +/// physical register and the existing subregister number of the def / use +/// being updated is not zero, make sure to set it to the correct physical +/// subregister. +void +RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + // Update LiveDebugVariables. + ldv_->renameRegister(SrcReg, DstReg, SubIdx); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. + if (DstIsPhys) { + if (UseMI->isCopy() && + !UseMI->getOperand(1).getSubReg() && + !UseMI->getOperand(0).getSubReg() && + UseMI->getOperand(1).getReg() == SrcReg && + UseMI->getOperand(0).getReg() != SrcReg && + UseMI->getOperand(0).getReg() != DstReg && + !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, + UseMI->getOperand(0).getReg(), 0, UseMI)) + continue; + } + + SmallVector<unsigned,8> Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + bool Kills = false, Deads = false; + + // Replace SrcReg with DstReg in all UseMI operands. 
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + Kills |= MO.isKill(); + Deads |= MO.isDead(); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *tri_); + else + MO.substVirtReg(DstReg, SubIdx, *tri_); + } + + // This instruction is a copy that will be removed. + if (JoinedCopies.count(UseMI)) + continue; + + if (SubIdx) { + // If UseMI was a simple SrcReg def, make sure we didn't turn it into a + // read-modify-write of DstReg. + if (Deads) + UseMI->addRegisterDead(DstReg, tri_); + else if (!Reads && Writes) + UseMI->addRegisterDefined(DstReg, tri_); + + // Kill flags apply to the whole physical register. + if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); + } +} + +/// removeIntervalIfEmpty - Check if the live interval of a physical register +/// is empty, if so remove it and also remove the empty intervals of its +/// sub-registers. Return true if live interval is removed. +static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, + const TargetRegisterInfo *tri_) { + if (li.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(li.reg)) + for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &sli = li_->getInterval(*SR); + if (sli.empty()) + li_->removeInterval(*SR); + } + li_->removeInterval(li.reg); + return true; + } + return false; +} + +/// RemoveDeadDef - If a def of a live interval is now determined dead, remove +/// the val# it defines. If the live interval becomes empty, remove it as well. +bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, + MachineInstr *DefMI) { + SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); + LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); + if (DefIdx != MLR->valno->def) + return false; + li.removeValNo(MLR->valno); + return removeIntervalIfEmpty(li, li_, tri_); +} + +void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, + const MachineInstr *CopyMI) { + SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + if (li_->hasInterval(DstReg)) { + LiveInterval &LI = li_->getInterval(DstReg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } + if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) + return; + for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + LiveInterval &LI = li_->getInterval(*AS); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } +} + +/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. +/// We need to be careful about coalescing a source physical register with a +/// virtual register. Once the coalescing is done, it cannot be broken and these +/// are not spillable! If the destination interval uses are far away, think +/// twice about coalescing them! +bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { + bool Allocatable = li_->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. 
+ if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + + if (!EnablePhysicalJoin) { + DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); + return false; + } + + // Only coalesce to an allocatable physreg; we don't want to risk modifying + // reserved registers. + if (!Allocatable) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial()) { + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold) { + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + return false; + } + } + return true; +} + +/// isWinToJoinCrossClass - Return true if it's profitable to coalesce +/// two virtual registers from different register classes. +bool +RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC) { + unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); + // This heuristic is good enough in practice, but it's obviously not *right*. + // 4 is a magic number that works well enough for x86, ARM, etc. It filters + // out all but the most restrictive register classes. + if (NewRCCount > 4 || + // Early exit if the function is fairly small; coalesce aggressively in + // that case. For really special register classes with 3 or + // fewer registers, be a bit more careful. + (li_->getFuncInstructionCount() / NewRCCount) < 8) + return true; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + LiveInterval &DstInt = li_->getInterval(DstReg); + unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); + unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + + // Coalesce aggressively if the intervals are small compared to the number of + // registers in the new class. The number 4 is fairly arbitrary, chosen to be + // less aggressive than the 8 used for the whole function size. + const unsigned ThresSize = 4 * NewRCCount; + if (SrcSize <= ThresSize && DstSize <= ThresSize) + return true; + + // Estimate *register use density*. If it doubles or more, abort.
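The test that follows estimates density as uses per instruction, scaled by the number of allocatable registers in the class, and rejects the join when the merged interval would more than double the source's density. A small sketch of that integer cross-multiplication with hypothetical counts (the numbers are invented for illustration, not taken from this commit):

    #include <cassert>

    // Density is uses / (size * allocatable regs). The comparison is kept in
    // integers by cross-multiplying:
    //   NewUses/(NewSize*NewRCCount) > 2 * SrcUses/(SrcSize*SrcRCCount)
    // <=> NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount
    static bool densityWouldDouble(unsigned SrcUses, unsigned SrcSize,
                                   unsigned SrcRCCount, unsigned NewUses,
                                   unsigned NewSize, unsigned NewRCCount) {
      return NewUses * SrcSize * SrcRCCount > 2 * SrcUses * NewSize * NewRCCount;
    }

    int main() {
      // Hypothetical example: the source interval has 4 uses over 40
      // instructions in a 16-register class; the merged interval would have
      // 20 uses over 50 instructions in an 8-register class.
      assert(densityWouldDouble(4, 40, 16, 20, 50, 8)); // 12800 > 3200, abort
      return 0;
    }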
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), + mri_->use_nodbg_end()); + unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), + mri_->use_nodbg_end()); + unsigned NewUses = SrcUses + DstUses; + unsigned NewSize = SrcSize + DstSize; + if (SrcRC != NewRC && SrcSize > ThresSize) { + unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); + if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) + return false; + } + if (DstRC != NewRC && DstSize > ThresSize) { + unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); + if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) + return false; + } + return true; +} + + +/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// which are the src/dst of the copy instruction CopyMI. This returns true +/// if the copy was successfully coalesced away. If it is not currently +/// possible to coalesce this interval, but it may be possible if other +/// things get coalesced, then it returns true by reference in 'Again'. +bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { + + Again = false; + if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) + return false; // Already done. + + DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; + } + + // If they are already joined we continue. + if (CP.getSrcReg() == CP.getDstReg()) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tCopy already coalesced.\n"); + return false; // Not coalescable. + } + + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) + << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << "\n"); + + // Enforce policies. + if (CP.isPhys()) { + if (!shouldJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + return false; + } + } else { + // Avoid constraining virtual register regclass too much. + if (CP.isCrossClass()) { + DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); + if (DisableCrossClassJoin) { + DEBUG(dbgs() << "\tCross-class joins disabled.\n"); + return false; + } + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); + Again = true; // May be possible to coalesce later. + return false; + } + } + + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // Okay, attempt to join these two intervals. On failure, this returns false. + // Otherwise, if one of the intervals being joined is a physreg, this method + // always canonicalizes DstInt to be it. The output "SrcInt" will not have + // been modified, so we can use this information below to update aliases. + if (!JoinIntervals(CP)) { + // Coalescing failed. + + // If definition of source is defined by trivial computation, try + // rematerializing it. 
+ if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + + // If we can eliminate the copy without merging the live ranges, do so now. + if (!CP.isPartial()) { + if (AdjustCopiesBackFrom(CP, CopyMI) || + RemoveCopyByCommutingDef(CP, CopyMI)) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tTrivial!\n"); + return true; + } + } + + // Otherwise, we are unable to join the intervals. + DEBUG(dbgs() << "\tInterference!\n"); + Again = true; // May be possible to coalesce later. + return false; + } + + // Coalescing to a virtual register that is of a sub-register class of the + // other. Make sure the resulting register is set to the right register class. + if (CP.isCrossClass()) { + ++numCrossRCs; + mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); + } + + // Remember to delete the copy instruction. + markAsJoined(CopyMI); + + UpdateRegDefsUses(CP); + + // If we have extended the live range of a physical register, make sure we + // update live-in lists as well. + if (CP.isPhys()) { + SmallVector<MachineBasicBlock*, 16> BlockSeq; + // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the + // ranges for this, and they are preserved. + LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); + for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); + I != E; ++I ) { + li_->findLiveInMBBs(I->start, I->end, BlockSeq); + for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { + MachineBasicBlock &block = *BlockSeq[idx]; + if (!block.isLiveIn(CP.getDstReg())) + block.addLiveIn(CP.getDstReg()); + } + BlockSeq.clear(); + } + } + + // SrcReg is guaranteed to be the register whose live interval is being + // merged. + li_->removeInterval(CP.getSrcReg()); + + // Update regalloc hint. + tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); + + DEBUG({ + LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); + dbgs() << "\tJoined. Result = "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + ++numJoins; + return true; +} + +/// ComputeUltimateVN - Assuming we are going to join two live intervals, +/// compute what the resultant value numbers for each value in the two input +/// ranges will be. This is complicated by copies between the two which can +/// and will commonly cause multiple value numbers to be merged into one. +/// +/// VN is the value number that we're trying to resolve. InstDefiningValue +/// keeps track of the new InstDefiningValue assignment for the result +/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of +/// whether a value in this or other is a copy from the opposite set. +/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have +/// already been assigned. +/// +/// ThisFromOther[x] - If x is defined as a copy from the other interval, this +/// contains the value number the copy is from. +/// +static unsigned ComputeUltimateVN(VNInfo *VNI, + SmallVector<VNInfo*, 16> &NewVNInfo, + DenseMap<VNInfo*, VNInfo*> &ThisFromOther, + DenseMap<VNInfo*, VNInfo*> &OtherFromThis, + SmallVector<int, 16> &ThisValNoAssignments, + SmallVector<int, 16> &OtherValNoAssignments) { + unsigned VN = VNI->id; + + // If the VN has already been computed, just return it. + if (ThisValNoAssignments[VN] >= 0) + return ThisValNoAssignments[VN]; + assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); + + // If this val is not a copy from the other val, then it must be a new value + // number in the destination.
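A standalone model of the mutual recursion this function performs, using plain ints for value numbers and std::map for the copy relations; -1 means unassigned and -2 marks a value currently being resolved, mirroring the sentinel convention above. All names and the simplified types are illustrative only:

    #include <cassert>
    #include <map>
    #include <vector>

    // FromOther maps a value number to the value number on the opposite side
    // it was copied from. A value that is not a copy gets a fresh slot in
    // NewVNs; a copy resolves to whatever its source ultimately resolves to.
    static int resolveVN(int VN, std::vector<int> &NewVNs,
                         const std::map<int, int> &ThisFromOther,
                         const std::map<int, int> &OtherFromThis,
                         std::vector<int> &ThisAssign,
                         std::vector<int> &OtherAssign) {
      if (ThisAssign[VN] >= 0)
        return ThisAssign[VN];                 // already resolved
      assert(ThisAssign[VN] != -2 && "cyclic value numbers");
      std::map<int, int>::const_iterator I = ThisFromOther.find(VN);
      if (I == ThisFromOther.end()) {          // not a copy: new value number
        NewVNs.push_back(VN);
        return ThisAssign[VN] = (int)NewVNs.size() - 1;
      }
      ThisAssign[VN] = -2;                     // in progress; chase the copy
      return ThisAssign[VN] = resolveVN(I->second, NewVNs, OtherFromThis,
                                        ThisFromOther, OtherAssign, ThisAssign);
    }

    int main() {
      // LHS value 0 is a copy of RHS value 0, which is not a copy of
      // anything: both must collapse to the same ultimate value number.
      std::vector<int> NewVNs, L(1, -1), R(1, -1);
      std::map<int, int> LfromR, RfromL;
      LfromR[0] = 0;
      int a = resolveVN(0, NewVNs, LfromR, RfromL, L, R);
      assert(a == 0 && R[0] == 0 && NewVNs.size() == 1);
      return 0;
    }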
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); + if (I == ThisFromOther.end()) { + NewVNInfo.push_back(VNI); + return ThisValNoAssignments[VN] = NewVNInfo.size()-1; + } + VNInfo *OtherValNo = I->second; + + // Otherwise, this *is* a copy from the RHS. If the other side has already + // been computed, return it. + if (OtherValNoAssignments[OtherValNo->id] >= 0) + return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; + + // Mark this value number as currently being computed, then ask what the + // ultimate value # of the other value is. + ThisValNoAssignments[VN] = -2; + unsigned UltimateVN = + ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, + OtherValNoAssignments, ThisValNoAssignments); + return ThisValNoAssignments[VN] = UltimateVN; +} + + +// Find out if we have something like +// A = X +// B = X +// if so, we can pretend this is actually +// A = X +// B = A +// which allows us to coalesce A and B. +// VNI is the definition of B. LR is the live range of A that includes +// the slot just before B. If we return true, we add "B = X" to DupCopies. +static bool RegistersDefinedFromSameValue(LiveIntervals &li, + const TargetRegisterInfo &tri, + CoalescerPair &CP, + VNInfo *VNI, + LiveRange *LR, + SmallVector<MachineInstr*, 8> &DupCopies) { + // FIXME: This is very conservative. For example, we don't handle + // physical registers. + + MachineInstr *MI = VNI->getCopy(); + + if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + return false; + + unsigned Dst = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (!TargetRegisterInfo::isVirtualRegister(Src) || + !TargetRegisterInfo::isVirtualRegister(Dst)) + return false; + + unsigned A = CP.getDstReg(); + unsigned B = CP.getSrcReg(); + + if (B == Dst) + std::swap(A, B); + assert(Dst == A); + + VNInfo *Other = LR->valno; + if (!Other->isDefByCopy()) + return false; + const MachineInstr *OtherMI = Other->getCopy(); + + if (!OtherMI->isFullCopy()) + return false; + + unsigned OtherDst = OtherMI->getOperand(0).getReg(); + unsigned OtherSrc = OtherMI->getOperand(1).getReg(); + + if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || + !TargetRegisterInfo::isVirtualRegister(OtherDst)) + return false; + + assert(OtherDst == B); + + if (Src != OtherSrc) + return false; + + // If the copies use two different value numbers of X, we cannot merge + // A and B. + LiveInterval &SrcInt = li.getInterval(Src); + if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def)) + return false; + + DupCopies.push_back(MI); + + return true; +} + +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. +bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { + LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); + DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // If a live interval is a physical register, check for interference with any + // aliases. The interference check implemented here is a bit more conservative + // than the full interference check below. We allow overlapping live ranges + // only when one is a copy of the other.
+ if (CP.isPhys()) { + for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){ + if (!li_->hasInterval(*AS)) + continue; + const LiveInterval &LHS = li_->getInterval(*AS); + LiveInterval::const_iterator LI = LHS.begin(); + for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); + RI != RE; ++RI) { + LI = std::lower_bound(LI, LHS.end(), RI->start); + // Does LHS have an overlapping live range starting before RI? + if ((LI != LHS.begin() && LI[-1].end > RI->start) && + (RI->start != RI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; + }); + return false; + } + + // Check that LHS ranges beginning in this range are copies. + for (; LI != LHS.end() && LI->start < RI->end; ++LI) { + if (LI->start != LI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; + }); + return false; + } + } + } + } + } + + // Compute the final value assignment, assuming that the live ranges can be + // coalesced. + SmallVector<int, 16> LHSValNoAssignments; + SmallVector<int, 16> RHSValNoAssignments; + DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; + DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; + SmallVector<VNInfo*, 16> NewVNInfo; + + SmallVector<MachineInstr*, 8> DupCopies; + + LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); + DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // Figure out the value # from the RHS. + LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + MachineInstr *MI = VNI->getCopy(); + if (!CP.isCoalescable(MI) && + !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + continue; + + LHSValsDefinedFromRHS[VNI] = lr->valno; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + continue; + + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; + + // Figure out the value # from the LHS. + LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. 
+ MachineInstr *MI = VNI->getCopy(); + if (!CP.isCoalescable(MI) && + !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies)) + continue; + + RHSValsDefinedFromLHS[VNI] = lr->valno; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; + } + + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Skip ahead until the first place of potential sharing. + if (I != IE && J != JE) { + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } + } + + while (I != IE && J != JE) { + // Determine if these two live ranges overlap. + bool Overlaps; + if (I->start < J->start) { + Overlaps = I->end > J->start; + } else { + Overlaps = J->end > I->start; + } + + // If so, check value # info to determine if they are really different. + if (Overlaps) { + // If the overlapping live ranges map to the same value number in the + // result live range, we can still coalesce them. If not, we can't. + if (LHSValNoAssignments[I->valno->id] != + RHSValNoAssignments[J->valno->id]) + return false; + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) + return false; + } + + if (I->end < J->end) + ++I; + else + ++J; + } + + // Update kill info. Some live ranges are extended due to copy coalescing. + for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), + E = LHSValsDefinedFromRHS.end(); I != E; ++I) { + VNInfo *VNI = I->first; + unsigned LHSValID = LHSValNoAssignments[VNI->id]; + if (VNI->hasPHIKill()) + NewVNInfo[LHSValID]->setHasPHIKill(true); + } + + // Update kill info. Some live ranges are extended due to copy coalescing.
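The compatibility walk above is a classic two-pointer sweep over two sorted lists of half-open ranges. A minimal sketch of the underlying overlap test, with std::pair standing in for live ranges (the real loop consults the value-number mappings instead of bailing on any overlap; these types and names are simplified stand-ins):

    #include <cassert>
    #include <utility>
    #include <vector>

    typedef std::pair<int, int> Range; // half-open [first, second)

    // Advance whichever list's current range ends first; any pair that
    // neither precedes nor follows the other must intersect.
    static bool anyOverlap(const std::vector<Range> &A,
                           const std::vector<Range> &B) {
      size_t i = 0, j = 0;
      while (i < A.size() && j < B.size()) {
        if (A[i].second <= B[j].first)      ++i; // A[i] ends before B[j]
        else if (B[j].second <= A[i].first) ++j; // B[j] ends before A[i]
        else return true;                        // ranges intersect
      }
      return false;
    }

    int main() {
      std::vector<Range> A, B1, B2;
      A.push_back(Range(0, 10));
      A.push_back(Range(20, 30));
      B1.push_back(Range(10, 20));  // fits exactly into the gap
      assert(!anyOverlap(A, B1));
      B2.push_back(Range(5, 12));   // collides with [0,10)
      assert(anyOverlap(A, B2));
      return 0;
    }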
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), + E = RHSValsDefinedFromLHS.end(); I != E; ++I) { + VNInfo *VNI = I->first; + unsigned RHSValID = RHSValNoAssignments[VNI->id]; + if (VNI->hasPHIKill()) + NewVNInfo[RHSValID]->setHasPHIKill(true); + } + + if (LHSValNoAssignments.empty()) + LHSValNoAssignments.push_back(-1); + if (RHSValNoAssignments.empty()) + RHSValNoAssignments.push_back(-1); + + SmallVector<unsigned, 8> SourceRegisters; + for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), + E = DupCopies.end(); I != E; ++I) { + MachineInstr *MI = *I; + + // We have pretended that the assignment to B in + // A = X + // B = X + // was actually a copy from A. Now that we decided to coalesce A and B, + // transform the code into + // A = X + // X = X + // and mark the X as coalesced to keep the illusion. + unsigned Src = MI->getOperand(1).getReg(); + SourceRegisters.push_back(Src); + MI->getOperand(0).substVirtReg(Src, 0, *tri_); + + markAsJoined(MI); + } + + // If B = X was the last use of X in a live range, we have to shrink it now + // that B = X is gone. + for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), + E = SourceRegisters.end(); I != E; ++I) { + li_->shrinkToUses(&li_->getInterval(*I)); + } + + // If we get here, we know that we can coalesce the live ranges. Ask the + // intervals to coalesce themselves now. + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + mri_); + return true; +} + +namespace { + // DepthMBBCompare - Comparison predicate that sorts first based on the loop + // depth of the basic block (the unsigned), and then on the MBB number. + struct DepthMBBCompare { + typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; + bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { + // Deeper loops first + if (LHS.first != RHS.first) + return LHS.first > RHS.first; + + // Prefer blocks that are more connected in the CFG. This takes care of + // the most difficult copies first while intervals are short. + unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); + unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); + if (cl != cr) + return cl > cr; + + // As a last resort, sort by block number. + return LHS.second->getNumber() < RHS.second->getNumber(); + } + }; +} + +void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB, + std::vector<MachineInstr*> &TryAgain) { + DEBUG(dbgs() << MBB->getName() << ":\n"); + + SmallVector<MachineInstr*, 8> VirtCopies; + SmallVector<MachineInstr*, 8> PhysCopies; + SmallVector<MachineInstr*, 8> ImpDefCopies; + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E;) { + MachineInstr *Inst = MII++; + + // If this is neither a copy nor an extract_subreg, we can't join intervals.
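A small usage sketch of the ordering DepthMBBCompare establishes above, with (loop depth, block number) pairs standing in for machine basic blocks; the CFG-connectedness tie-breaker is omitted for brevity, and all names here are illustrative:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, int> DepthBlock; // (loop depth, block number)

    // Deeper loops sort first; block number breaks ties.
    struct DeeperFirst {
      bool operator()(const DepthBlock &L, const DepthBlock &R) const {
        if (L.first != R.first)
          return L.first > R.first;   // deeper loops first
        return L.second < R.second;   // then lower block number
      }
    };

    int main() {
      std::vector<DepthBlock> Blocks;
      Blocks.push_back(DepthBlock(0, 1));
      Blocks.push_back(DepthBlock(2, 5));
      Blocks.push_back(DepthBlock(2, 3));
      std::sort(Blocks.begin(), Blocks.end(), DeeperFirst());
      // Innermost blocks are coalesced first, block 1 (depth 0) last.
      assert(Blocks[0] == DepthBlock(2, 3) && Blocks[2] == DepthBlock(0, 1));
      return 0;
    }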
+ unsigned SrcReg, DstReg; + if (Inst->isCopy()) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(1).getReg(); + } else if (Inst->isSubregToReg()) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(2).getReg(); + } else + continue; + + bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + ImpDefCopies.push_back(Inst); + else if (SrcIsPhys || DstIsPhys) + PhysCopies.push_back(Inst); + else + VirtCopies.push_back(Inst); + } + + // Try coalescing implicit copies and insert_subreg <undef> first, + // followed by copies to / from physical registers, then finally copies + // from virtual registers to virtual registers. + for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = ImpDefCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = PhysCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } + for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { + MachineInstr *TheCopy = VirtCopies[i]; + bool Again = false; + if (!JoinCopy(TheCopy, Again)) + if (Again) + TryAgain.push_back(TheCopy); + } +} + +void RegisterCoalescer::joinIntervals() { + DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); + + std::vector<MachineInstr*> TryAgainList; + if (loopInfo->empty()) { + // If there are no loops in the function, join intervals in function order. + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); + I != E; ++I) + CopyCoalesceInMBB(I, TryAgainList); + } else { + // Otherwise, join intervals in inner loops before other intervals. + // Unfortunately we can't just iterate over the loop hierarchy here because + // there may be more MBB's than BB's. Collect MBB's for sorting. + + // Join intervals in the function prolog first. We want to join physical + // registers with virtual registers before the intervals get too long. + std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; + for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){ + MachineBasicBlock *MBB = I; + MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I)); + } + + // Sort by loop depth. + std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); + + // Finally, join intervals in loop nest order. + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) + CopyCoalesceInMBB(MBBs[i].second, TryAgainList); + } + + // Joining intervals can allow other intervals to be joined. Iteratively join + // until we make no progress. + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; + + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + MachineInstr *&TheCopy = TryAgainList[i]; + if (!TheCopy) + continue; + + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy= 0; // Mark this one as done.
+ ProgressMade = true; + } + } + } +} + +void RegisterCoalescer::releaseMemory() { + JoinedCopies.clear(); + ReMatCopies.clear(); + ReMatDefs.clear(); +} + +bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { + mf_ = &fn; + mri_ = &fn.getRegInfo(); + tm_ = &fn.getTarget(); + tri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + li_ = &getAnalysis<LiveIntervals>(); + ldv_ = &getAnalysis<LiveDebugVariables>(); + AA = &getAnalysis<AliasAnalysis>(); + loopInfo = &getAnalysis<MachineLoopInfo>(); + + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); + + if (VerifyCoalescing) + mf_->verify(this, "Before register coalescing"); + + RegClassInfo.runOnMachineFunction(fn); + + // Join (coalesce) intervals if requested. + if (EnableJoining) { + joinIntervals(); + DEBUG({ + dbgs() << "********** INTERVALS POST JOINING **********\n"; + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); + I != E; ++I){ + I->second->print(dbgs(), tri_); + dbgs() << "\n"; + } + }); + } + + // Perform a final pass over the instructions and compute spill weights + // and remove identity moves. + SmallVector<unsigned, 4> DeadDefs; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ) { + MachineInstr *MI = mii; + if (JoinedCopies.count(MI)) { + // Delete all coalesced copies. + bool DoDelete = true; + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + MI->getNumOperands() > 2) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 + // or else the scavenger may complain. LowerSubregs will + // delete them later. + DoDelete = false; + + if (MI->allDefsAreDead()) { + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + li_->hasInterval(SrcReg)) + li_->shrinkToUses(&li_->getInterval(SrcReg)); + DoDelete = true; + } + if (!DoDelete) { + // We need the instruction to adjust liveness, so make it a KILL. + if (MI->isSubregToReg()) { + MI->RemoveOperand(3); + MI->RemoveOperand(1); + } + MI->setDesc(tii_->get(TargetOpcode::KILL)); + mii = llvm::next(mii); + } else { + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; + } + continue; + } + + // Now check if this is a remat'ed def instruction which is now dead. + if (ReMatDefs.count(MI)) { + bool isDead = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + DeadDefs.push_back(Reg); + if (MO.isDead()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !mri_->use_nodbg_empty(Reg)) { + isDead = false; + break; + } + } + if (isDead) { + while (!DeadDefs.empty()) { + unsigned DeadDef = DeadDefs.back(); + DeadDefs.pop_back(); + RemoveDeadDef(li_->getInterval(DeadDef), MI); + } + li_->RemoveMachineInstrFromMaps(mii); + mii = mbbi->erase(mii); + continue; + } else + DeadDefs.clear(); + } + + ++mii; + + // Check for kill flags that are now unnecessary.
+ if (li_->isNotInMIMap(MI)) continue; + SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + unsigned reg = MO.getReg(); + if (!reg || !li_->hasInterval(reg)) continue; + if (!li_->getInterval(reg).killedAt(DefIdx)) { + MO.setIsKill(false); + continue; + } + // When leaving a kill flag on a physreg, check if any subregs should + // remain alive. + if (!TargetRegisterInfo::isPhysicalRegister(reg)) + continue; + for (const unsigned *SR = tri_->getSubRegisters(reg); + unsigned S = *SR; ++SR) + if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) + MI->addRegisterDefined(S, tri_); + } + } + } + + DEBUG(dump()); + DEBUG(ldv_->dump()); + if (VerifyCoalescing) + mf_->verify(this, "After register coalescing"); + return true; +} + +/// print - Implement the dump method. +void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { + li_->print(O, m); +} + +RegisterCoalescer *llvm::createRegisterCoalescer() { + return new RegisterCoalescer(); +} diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/RegisterCoalescer.h index 92f6c6474c63..4131d91c00e9 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -1,4 +1,4 @@ -//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===// +//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,37 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file implements a simple register copy coalescing phase. +// This file contains the abstract interface for register coalescers, +// allowing them to interact with and query register allocators. // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H -#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "RegisterClassInfo.h" +#include "llvm/Support/IncludeFile.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/SmallPtrSet.h" + +#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H +#define LLVM_CODEGEN_REGISTER_COALESCER_H namespace llvm { - class SimpleRegisterCoalescing; - class LiveDebugVariables; + + class MachineFunction; + class RegallocQuery; + class AnalysisUsage; + class MachineInstr; class TargetRegisterInfo; + class TargetRegisterClass; class TargetInstrInfo; + class LiveDebugVariables; class VirtRegMap; class MachineLoopInfo; - /// CopyRec - Representation for copy instructions in coalescer queue. - /// - struct CopyRec { - MachineInstr *MI; - unsigned LoopDepth; - CopyRec(MachineInstr *mi, unsigned depth) - : MI(mi), LoopDepth(depth) {} - }; + class CoalescerPair; - class SimpleRegisterCoalescing : public MachineFunctionPass, - public RegisterCoalescer { + /// An abstract interface for register coalescers. Coalescers must + /// implement this interface to be part of the coalescer analysis + /// group. + class RegisterCoalescer : public MachineFunctionPass { MachineFunction* mf_; MachineRegisterInfo* mri_; const TargetMachine* tm_; @@ -61,41 +62,20 @@ namespace llvm { /// been remat'ed. 
SmallPtrSet<MachineInstr*, 8> ReMatDefs; - public: - static char ID; // Pass identifcation, replacement for typeid - SimpleRegisterCoalescing() : MachineFunctionPass(ID) { - initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - - /// runOnMachineFunction - pass entry point - virtual bool runOnMachineFunction(MachineFunction&); - - bool coalesceFunction(MachineFunction &mf, RegallocQuery &) { - // This runs as an independent pass, so don't do anything. - return false; - } - - /// print - Implement the dump method. - virtual void print(raw_ostream &O, const Module* = 0) const; - - private: /// joinIntervals - join compatible live intervals void joinIntervals(); /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting /// copies that cannot yet be coalesced into the "TryAgain" list. void CopyCoalesceInMBB(MachineBasicBlock *MBB, - std::vector<CopyRec> &TryAgain); + std::vector<MachineInstr*> &TryAgain); /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently /// possible to coalesce this interval, but it may be possible if other /// things get coalesced, then it returns true by reference in 'Again'. - bool JoinCopy(CopyRec &TheCopy, bool &Again); + bool JoinCopy(MachineInstr *TheCopy, bool &Again); /// JoinIntervals - Attempt to join these two intervals. On failure, this /// returns false. The output "SrcInt" will not have been modified, so we can /// @@ -155,8 +135,109 @@ namespace llvm { /// markAsJoined - Remember that CopyMI has already been joined. void markAsJoined(MachineInstr *CopyMI); + + public: + static char ID; // Class identification, replacement for typeinfo + RegisterCoalescer() : MachineFunctionPass(ID) { + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + } + + /// Register allocators must call this from their own + /// getAnalysisUsage to cover the case where the coalescer is not + /// a Pass in the proper sense and isn't managed by PassManager. + /// PassManager needs to know which analyses to make available and + /// which to invalidate when running the register allocator or any + /// pass that might call coalescing. The long-term solution is to + /// allow hierarchies of PassManagers. + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory(); + + /// runOnMachineFunction - pass entry point + virtual bool runOnMachineFunction(MachineFunction&); + + /// print - Implement the dump method. + virtual void print(raw_ostream &O, const Module* = 0) const; + }; + + /// CoalescerPair - A helper class for register coalescers. When deciding if + /// two registers can be coalesced, CoalescerPair can determine if a copy + /// instruction would become an identity copy after coalescing. + class CoalescerPair { + const TargetInstrInfo &tii_; + const TargetRegisterInfo &tri_; + + /// dstReg_ - The register that will be left after coalescing. It can be a + /// virtual or physical register. + unsigned dstReg_; + + /// srcReg_ - The virtual register that will be coalesced into dstReg_. + unsigned srcReg_; + + /// subIdx_ - The subregister index of srcReg_ in dstReg_. It is possible to + /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a + /// virtual register.
+ unsigned subIdx_; + + /// partial_ - True when the original copy was a partial subregister copy. + bool partial_; + + /// crossClass_ - True when both regs are virtual, and newRC is constrained. + bool crossClass_; + + /// flipped_ - True when DstReg and SrcReg are reversed from the original copy + /// instruction. + bool flipped_; + + /// newRC_ - The register class of the coalesced register, or NULL if dstReg_ + /// is a physreg. + const TargetRegisterClass *newRC_; + + public: + CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri) + : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0), + partial_(false), crossClass_(false), flipped_(false), newRC_(0) {} + + /// setRegisters - set registers to match the copy instruction MI. Return + /// false if MI is not a coalescable copy instruction. + bool setRegisters(const MachineInstr*); + + /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible + /// because dstReg_ is a physical register, or subIdx_ is set. + bool flip(); + + /// isCoalescable - Return true if MI is a copy instruction that will become + /// an identity copy after coalescing. + bool isCoalescable(const MachineInstr*) const; + + /// isPhys - Return true if DstReg is a physical register. + bool isPhys() const { return !newRC_; } + + /// isPartial - Return true if the original copy instruction did not copy the + /// full register, but was a subreg operation. + bool isPartial() const { return partial_; } + + /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller register class than DstReg's. + bool isCrossClass() const { return crossClass_; } + + /// isFlipped - Return true when getSrcReg is the register being defined by + /// the original copy instruction. + bool isFlipped() const { return flipped_; } + + /// getDstReg - Return the register (virtual or physical) that will remain + /// after coalescing. + unsigned getDstReg() const { return dstReg_; } + + /// getSrcReg - Return the virtual register that will be coalesced away. + unsigned getSrcReg() const { return srcReg_; } + + /// getSubIdx - Return the subregister index in DstReg that SrcReg will be + /// coalesced into, or 0. + unsigned getSubIdx() const { return subIdx_; } + + /// getNewRC - Return the register class of the coalesced register.
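To make the intent of this interface concrete, here is a toy model of the identity-copy test that isCoalescable performs once srcReg_ has been merged into dstReg_; plain unsigneds stand in for registers, subregister indices are ignored, and none of this is part of the commit itself:

    #include <cassert>

    // After coalescing, every occurrence of SrcReg reads as DstReg, so a copy
    // becomes an identity copy exactly when both operands resolve to the same
    // register. All names here are hypothetical stand-ins.
    struct ToyPair {
      unsigned DstReg, SrcReg;
      unsigned resolve(unsigned Reg) const {
        return Reg == SrcReg ? DstReg : Reg;
      }
      bool isIdentityAfterJoin(unsigned CopyDst, unsigned CopySrc) const {
        return resolve(CopyDst) == resolve(CopySrc);
      }
    };

    int main() {
      ToyPair CP = { /*DstReg=*/7, /*SrcReg=*/5 };
      assert(CP.isIdentityAfterJoin(7, 5));  // %7 = COPY %5 would go away
      assert(!CP.isIdentityAfterJoin(7, 6)); // unrelated source survives
      return 0;
    }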
+ const TargetRegisterClass *getNewRC() const { return newRC_; } + }; } // End llvm namespace #endif diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index c8de3823553c..8b02ec44273a 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -434,8 +434,7 @@ namespace llvm { rcEnd = tri->regclass_end(); rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - unsigned capacity = std::distance(trc->allocation_order_begin(*mf), - trc->allocation_order_end(*mf)); + unsigned capacity = trc->getRawAllocationOrder(*mf).size(); if (capacity != 0) capacityMap[trc] = capacity; @@ -482,8 +481,7 @@ namespace llvm { rcItr != rcEnd; ++rcItr) { const TargetRegisterClass *trc = *rcItr; - if (trc->allocation_order_begin(*mf) == - trc->allocation_order_end(*mf)) + if (trc->getRawAllocationOrder(*mf).empty()) continue; unsigned worstAtI = getWorst(li->reg, trc); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1302395f423e..21375b286c99 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,23 +19,33 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; +#ifndef NDEBUG +cl::opt<bool> StressSchedOpt( + "stress-sched", cl::Hidden, cl::init(false), + cl::desc("Stress test instruction scheduling")); +#endif + ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), TRI(TM.getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { +#ifndef NDEBUG + StressSched = StressSchedOpt; +#endif } ScheduleDAG::~ScheduleDAG() {} /// getInstrDesc helper to handle SDNodes. 
-const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { +const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { if (!Node || !Node->isMachineOpcode()) return NULL; return &TII->get(Node->getMachineOpcode()); } @@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << G->TRI->getName(I->getReg()); dbgs() << "\n"; } } diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 6b7a8c6491bd..f8b1bc76eb8b 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -45,6 +45,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, unsigned Reg = 0; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds if (II->getReg()) { Reg = II->getReg(); break; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 2363df429e36..446adfc2b626 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,10 +21,11 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" @@ -205,7 +206,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { bool UnitLatencies = ForceUnitLatencies(); // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); // Remove any stale debug info; sometimes BuildSchedGraph is called again @@ -236,13 +237,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; } - const TargetInstrDesc &TID = MI->getDesc(); - assert(!TID.isTerminator() && !MI->isLabel() && + const MCInstrDesc &MCID = MI->getDesc(); + assert(!MCID.isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); // Create the SUnit for this MI. SUnit *SU = NewSUnit(MI); - SU->isCall = TID.isCall(); - SU->isCommutable = TID.isCommutable(); + SU->isCall = MCID.isCall(); + SU->isCommutable = MCID.isCommutable(); // Assign the Latency field of SU using target-provided information. 
          if (UnitLatencies)
@@ -309,13 +310,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
           if (SpecialAddressLatency != 0 && !UnitLatencies &&
               UseSU != &ExitSU) {
             MachineInstr *UseMI = UseSU->getInstr();
-            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            const MCInstrDesc &UseMCID = UseMI->getDesc();
             int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
             assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
             if (RegUseIndex >= 0 &&
-                (UseTID.mayLoad() || UseTID.mayStore()) &&
-                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
-                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+                (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+                (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+                UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
               LDataLatency += SpecialAddressLatency;
           }
           // Adjust the dependence latency using operand def/use
@@ -352,17 +353,17 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
         unsigned Count = I->second.second;
         const MachineInstr *UseMI = UseMO->getParent();
         unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
-        const TargetInstrDesc &UseTID = UseMI->getDesc();
+        const MCInstrDesc &UseMCID = UseMI->getDesc();
         // TODO: If we knew the total depth of the region here, we could
         // handle the case where the whole loop is inside the region but
         // is large enough that the isScheduleHigh trick isn't needed.
-        if (UseMOIdx < UseTID.getNumOperands()) {
+        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
-            if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
@@ -374,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
                                  /*isMustAlias=*/false,
                                  /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
-                     UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
@@ -417,9 +418,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   // produce more precise dependence information.
 #define STORE_LOAD_LATENCY 1
   unsigned TrueMemOrderLatency = 0;
-  if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
+  if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
       (MI->hasVolatileMemoryRef() &&
-       (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+       (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
     // Be conservative with these and add dependencies on all memory
     // references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I = @@ -458,7 +459,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); - } else if (TID.mayStore()) { + } else if (MCID.mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { @@ -514,7 +515,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); - } else if (TID.mayLoad()) { + } else if (MCID.mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e6d7ded8a784..0e005d35189d 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -16,11 +16,11 @@ #define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" using namespace llvm; @@ -115,12 +115,12 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - const TargetInstrDesc *TID = DAG->getInstrDesc(SU); - if (TID == NULL) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID == NULL) { // Don't check hazards for non-machineinstr Nodes. return NoHazard; } - unsigned idx = TID->getSchedClass(); + unsigned idx = MCID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the @@ -173,16 +173,16 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - const TargetInstrDesc *TID = DAG->getInstrDesc(SU); - assert(TID && "The scheduler must filter non-machineinstrs"); - if (DAG->TII->isZeroCost(TID->Opcode)) + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + assert(MCID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(MCID->Opcode)) return; ++IssueCount; unsigned cycle = 0; - unsigned idx = TID->getSchedClass(); + unsigned idx = MCID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e843f5fa340f..4f0d2caca22b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,10 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType); + /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. 
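//===-- [editor's note: illustrative aside, not part of this commit] -----===//
// ExtendSetCCUses, declared above and defined later in this patch, rewrites
// SETCC users so that both comparison operands are extended in lockstep when
// a load is widened. That is sound because an integer comparison is invariant
// under extending both sides the same way (sign-extension for signed,
// zero-extension for unsigned). A standalone, exhaustive check over i8:

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a < 128; ++a)
    for (int b = -128; b < 128; ++b) {
      // Signed compare survives sign-extending both operands to i32.
      assert((static_cast<int8_t>(a) < static_cast<int8_t>(b)) ==
             (static_cast<int32_t>(a) < static_cast<int32_t>(b)));
      // Unsigned compare survives zero-extending both operands to i32.
      assert((static_cast<uint8_t>(a) < static_cast<uint8_t>(b)) ==
             (static_cast<uint32_t>(static_cast<uint8_t>(a)) <
              static_cast<uint32_t>(static_cast<uint8_t>(b))));
    }
  return 0;
}
//===----------------------------------------------------------------------===//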
@@ -234,6 +238,9 @@ namespace { SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits = true); + SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -994,7 +1001,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); dbgs() << '\n'); - + // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); @@ -1303,16 +1310,6 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } -/// isCarryMaterialization - Returns true if V is an ADDE node that is known to -/// return 0 or 1 depending on the carry flag. -static bool isCarryMaterialization(SDValue V) { - if (V.getOpcode() != ISD::ADDE) - return false; - - ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0)); - return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); -} - SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1476,18 +1473,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } - // add (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), - N0.getOperand(2)); - - // add X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), - DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), - N1.getOperand(2)); - return SDValue(); } @@ -1531,16 +1516,6 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } - // addc (adde 0, 0, glue), X -> adde X, 0, glue - if (N0->hasOneUse() && isCarryMaterialization(N0)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, - DAG.getConstant(0, VT), N0.getOperand(2)); - - // addc X, (adde 0, 0, glue) -> adde X, 0, glue - if (N1->hasOneUse() && isCarryMaterialization(N1)) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, - DAG.getConstant(0, VT), N1.getOperand(2)); - return SDValue(); } @@ -1591,6 +1566,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
0 :
+                            dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
   EVT VT = N0.getValueType();

   // fold vector ops
@@ -1622,6 +1599,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   // fold (A+B)-B -> A
   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
     return N0.getOperand(0);
+  // fold C2-(A+C1) -> (C2-C1)-A
+  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+    SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+                       N1.getOperand(0));
+  }
   // fold ((A+(B+or-C))-B) -> A+or-C
   if (N0.getOpcode() == ISD::ADD &&
       (N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2508,6 +2491,244 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   return SDValue();
 }

+/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+                                        bool DemandHighBits) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+    return SDValue();
+  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
+  bool LookPassAnd0 = false;
+  bool LookPassAnd1 = false;
+  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+    std::swap(N0, N1);
+  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+    std::swap(N0, N1);
+  if (N0.getOpcode() == ISD::AND) {
+    if (!N0.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!N01C || N01C->getZExtValue() != 0xFF00)
+      return SDValue();
+    N0 = N0.getOperand(0);
+    LookPassAnd0 = true;
+  }
+
+  if (N1.getOpcode() == ISD::AND) {
+    if (!N1.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!N11C || N11C->getZExtValue() != 0xFF)
+      return SDValue();
+    N1 = N1.getOperand(0);
+    LookPassAnd1 = true;
+  }
+
+  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+    std::swap(N0, N1);
+  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+    return SDValue();
+  if (!N0.getNode()->hasOneUse() ||
+      !N1.getNode()->hasOneUse())
+    return SDValue();
+
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+  if (!N01C || !N11C)
+    return SDValue();
+  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+    return SDValue();
+
+  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+  SDValue N00 = N0->getOperand(0);
+  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+    if (!N00.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+    if (!N001C || N001C->getZExtValue() != 0xFF)
+      return SDValue();
+    N00 = N00.getOperand(0);
+    LookPassAnd0 = true;
+  }
+
+  SDValue N10 = N1->getOperand(0);
+  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+    if (!N10.getNode()->hasOneUse())
+      return SDValue();
+    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+    if (!N101C || N101C->getZExtValue() != 0xFF00)
+      return SDValue();
+    N10 = N10.getOperand(0);
+    LookPassAnd1 = true;
+  }
+
+  if (N00 != N10)
+    return SDValue();
+
+  // Make sure everything beyond the low halfword is zero since the SRL 16
+  // will clear the top bits.
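//===-- [editor's note: illustrative aside, not part of this commit] -----===//
// MatchBSwapHWordLow above, and MatchBSwapHWord below, rely on two bit
// identities that can be checked in isolation; bswap32/rotl32 here are local
// stand-ins for ISD::BSWAP and ISD::ROTL.

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t x) {          // byte-reverse, as ISD::BSWAP
  return (x << 24) | ((x & 0xff00u) << 8) | ((x >> 8) & 0xff00u) | (x >> 24);
}
static uint32_t rotl32(uint32_t x, unsigned s) {
  return (x << s) | (x >> (32u - s));
}

int main() {
  // Low variant: when everything above the low halfword is zero,
  // ((a & 0xff) << 8) | ((a & 0xff00) >> 8)  ==  bswap(a) >> 16.
  for (uint32_t a = 0; a <= 0xffffu; ++a)
    assert((((a & 0xffu) << 8) | ((a & 0xff00u) >> 8)) == (bswap32(a) >> 16));

  // Full variant: swapping the bytes inside each halfword equals
  // (rotl (bswap x), 16); e.g. 0xAABBCCDD -> 0xBBAADDCC.
  const uint32_t x = 0xAABBCCDDu;
  const uint32_t hwswap = ((x & 0x000000ffu) << 8) | ((x & 0x0000ff00u) >> 8) |
                          ((x & 0x00ff0000u) << 8) | ((x & 0xff000000u) >> 8);
  assert(hwswap == rotl32(bswap32(x), 16) && hwswap == 0xBBAADDCCu);
  return 0;
}
//===----------------------------------------------------------------------===//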
+ unsigned OpSizeInBits = VT.getSizeInBits(); + if (DemandHighBits && OpSizeInBits > 16 && + (!LookPassAnd0 || !LookPassAnd1) && + !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) + return SDValue(); + + SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); + if (OpSizeInBits > 16) + Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, + DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + return Res; +} + +/// isBSwapHWordElement - Return true if the specified node is an element +/// that makes up a 32-bit packed halfword byteswap. i.e. +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { + if (!N.getNode()->hasOneUse()) + return false; + + unsigned Opc = N.getOpcode(); + if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) + return false; + + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!N1C) + return false; + + unsigned Num; + switch (N1C->getZExtValue()) { + default: + return false; + case 0xFF: Num = 0; break; + case 0xFF00: Num = 1; break; + case 0xFF0000: Num = 2; break; + case 0xFF000000: Num = 3; break; + } + + // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). + SDValue N0 = N.getOperand(0); + if (Opc == ISD::AND) { + if (Num == 0 || Num == 2) { + // (x >> 8) & 0xff + // (x >> 8) & 0xff0000 + if (N0.getOpcode() != ISD::SRL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { + // (x << 8) & 0xff00 + // (x << 8) & 0xff000000 + if (N0.getOpcode() != ISD::SHL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + } else if (Opc == ISD::SHL) { + // (x & 0xff) << 8 + // (x & 0xff0000) << 8 + if (Num != 0 && Num != 2) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { // Opc == ISD::SRL + // (x & 0xff00) >> 8 + // (x & 0xff000000) >> 8 + if (Num != 1 && Num != 3) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + + if (Parts[Num]) + return false; + + Parts[Num] = N0.getOperand(0).getNode(); + return true; +} + +/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. 
That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32)
+    return SDValue();
+  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+  // Look for either
+  // (or (or (and), (and)), (or (and), (and)))
+  // (or (or (or (and), (and)), (and)), (and))
+  if (N0.getOpcode() != ISD::OR)
+    return SDValue();
+  SDValue N00 = N0.getOperand(0);
+  SDValue N01 = N0.getOperand(1);
+
+  if (N1.getOpcode() == ISD::OR) {
+    // (or (or (and), (and)), (or (and), (and)))
+    SDValue N000 = N00.getOperand(0);
+    if (!isBSwapHWordElement(N000, Parts))
+      return SDValue();
+
+    SDValue N001 = N00.getOperand(1);
+    if (!isBSwapHWordElement(N001, Parts))
+      return SDValue();
+    SDValue N010 = N01.getOperand(0);
+    if (!isBSwapHWordElement(N010, Parts))
+      return SDValue();
+    SDValue N011 = N01.getOperand(1);
+    if (!isBSwapHWordElement(N011, Parts))
+      return SDValue();
+  } else {
+    // (or (or (or (and), (and)), (and)), (and))
+    if (!isBSwapHWordElement(N1, Parts))
+      return SDValue();
+    if (!isBSwapHWordElement(N01, Parts))
+      return SDValue();
+    if (N00.getOpcode() != ISD::OR)
+      return SDValue();
+    SDValue N000 = N00.getOperand(0);
+    if (!isBSwapHWordElement(N000, Parts))
+      return SDValue();
+    SDValue N001 = N00.getOperand(1);
+    if (!isBSwapHWordElement(N001, Parts))
+      return SDValue();
+  }
+
+  // Make sure the parts are all coming from the same node.
+  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+    return SDValue();
+
+  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+                              SDValue(Parts[0],0));
+
+  // Result of the bswap should be rotated by 16. If it's not legal, then
+  // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2543,6 +2764,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) + SDValue BSwap = MatchBSwapHWord(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + BSwap = MatchBSwapHWordLow(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + // reassociate or SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); if (ROR.getNode() != 0) @@ -3030,6 +3260,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; + // fold (shl undef, x) -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) @@ -3696,6 +3929,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } +void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, + SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3784,27 +4039,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -3832,6 +4068,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. @@ -3990,27 +4265,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, - N->getDebugLoc(), VT, SOp)); - } + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::SEXTLOAD) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), + LN0->getMemoryVT(), + LN0->isVolatile(), + LN0->isNonTemporal(), + LN0->getAlignment()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + ExtLoad, DAG.getConstant(Mask, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + N0.getOperand(0).getDebugLoc(), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - - return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4198,27 +4494,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - // Extend SetCC uses if necessary. - for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; - SmallVector<SDValue, 4> Ops; - - for (unsigned j = 0; j != 2; ++j) { - SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) - Ops.push_back(ExtLoad); - else - Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, - N->getDebugLoc(), VT, SOp)); - } - - Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), - SetCC->getValueType(0), - &Ops[0], Ops.size())); - } - + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4555,6 +4832,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} + + // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) + if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode() != 0) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + BSwap, N1); + } + return SDValue(); } @@ -5180,7 +5467,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -5204,7 +5492,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -5648,12 +5937,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), E = Ptr.getNode()->use_end(); I != E; ++I) { SDNode *Use = *I; if (Use == N) continue; - if (Use->isPredecessorOf(N)) + if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; if (!((Use->getOpcode() == ISD::LOAD && @@ -6431,8 +6725,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" SDValue Shorter = GetDemandedBits(Value, - APInt::getLowBitsSet(Value.getValueSizeInBits(), - ST->getMemoryVT().getSizeInBits())); + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits())); AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, @@ -7156,7 +7451,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, const TargetData &TD = *TLI.getTargetData(); // Create a ConstantArray of the two constants. - Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2); + Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 797f17444850..54a7d43f46d6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -547,7 +547,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
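//===-- [editor's note: illustrative aside, not part of this commit] -----===//
// CombineToPreIndexedLoadStore above now threads Visited/Worklist caches into
// hasPredecessorHelper, so the predecessor cone of N is expanded at most once
// across all uses tested in the loop instead of once per use. A generic
// sketch of that memoization shape (not LLVM's implementation):

#include <set>
#include <vector>

struct Node { std::vector<Node*> Preds; };

// Seed both Visited and Worklist with the query root's Preds once, then ask
// repeatedly. The cone is expanded lazily, and each node is processed at most
// once over the whole sequence of queries.
bool isPredecessorCached(Node *U, std::set<Node*> &Visited,
                         std::vector<Node*> &Worklist) {
  if (Visited.count(U))
    return true;                     // already discovered by a prior query
  while (!Worklist.empty()) {
    Node *Cur = Worklist.back();
    Worklist.pop_back();
    for (Node *P : Cur->Preds)
      if (Visited.insert(P).second)
        Worklist.push_back(P);
    if (Visited.count(U))
      return true;                   // found while expanding the cone
  }
  return false;                      // cone fully expanded; U is not in it
}
//===----------------------------------------------------------------------===//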
const DbgValueInst *DI = cast<DbgValueInst>(Call); - const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to @@ -556,9 +556,14 @@ bool FastISel::SelectCall(const User *I) { .addReg(0U).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addImm(CI->getZExtValue()).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addCImm(CI).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addFPImm(CF).addImm(DI->getOffset()) @@ -847,7 +852,7 @@ FastISel::SelectExtractValue(const User *U) { return false; // fast-isel can't handle aggregate constants at the moment // Get the actual result register, which is an offset from the base register. - unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end()); + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -1085,7 +1090,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; @@ -1095,7 +1100,7 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1115,7 +1120,7 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1137,7 +1142,7 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1160,7 +1165,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1181,7 +1186,7 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, 
unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1204,7 +1209,7 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1226,7 +1231,7 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1248,7 +1253,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); @@ -1264,7 +1269,7 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cb49a80b67e6..f0f4743298e7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; + EVT VT = Node->getValueType(ResNo); + + // Stick to the preferred register classes for legal types. + if (TLI->isTypeLegal(VT)) + UseRC = TLI->getRegClassFor(VT); + if (!IsClone && !IsCloned) for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); UI != E; ++UI) { @@ -100,10 +106,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, continue; Match = false; if (User->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = 0; if (i+II.getNumDefs() < II.getNumOperands()) - RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + RC = TII->getRegClass(II, i+II.getNumDefs(), TRI); if (!UseRC) UseRC = RC; else if (RC) { @@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); - + // Figure out the register class to create for the destreg. 
if (VRBase) { DstRC = MRI->getRegClass(VRBase); @@ -173,7 +178,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, } void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && @@ -184,7 +189,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; - const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -237,7 +242,7 @@ unsigned InstrEmitter::getVR(SDValue Op, Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); - // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. if (!VReg) { const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); @@ -260,7 +265,7 @@ unsigned InstrEmitter::getVR(SDValue Op, void InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && @@ -270,9 +275,9 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); - const TargetInstrDesc &TID = MI->getDesc(); - bool isOptDef = IIOpNum < TID.getNumOperands() && - TID.OpInfo[IIOpNum].isOptionalDef(); + const MCInstrDesc &MCID = MI->getDesc(); + bool isOptDef = IIOpNum < MCID.getNumOperands() && + MCID.OpInfo[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create // a new virtual register and copy the value into it. @@ -280,8 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) - DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); - assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + DstRC = TII->getRegClass(*II, IIOpNum, TRI); + assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -307,7 +312,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, while (Idx > 0 && MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1; if (isTied) isKill = false; } @@ -325,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// assertions only. 
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { @@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsClone, bool IsCloned) { - const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(RC); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); unsigned NumOps = Node->getNumOperands(); - assert((NumOps & 1) == 0 && - "REG_SEQUENCE must have an even number of operands!"); - const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); - for (unsigned i = 0; i != NumOps; ++i) { + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); - if (i & 1) { + if ((i & 1) == 0) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -591,7 +597,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); } // Otherwise, we're going to create an instruction here. - const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { SDNode *Node = SD->getSDNode(); @@ -610,12 +616,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // FIXME: SDDbgValue constants aren't updated with legalization, so it's - // possible to have i128 constants in them at this point. Dwarf writer - // does not handle i128 constants at the moment so, as a crude workaround, - // just drop the debug info if this happens. - if (!CI->getValue().isSignedIntN(64)) - MIB.addReg(0U); + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); else MIB.addImm(CI->getSExtValue()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { @@ -666,7 +668,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // We want a unique VR for each IMPLICIT_DEF use. return; - const TargetInstrDesc &II = TII->get(Opc); + const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NodeOperands = CountOperands(Node); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; @@ -695,9 +697,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); else { // Collect declared implicit uses. 
- const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); - UsedRegs.append(TID.getImplicitUses(), - TID.getImplicitUses() + TID.getNumImplicitUses()); + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) @@ -849,6 +851,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } break; case InlineAsm::Kind_RegDefEarlyClobber: + case InlineAsm::Kind_Clobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 02c044c3f8f1..19fc0445b166 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -22,7 +22,7 @@ namespace llvm { -class TargetInstrDesc; +class MCInstrDesc; class SDDbgValue; class InstrEmitter { @@ -49,7 +49,7 @@ class InstrEmitter { unsigned ResNo) const; void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, + const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); @@ -63,7 +63,7 @@ class InstrEmitter { /// not in the required register class. void AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); @@ -73,7 +73,7 @@ class InstrEmitter { /// assertions only. void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, + const MCInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62d777ca3314..d06e2bdce065 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -58,17 +58,6 @@ class SelectionDAGLegalize { /// against each other, including inserted libcalls. SmallVector<SDValue, 8> LastCALLSEQ; - enum LegalizeAction { - Legal, // The target natively supports this operation. - Promote, // This operation should be executed in a larger type. - Expand // Try to expand this to other ops, otherwise use a libcall. - }; - - /// ValueTypeActions - This is a bitvector that contains two bits for each - /// value type, where the two bits correspond to the LegalizeAction enum. - /// This can be queried with "getTypeAction(VT)". - TargetLowering::ValueTypeActionImpl ValueTypeActions; - /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. This /// allows us to avoid legalizing the same thing more than once. @@ -87,25 +76,11 @@ class SelectionDAGLegalize { public: explicit SelectionDAGLegalize(SelectionDAG &DAG); - /// getTypeAction - Return how we should legalize values of this type, either - /// it is already legal or we need to expand it into multiple registers of - /// smaller integer type, or we need to promote it to a larger type. - LegalizeAction getTypeAction(EVT VT) const { - return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT); - } - - /// isTypeLegal - Return true if this type is legal on this target. 
- /// - bool isTypeLegal(EVT VT) const { - return getTypeAction(VT) == Legal; - } - void LegalizeDAG(); private: - /// LegalizeOp - We know that the specified value has a legal type. - /// Recursively ensure that the operands have legal types, then return the - /// result. + /// LegalizeOp - Return a legal replacement for the given operation, with + /// all legal operands. SDValue LegalizeOp(SDValue O); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -220,10 +195,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag), - ValueTypeActions(TLI.getValueTypeActions()) { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + DAG(dag) { } void SelectionDAGLegalize::LegalizeDAG() { @@ -753,7 +725,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && - getTypeAction(MVT::i32) == Legal) { + TLI.isTypeLegal(MVT::i32)) { Tmp3 = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); @@ -763,14 +735,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. - if (getTypeAction(MVT::i64) == Legal) { + if (TLI.isTypeLegal(MVT::i64)) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } - if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. @@ -794,10 +766,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { return SDValue(0, 0); } -/// LegalizeOp - We know that the specified value has a legal type, and -/// that its operands are legal. Now ensure that the operation itself -/// is legal, recursively ensuring that the operands' operations remain -/// legal. +/// LegalizeOp - Return a legal replacement for the given operation, with +/// all legal operands. SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
return Op; @@ -806,11 +776,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(getTypeAction(Node->getValueType(i)) == Legal && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((TLI.getTypeAction(*DAG.getContext(), + Node->getOperand(i).getValueType()) == + TargetLowering::TypeLegal || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -1354,7 +1327,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), @@ -1374,6 +1347,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } + + // If this is a promoted vector load, and the vector element types are + // legal, then scalarize it. + if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && + TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0).getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + Node->getValueType(0), &LoadVals[0], LoadVals.size()); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + } + + // If this is a promoted vector load, and the vector element types are + // illegal, create the promoted vector from bitcasted segments. 
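//===-- [editor's note: illustrative aside, not part of this commit] -----===//
// The first Expand block above splits an extending vector load whose vector
// type is illegal into one scalar extload per lane plus a BUILD_VECTOR. A
// conceptual model with plain arrays (addressing details elided; EXTLOAD
// leaves the high bits unspecified, so the zero-extension shown here is just
// one legal choice):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t mem[4] = {1, 2, 250, 4};   // v4i8 in memory
  uint32_t lanes[4];                       // widened result type: v4i32
  for (unsigned i = 0; i < 4; ++i)
    lanes[i] = mem[i];                     // one widening scalar load per lane
  assert(lanes[2] == 250);
  return 0;
}
//===----------------------------------------------------------------------===//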
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { + EVT MemElemTy = Node->getValueType(0).getScalarType(); + EVT SrcSclrTy = SrcVT.getScalarType(); + unsigned SizeRatio = + (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); + + SmallVector<SDValue, 8> LoadVals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(Stride)); + SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, + SrcVT.getScalarType(), + Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + if (TLI.isBigEndian()) { + // MSB (which is garbage, comes first) + LoadVals.push_back(ScalarLoad.getValue(0)); + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + } else { + // LSB (which is data, comes first) + for (unsigned i = 0; i<SizeRatio-1; ++i) + LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType())); + LoadVals.push_back(ScalarLoad.getValue(0)); + } + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NumElem*SizeRatio); + SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, + TempWideVector, &LoadVals[0], LoadVals.size()); + + // Cast to the correct type + ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes); + + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. + break; + + } + // FIXME: This does not work for vectors on most targets. Sign- and // zero-extend operations are currently folded into extending loads, // whether they are legal or not, and then we end up here without any @@ -1548,9 +1606,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case TargetLowering::Custom: Result = TLI.LowerOperation(Result, DAG); break; - case Expand: + case TargetLowering::Expand: + + EVT WideScalarVT = Tmp3.getValueType().getScalarType(); + EVT NarrowScalarVT = StVT.getScalarType(); + + // The Store type is illegal, must scalarize the vector store. + SmallVector<SDValue, 8> Stores; + bool ScalarLegal = TLI.isTypeLegal(WideScalarVT); + if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) { + unsigned NumElem = StVT.getVectorNumElements(); + + unsigned ScalarSize = StVT.getScalarType().getSizeInBits(); + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + // Types smaller than 8 bits are promoted to 8 bits. 
+        ScalarSize = std::max<unsigned>(ScalarSize, 8);
+        // Store stride
+        unsigned Stride = ScalarSize/8;
+        assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
+        for (unsigned Idx=0; Idx<NumElem; Idx++) {
+          SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                   WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+
+          EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+          Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(Stride));
+          SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                       ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                       isVolatile, isNonTemporal, Alignment);
+          Stores.push_back(Store);
+        }
+        Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &Stores[0], Stores.size());
+        break;
+      }
+
+      // The Store type is illegal, must scalarize the vector store.
+      // However, the scalar type is illegal. Must bitcast the result
+      // and store it in smaller parts.
+      if (!TLI.isTypeLegal(StVT) && StVT.isVector()) {
+        unsigned WideNumElem = StVT.getVectorNumElements();
+        unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+        unsigned SizeRatio =
+          (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+        EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+                                           SizeRatio*WideNumElem);
+
+        // Cast the wide element vector to a wider vector with a smaller
+        // element type. Example <2 x i64> -> <4 x i32>
+        Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+        for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+          // Extract element i
+          SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                   NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+          // bump pointer.
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(Stride));
+
+          // Store this element only if it is:
+          // - First element on big endian, or
+          // - Last element on little endian
+          if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+              ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+            SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                         ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                         isVolatile, isNonTemporal, Alignment);
+            Stores.push_back(Store);
+          }
+        }
+        Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &Stores[0], Stores.size());
+        break;
+      }
+
+
       // TRUNCSTORE:i16 i32 -> STORE i16
-      assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+      assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
       Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
       Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
                             isVolatile, isNonTemporal, Alignment);
@@ -1709,7 +1849,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
   SDValue SignBit;
   EVT FloatVT = Tmp2.getValueType();
   EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
-  if (isTypeLegal(IVT)) {
+  if (TLI.isTypeLegal(IVT)) {
     // Convert to an integer with the same sign bit.
SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { @@ -3031,7 +3171,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (getTypeAction(EltVT) == Promote) + if (!TLI.isTypeLegal(EltVT)) EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); unsigned NumElems = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; @@ -3184,6 +3324,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_PPCF128)); break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_PPCF128)); + break; case ISD::FP16_TO_FP32: Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27a466b3a928..e6835d87f82c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; @@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { NVT, &Op, 1, false, N->getDebugLoc()); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; @@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 
b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b8da57f4ffe0..e7c77dd10cb6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -191,10 +192,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); - if (NInVT.isVector()) - // Promote vector element via memory load/store. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - CreateStackStoreLoad(InOp, OutVT)); break; case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. @@ -204,8 +201,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { break; case TargetLowering::TypeScalarizeVector: // Convert the element to an integer and promote it by hand. - return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, - BitConvertToInteger(GetScalarizedVector(InOp))); + if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; case TargetLowering::TypeSplitVector: { // For example, i32 = BITCAST v2i16 on alpha. Convert the split // pieces of the input into integers and reassemble in the final type. @@ -339,8 +338,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? 
- ISD::AssertZext : ISD::AssertSext, dl, - NVT, Res, DAG.getValueType(N->getValueType(0))); + ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + DAG.getValueType(N->getValueType(0).getScalarType())); } SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { @@ -370,7 +369,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, DAG.getValueType(N->getOperand(0).getValueType())); if (N->getOpcode() == ISD::ZERO_EXTEND) - return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Res, dl, + N->getOperand(0).getValueType().getScalarType()); assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); return Res; } @@ -520,20 +520,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; + SDValue InOp = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); - switch (getTypeAction(N->getOperand(0).getValueType())) { + switch (getTypeAction(InOp.getValueType())) { default: llvm_unreachable("Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypeExpandInteger: - Res = N->getOperand(0); + Res = InOp; break; case TargetLowering::TypePromoteInteger: - Res = GetPromotedInteger(N->getOperand(0)); + Res = GetPromotedInteger(InOp); break; + case TargetLowering::TypeSplitVector: + EVT InVT = InOp.getValueType(); + assert(InVT.isVector() && "Cannot split scalar types"); + unsigned NumElts = InVT.getVectorNumElements(); + assert(NumElts == NVT.getVectorNumElements() && + "Dst and Src must have the same number of elements"); + EVT EltVT = InVT.getScalarType(); + assert(isPowerOf2_32(NumElts) && + "Promoted vector type must be a power of two"); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2); + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), + NumElts/2); + + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, + DAG.getIntPtrConstant(0)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, + DAG.getIntPtrConstant(NumElts/2)); + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); } // Truncate to NVT instead of VT - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); } SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { @@ -970,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); } @@ -1069,6 +1094,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
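The TypeSplitVector case above legalizes a vector truncate by splitting the source in half with EXTRACT_SUBVECTOR, truncating each half, and reassembling with CONCAT_VECTORS, relying on the power-of-two lane count it asserts. A minimal host-side analogue of that divide-and-conquer shape, with std::vector standing in for DAG vectors (hypothetical types, not DAG code):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Truncate u32 lanes to u16 lanes by splitting in half, truncating each
    // half, and concatenating, mirroring EXTRACT_SUBVECTOR + TRUNCATE +
    // CONCAT_VECTORS. The lane count must be a power of two, as asserted.
    static std::vector<uint16_t> truncateLanes(const std::vector<uint32_t> &In) {
      if (In.size() == 1)
        return {static_cast<uint16_t>(In[0])};
      size_t Half = In.size() / 2;
      std::vector<uint32_t> Lo(In.begin(), In.begin() + Half);
      std::vector<uint32_t> Hi(In.begin() + Half, In.end());
      std::vector<uint16_t> Out = truncateLanes(Lo);
      std::vector<uint16_t> HiOut = truncateLanes(Hi);
      Out.insert(Out.end(), HiOut.begin(), HiOut.end());
      return Out;
    }

    int main() {
      for (uint16_t V : truncateLanes({0x10001, 0x20002, 0x30003, 0x40004}))
        std::cout << std::hex << V << ' '; // 1 2 3 4
      std::cout << '\n';
    }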
@@ -2146,6 +2173,86 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } +void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + const Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(); + const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + DebugLoc dl = N->getDebugLoc(); + + // A divide for UMULO should be faster than a function call. + if (N->getOpcode() == ISD::UMULO) { + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SplitInteger(MUL, Lo, Hi); + + // A divide for UMULO will be faster than a function call. Select to + // make sure we aren't using 0. + SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + RHS, DAG.getConstant(0, VT), ISD::SETNE); + SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, + DAG.getConstant(1, VT), RHS); + SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); + SDValue Overflow; + Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } + + // Replace this with a libcall that will check overflow. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::MULO_I32; + else if (VT == MVT::i64) + LC = RTLIB::MULO_I64; + else if (VT == MVT::i128) + LC = RTLIB::MULO_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + SDValue Temp = DAG.CreateStackTemporary(PtrVT); + // Temporary for the overflow value, default it to zero. + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, + DAG.getConstant(0, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + EVT ArgVT = N->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = N->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + } + + // Also pass the address of the overflow check. + Entry.Node = Temp; + Entry.Ty = PtrTy->getPointerTo(); + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(Chain, RetTy, true, false, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + true, Func, Args, DAG, dl); + + SplitInteger(CallInfo.first, Lo, Hi); + SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, + MachinePointerInfo(), false, false, 0); + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, + DAG.getConstant(0, PtrVT), + ISD::SETNE); + // Use the overflow from the libcall everywhere. 
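For UMULO the expansion above avoids the libcall entirely: multiply with wraparound, then check that dividing the product by one operand recovers the other, with a select guarding against division by zero. A host-side sketch of the same check, where a short-circuit replaces the ISD::SELECT and RHS == 0 is treated as never overflowing:

    #include <cstdint>
    #include <iostream>

    // Unsigned a*b overflows 64 bits iff b != 0 and (a*b mod 2^64)/b != a:
    // if no overflow occurred, the division recovers a exactly.
    static bool umulo(uint64_t A, uint64_t B, uint64_t &Product) {
      Product = A * B;                   // wrapping multiply, like ISD::MUL
      return B != 0 && Product / B != A; // the ISD::UDIV + SETNE test
    }

    int main() {
      uint64_t P;
      std::cout << umulo(UINT64_MAX, 2, P) << '\n'; // 1: overflowed
      std::cout << umulo(12345, 6789, P) << '\n';   // 0: exact
    }

For SMULO and for integer sizes with no cheap divide, the code falls back to the MULO_I32/I64/I128 libcalls, passing a stack slot whose post-call contents signal overflow, as shown above.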
+ ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2638,18 +2745,18 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - unsigned OutNumElems = N->getValueType(0).getVectorNumElements(); + unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue BaseIdx = N->getOperand(1); SmallVector<SDValue, 8> Ops; + Ops.reserve(OutNumElems); for (unsigned i = 0; i != OutNumElems; ++i) { // Extract the element from the original vector. @@ -2681,18 +2788,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue V1 = GetPromotedInteger(N->getOperand(1)); - EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT OutVT = V0.getValueType(); - return DAG.getVectorShuffle(OutVT, dl, V0,V1, &NewMask[0]); + return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { - - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); @@ -2702,6 +2804,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SmallVector<SDValue, 8> Ops; + Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); Ops.push_back(Op); @@ -2714,10 +2817,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - assert(!InVT.isVector() && "Input must not be a scalar"); + assert(!N->getOperand(0).getValueType().isVector() && + "Input must be a scalar"); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -2730,12 +2831,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { - - SDValue InOp0 = N->getOperand(0); - EVT InVT = InOp0.getValueType(); - EVT InElVT = InVT.getVectorElementType(); - EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); @@ -2744,7 +2839,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0); + SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + N->getOperand(0)); SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(1)); diff --git 
a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b2f966bb7d4c..952797dc75b8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -201,7 +201,7 @@ private: EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } // Integer Result Promotion. @@ -318,6 +318,7 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -377,6 +378,7 @@ private: SDValue SoftenFloatRes_FLOG(SDNode *N); SDValue SoftenFloatRes_FLOG2(SDNode *N); SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); @@ -441,6 +443,7 @@ private: void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 5d0f923afb0f..ffff10ce2948 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -182,9 +182,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; - case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 9595f6947feb..b5698f9c6738 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2164,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (MemVT.getSizeInBits() <= WidenEltWidth) break; if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2179,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 7b560d173ed3..b275c6321ae4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -249,14 +249,14 @@ SUnit 
*ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; // LoadNode may already exist. This can happen when there is another @@ -422,10 +422,10 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -490,7 +490,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -504,10 +505,10 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8d61a898f6b3..12b183804c28 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -276,6 +276,43 @@ private: }; } // end anonymous namespace +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost) { + EVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. 
+ if (VT == MVT::untyped) { + const SDNode *Node = RegDefPos.GetNode(); + unsigned Opcode = Node->getMachineOpcode(); + + if (Opcode == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Idx = RegDefPos.GetIdx(); + const MCInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { @@ -800,14 +837,14 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); @@ -987,10 +1024,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// FIXME: Move to SelectionDAG? static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = TID.getNumDefs(); - for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = MCID.getNumDefs(); + for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1055,7 +1092,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { ++i; // Skip the ID value. if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags)) { + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -1070,10 +1108,10 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (!Node->isMachineOpcode()) continue; - const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); - if (!TID.ImplicitDefs) + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1369,6 +1407,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} + reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. + return SortFunc(right, left); + } +}; +#endif // NDEBUG + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. struct bu_ls_rr_sort : public queue_sort { @@ -1569,20 +1622,33 @@ protected: }; template<class SF> -class RegReductionPriorityQueue : public RegReductionPQBase { - static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { - std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), - E = Q.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Q.end())) - std::swap(*Best, Q.back()); - Q.pop_back(); - return V; +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; public: @@ -1603,7 +1669,7 @@ public: SUnit *pop() { if (Queue.empty()) return NULL; - SUnit *V = popFromQueue(Queue, Picker); + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; return V; } @@ -1613,7 +1679,7 @@ public: std::vector<SUnit*> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { - SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); if (isBottomUp()) dbgs() << "Height " << SU->getHeight() << ": "; else @@ -1778,9 +1844,9 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { } for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if ((RegPressure[RCId] + Cost) >= 
RegLimit[RCId]) return true; } @@ -1891,9 +1957,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + RegPressure[RCId] += Cost; break; } } @@ -1906,16 +1973,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { if (SkipRegDefs > 0) continue; - EVT VT = RegDefPos.GetValue(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] -= Cost; } } dumpRegPressure(); @@ -1962,13 +2029,9 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) { unsigned POpc = PN->getMachineOpcode(); if (POpc == TargetOpcode::IMPLICIT_DEF) continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { + if (POpc == TargetOpcode::EXTRACT_SUBREG || + POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { EVT VT = PN->getValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); @@ -2543,11 +2606,11 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { if (SU->isTwoAddress) { unsigned Opc = SU->getNode()->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned i = 0; i != NumOps; ++i) { - if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { SDNode *DU = SU->getNode()->getOperand(i).getNode(); if (DU->getNodeId() != -1 && Op->OrigNode == &(*SUnits)[DU->getNodeId()]) @@ -2727,11 +2790,11 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { bool isLiveOut = hasOnlyLiveOutUses(SU); unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - unsigned NumRes = TID.getNumDefs(); - unsigned NumOps = TID.getNumOperands() - NumRes; + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; for (unsigned j = 0; j != NumOps; ++j) { - if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) + if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) continue; SDNode *DU = SU->getNode()->getOperand(j).getNode(); if (DU->getNodeId() == -1) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp 
b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 9f2f0121a86d..71f07d6fa47a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,11 +17,12 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -111,7 +112,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, unsigned ResNo = User->getOperand(2).getResNo(); if (Def->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; @@ -255,8 +256,8 @@ void ScheduleDAGSDNodes::ClusterNodes() { continue; unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (TID.mayLoad()) + const MCInstrDesc &MCID = TII->get(Opc); + if (MCID.mayLoad()) // Cluster loads from "near" addresses into combined SUnits. ClusterNeighboringLoads(Node); } @@ -378,7 +379,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { - const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -390,14 +391,14 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - for (unsigned i = 0; i != TID.getNumOperands(); ++i) { - if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + const MCInstrDesc &MCID = TII->get(Opc); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { SU->isTwoAddress = true; break; } } - if (TID.isCommutable()) + if (MCID.isCommutable()) SU->isCommutable = true; } @@ -435,7 +436,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // it requires a cross class copy (cost < 0). That means we are only // treating "expensive to copy" register dependency as physical register // dependency. This may change in the future though. - if (Cost >= 0) + if (Cost >= 0 && !StressSched) PhysReg = 0; // If this is a ctrl dep, latency is 1. @@ -520,14 +521,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { for (;DefIdx < NodeNumDefs; ++DefIdx) { if (!Node->hasAnyUseOfValue(DefIdx)) continue; - if (Node->isMachineOpcode() && - Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { - // Propagate the incoming (full-register) type. I doubt it's needed. - ValueType = Node->getOperand(0).getValueType(); - } - else { - ValueType = Node->getValueType(DefIdx); - } + ValueType = Node->getValueType(DefIdx); ++DefIdx; return; // Found a normal regdef. } @@ -649,7 +643,7 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, // order number right after the N. 
MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N); + ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N); for (unsigned i = 0, e = DVs.size(); i != e; ++i) { if (DVs[i]->isInvalidated()) continue; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b5f68f3055cf..9c27b2ea02ec 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -135,6 +135,14 @@ namespace llvm { return ValueType; } + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx-1; + } + void Advance(); private: void InitNodeNumDefs(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 68eeb609d401..35ea0bb940b5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -598,7 +598,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) { Ordering->remove(N); // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. - SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N); + ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N); for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) DbgVals[i]->setIsInvalidated(); } @@ -3326,13 +3326,13 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && "Expecting memcpy / memset source to meet alignment requirement!"); - // If 'SrcAlign' is zero, that means the memory operation does not need load - // the value, i.e. memset or memcpy from constant string. Otherwise, it's - // the inferred alignment of the source. 'DstAlign', on the other hand, is the - // specified alignment of the memory operation. If it is zero, that means - // it's possible to change the alignment of the destination. 'MemcpyStrSrc' - // indicates whether the memcpy source is constant so it does not need to be - // loaded. + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. 
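The reworded comment above separates two alignment inputs: SrcAlign == 0 means no load is needed (memset, or memcpy from a constant string), while DstAlign == 0 means the destination's alignment may be raised. A much-simplified sketch of the kind of choice this feeds into, with a hypothetical 16-byte widest chunk and an alignment-capped policy (illustrative only, not TLI.getOptimalMemOpType):

    #include <cstdint>
    #include <iostream>

    // Pick the widest power-of-two chunk the destination alignment and the
    // remaining size allow. DstAlign == 0 means the alignment is changeable,
    // so only the remaining size constrains the chunk.
    static unsigned nextChunk(uint64_t Size, unsigned DstAlign) {
      unsigned MaxByAlign = DstAlign ? DstAlign : 16;
      unsigned Chunk = 16; // widest "register" this sketch considers
      while (Chunk > MaxByAlign || Chunk > Size)
        Chunk /= 2;
      return Chunk;
    }

    int main() {
      // Lowering a 13-byte memset with a 4-byte-aligned destination:
      for (uint64_t Left = 13; Left > 0;) {
        unsigned C = nextChunk(Left, 4);
        std::cout << C << ' '; // 4 4 4 1
        Left -= C;
      }
      std::cout << '\n';
    }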
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, NonScalarIntSafe, MemcpyStrSrc, DAG.getMachineFunction()); @@ -4037,6 +4037,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -4142,6 +4144,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -4165,6 +4169,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -4191,6 +4197,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -4216,6 +4224,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); @@ -5508,9 +5518,9 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { return; SDNode *FromNode = From.getNode(); SDNode *ToNode = To.getNode(); - SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode); + ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode); SmallVector<SDDbgValue *, 2> ClonedDVs; - for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end(); + for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end(); I != E; ++I) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { @@ -5691,24 +5701,39 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, return false; } -/// isPredecessorOf - Return true if this node is a predecessor of N. This node -/// is either an operand of N or it can be reached by traversing up the operands. -/// NOTE: this is an expensive method. Use it carefully. -bool SDNode::isPredecessorOf(SDNode *N) const { - SmallPtrSet<SDNode *, 32> Visited; - SmallVector<SDNode *, 16> Worklist; - Worklist.push_back(N); +/// hasPredecessor - Return true if N is a predecessor of this node. +/// N is either an operand of this node, or can be reached by recursively +/// traversing up the operands. +/// NOTE: This is an expensive method. Use it carefully. 
+bool SDNode::hasPredecessor(const SDNode *N) const { + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + return hasPredecessorHelper(N, Visited, Worklist); +} - do { - N = Worklist.pop_back_val(); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDNode *Op = N->getOperand(i).getNode(); - if (Op == this) - return true; +bool SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet<const SDNode *, 32> &Visited, + SmallVector<const SDNode *, 16> &Worklist) const { + if (Visited.empty()) { + Worklist.push_back(this); + } else { + // Take a look in the visited set. If we've already encountered this node + // we needn't search further. + if (Visited.count(N)) + return true; + } + + // Haven't visited N yet. Continue the search. + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + SDNode *Op = M->getOperand(i).getNode(); if (Visited.insert(Op)) Worklist.push_back(Op); + if (Op == N) + return true; } - } while (!Worklist.empty()); + } return false; } @@ -5863,6 +5888,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSUB: return "fsub"; case ISD::FMUL: return "fmul"; case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 7a8a975d0294..81b03ee76a5c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -286,22 +286,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - unsigned NumElts = ValueVT.getVectorNumElements(); - SmallVector<SDValue, 8> NewOps; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT.getScalarType(), Val ,DAG.getIntPtrConstant(i)); - SDValue Cast; + bool Smaller = ValueVT.bitsLE(PartVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); - bool Smaller = ValueVT.bitsLE(PartVT); - - Cast = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Ext); - - NewOps.push_back(Cast); - } - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, - &NewOps[0], NewOps.size()); } // Trivial bitcast if the types are the same size and the destination @@ -310,9 +298,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + // Handle cases such as i8 -> <1 x i1> + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartVT) { + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -453,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // Bitconvert vector->vector case. 
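The hasPredecessorHelper introduced above exposes the Visited set and Worklist so a caller making many predecessor queries against the same subgraph can resume the search instead of restarting it each time. A standalone analogue with a hypothetical Node type; std::unordered_set stands in for SmallPtrSet:

    #include <iostream>
    #include <unordered_set>
    #include <vector>

    struct Node { std::vector<Node *> Operands; };

    // Iterative operand-graph search. A non-empty Visited set means a prior
    // query already explored part of the graph; anything in it is reachable.
    static bool hasPredecessor(const Node *Root, const Node *N,
                               std::unordered_set<const Node *> &Visited,
                               std::vector<const Node *> &Worklist) {
      if (Visited.empty())
        Worklist.push_back(Root);
      else if (Visited.count(N))
        return true;

      while (!Worklist.empty()) {
        const Node *M = Worklist.back();
        Worklist.pop_back();
        for (const Node *Op : M->Operands) {
          if (Visited.insert(Op).second)
            Worklist.push_back(Op);
          if (Op == N)
            return true;
        }
      }
      return false;
    }

    int main() {
      Node A, B, C;
      A.Operands = {&B};
      B.Operands = {&C};
      std::unordered_set<const Node *> Visited;
      std::vector<const Node *> Worklist;
      std::cout << hasPredecessor(&A, &C, Visited, Worklist) << '\n'; // 1
    }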
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorElementType() == ValueVT.getVectorElementType() && PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in @@ -475,28 +471,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else if (PartVT.isVector() && PartVT.getVectorElementType().bitsGE( - ValueVT.getVectorElementType())&& + ValueVT.getVectorElementType()) && PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - unsigned NumElts = ValueVT.getVectorNumElements(); - SmallVector<SDValue, 8> NewOps; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ValueVT.getScalarType(), Val ,DAG.getIntPtrConstant(i)); - SDValue Cast = DAG.getNode(ISD::ANY_EXTEND, - DL, PartVT.getScalarType(), Ext); - NewOps.push_back(Cast); - } - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, - &NewOps[0], NewOps.size()); + bool Smaller = PartVT.bitsLE(ValueVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } else{ // Vector -> scalar conversion. - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } Parts[0] = Val; @@ -1280,6 +1271,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, return true; } +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. +uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock()); + BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock()); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + uint32_t weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, weight); +} + + static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast<Instruction>(V)) return I->getParent() == BB; @@ -1549,8 +1558,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB); + addSuccessorWithWeight(SwitchBB, CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
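The addSuccessorWithWeight helper above threads BranchProbabilityInfo edge weights onto CFG successor edges, with weight 0 serving as "no information" whenever the analysis was not scheduled. A small sketch of that contract, with hypothetical Block and BPI stand-ins:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Block {
      std::vector<std::pair<Block *, uint32_t>> Successors;
      void addSuccessor(Block *Dst, uint32_t Weight) {
        Successors.push_back({Dst, Weight});
      }
    };

    // Stand-in for BranchProbabilityInfo::getEdgeWeight; may be absent.
    struct BPI { uint32_t getEdgeWeight(Block *, Block *) { return 124; } };

    static void addSuccessorWithWeight(BPI *Info, Block *Src, Block *Dst) {
      uint32_t Weight = Info ? Info->getEdgeWeight(Src, Dst) : 0;
      Src->addSuccessor(Dst, Weight);
    }

    int main() {
      Block A, B;
      BPI Info;
      addSuccessorWithWeight(&Info, &A, &B);   // weight from the analysis
      addSuccessorWithWeight(nullptr, &A, &B); // analysis unavailable: 0
      for (auto &S : A.Successors)
        std::cout << S.second << '\n'; // 124 then 0
    }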
@@ -1694,8 +1703,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1718,7 +1727,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, VT); SDValue Cmp; - if (CountPopulation_64(B.Mask) == 1) { + unsigned PopCount = CountPopulation_64(B.Mask); + if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), @@ -1726,6 +1736,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ShiftOp, DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, @@ -1740,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); + addSuccessorWithWeight(SwitchBB, B.TargetBB); + addSuccessorWithWeight(SwitchBB, NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1981,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to @@ -2009,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); + addSuccessorWithWeight(JumpTableBB, *I); } } @@ -2428,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { succs.push_back(I.getSuccessor(i)); array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + for (unsigned i = 0, e = succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2489,6 +2509,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } +void SelectionDAGBuilder::visitSDiv(const User &I) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + // Turn exact SDivs into multiplications. + // FIXME: This should be in DAGCombiner, but it doesn't have access to the + // exact bit. 
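The exact-division rewrite that TLI.BuildExactSDIV performs rests on modular arithmetic: an odd divisor d is invertible mod 2^n, and when d divides x exactly, x/d equals x times that inverse under plain wrapping multiplication. A host-side sketch of the arithmetic; the Newton iteration and sample values are illustrative, not the TLI code:

    #include <cstdint>
    #include <iostream>

    // Multiplicative inverse of an odd D modulo 2^64. Newton's iteration
    // doubles the number of correct low bits per step, and the seed X = D
    // is already correct to 3 bits (D*D == 1 mod 8 for odd D), so five
    // steps reach 64 bits.
    static uint64_t inverseMod2_64(uint64_t D) {
      uint64_t X = D;
      for (int i = 0; i < 5; ++i)
        X *= 2 - D * X;
      return X;
    }

    int main() {
      int64_t Num = -91, Den = 7; // exact: 7 divides 91
      uint64_t Inv = inverseMod2_64((uint64_t)Den);
      // Wrapping multiply in the unsigned domain; two's complement makes
      // the result valid for signed exact division as well.
      int64_t Quot = (int64_t)((uint64_t)Num * Inv);
      std::cout << Quot << '\n'; // prints -13
    }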
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && + !isa<ConstantSDNode>(Op1) && + isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) + setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + else + setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + Op1, Op2)); +} + void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) @@ -2855,7 +2891,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2895,7 +2931,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -4623,6 +4659,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); @@ -4759,6 +4802,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + setValue(&I, getValue(I.getArgOperand(0))); + return 0; + } + case Intrinsic::trap: { StringRef TrapFuncName = getTrapFunctionName(); if (TrapFuncName.empty()) { @@ -4789,15 +4839,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ @@ -5415,54 +5466,6 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; } // end anonymous namespace -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. 
-static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. - if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -5597,52 +5600,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, return; } - // This is a reference to a register class that doesn't directly correspond - // to an LLVM register class. Allocate NumRegs consecutive, available, - // registers from the class. - std::vector<unsigned> RegClassRegs - = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - unsigned NumAllocated = 0; - for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { - unsigned Reg = RegClassRegs[i]; - // See if this register is available. - if ((isOutReg && OutputRegs.count(Reg)) || // Already used. - (isInReg && InputRegs.count(Reg))) { // Already used. - // Make sure we find consecutive registers. - NumAllocated = 0; - continue; - } - - // Check to see if this register is allocatable (i.e. don't give out the - // stack pointer). - const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); - if (!RC) { // Couldn't allocate this register. - // Reset NumAllocated to make sure we return consecutive registers. - NumAllocated = 0; - continue; - } - - // Okay, this register is good, we can use it. - ++NumAllocated; - - // If we allocated enough consecutive registers, succeed. - if (NumAllocated == NumRegs) { - unsigned RegStart = (i-NumAllocated)+1; - unsigned RegEnd = i+1; - // Mark all of the allocated registers used. 
- for (unsigned i = RegStart; i != RegEnd; ++i) - Regs.push_back(RegClassRegs[i]); - - OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), - OpInfo.ConstraintVT); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); - return; - } - } - // Otherwise, we couldn't allocate enough registers for this. } @@ -5749,10 +5706,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -6015,8 +5975,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) + if (OpInfo.AssignedRegs.Regs.empty()) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -6031,8 +5990,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands( - InlineAsm::Kind_RegDefEarlyClobber, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, false, 0, DAG, AsmNodeOperands); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8376d41e1531..a0884ebf5d56 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -434,6 +434,9 @@ private: const Value* SV, MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); + + uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -464,7 +467,7 @@ private: void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } - void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); } + void visitSDiv(const User &I); void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } void visitAnd (const User &I) { visitBinary(I, ISD::AND); } void visitOr (const User &I) { visitBinary(I, ISD::OR); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 771b0089fdc0..87bb296b8c79 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include 
"llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -68,6 +69,11 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + #ifndef NDEBUG static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, @@ -186,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -199,6 +206,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -262,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CurDAG->init(*MF); FuncInfo->set(Fn, *MF); + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); + else + FuncInfo->BPI = 0; + SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -339,9 +354,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const MachineBasicBlock *MBB = I; for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); - if ((TID.isCall() && !TID.isReturn()) || + if ((MCID.isCall() && !MCID.isReturn()) || II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); goto done; @@ -666,7 +681,7 @@ void SelectionDAGISel::PrepareEHLandingPad() { // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); - const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); @@ -2596,9 +2611,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (EmitNodeInfo & OPFL_MemRefs) { // Only attach load or store memory operands if the generated // instruction may load or store. 
- const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc); - bool mayLoad = TID.mayLoad(); - bool mayStore = TID.mayStore(); + const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc); + bool mayLoad = MCID.mayLoad(); + bool mayStore = MCID.mayStore(); unsigned NumMemRefs = 0; for (SmallVector<MachineMemOperand*, 2>::const_iterator I = diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index cf6069a2f185..2626ac3bbb2a 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::MUL_I32] = "__mulsi3"; Names[RTLIB::MUL_I64] = "__muldi3"; Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; Names[RTLIB::SDIV_I8] = "__divqi3"; Names[RTLIB::SDIV_I16] = "__divhi3"; Names[RTLIB::SDIV_I32] = "__divsi3"; @@ -136,6 +139,10 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::REM_F64] = "fmod"; Names[RTLIB::REM_F80] = "fmodl"; Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; Names[RTLIB::POWI_F32] = "__powisf2"; Names[RTLIB::POWI_F64] = "__powidf2"; Names[RTLIB::POWI_F80] = "__powixf2"; @@ -673,10 +680,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, NewVT = EltTy; IntermediateVT = NewVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -965,8 +978,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. @@ -1762,9 +1781,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. 
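// A sketch of the combine below: for (and (bitcast f32 %x to i32),
// 0x80000000) only the sign bit is demanded, so FGETSIGN can fetch
// the sign as an integer and a shift can put it back in the MSB,
// potentially avoiding a trip through memory for the FP-to-int move.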
- if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && - Op.getOperand(0).getValueType().isFloatingPoint() && - !Op.getOperand(0).getValueType().isVector()) { + if (!Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { @@ -1902,7 +1921,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // comparisons. if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -2608,7 +2627,6 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { TargetLowering::ConstraintType TargetLowering::getConstraintType(const std::string &Constraint) const { - // FIXME: lots more standard ones to handle. if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -2661,9 +2679,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { - + if (Constraint.length() > 1) return; - + char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; @@ -2722,13 +2740,6 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } -std::vector<unsigned> TargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - return std::vector<unsigned>(); -} - - std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -2853,7 +2864,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } - + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. if (const StructType *STy = dyn_cast<StructType>(OpTy)) if (STy->getNumElements() == 1) @@ -2955,10 +2966,13 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass*> MatchRC = + getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass*> InputRC = + getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -3204,6 +3218,32 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } +/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication +/// with the multiplicative inverse of the constant. +SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, + SelectionDAG &DAG) const { + ConstantSDNode *C = cast<ConstantSDNode>(Op2); + APInt d = C->getAPIntValue(); + assert(d != 0 && "Division by zero!"); + + // Shift the value upfront if it is even, so the LSB is one.
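// Worked example (a sketch, assuming a 32-bit type): for an exact
// "sdiv %x, 6", ShAmt below is 1, so %x is first shifted right to
// leave an exact divide by d = 3. The Newton loop then converges to
// 3^-1 mod 2^32 = 0xAAAAAAAB, and since 3 * 0xAAAAAAAB == 1 (mod 2^32),
// the whole division becomes a single mul by 0xAAAAAAAB.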
+ unsigned ShAmt = d.countTrailingZeros(); + if (ShAmt) { + // TODO: For UDIV use SRL instead of SRA. + SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + d = d.ashr(ShAmt); + } + + // Calculate the multiplicative inverse, using Newton's method. + APInt t, xn = d; + while ((t = d*xn) != 1) + xn *= APInt(d.getBitWidth(), 2) - t; + + Op2 = DAG.getConstant(xn, Op1.getValueType()); + return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); +} + /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 6ab0cb03c065..5a253a4d97e4 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -45,7 +45,8 @@ namespace { /// StackEntryTy - Abstract type of a link in the shadow stack. /// - const StructType *StackEntryTy; + StructType *StackEntryTy; + StructType *FrameMapTy; /// Roots - GC roots in the current function. Each is a pair of the /// intrinsic call and its corresponding alloca. @@ -164,8 +165,7 @@ namespace { InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, - Args.begin(), Args.end(), - CI->getName(), CallBB); + Args, CI->getName(), CallBB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); CI->replaceAllUsesWith(II); @@ -194,31 +194,31 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { // Truncate the ShadowStackDescriptor if some metadata is null. unsigned NumMeta = 0; - SmallVector<Constant*,16> Metadata; + SmallVector<Constant*, 16> Metadata; for (unsigned I = 0; I != Roots.size(); ++I) { Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); if (!C->isNullValue()) NumMeta = I + 1; Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); } + Metadata.resize(NumMeta); + const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Constant *BaseElts[] = { - ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false), - ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false), + ConstantInt::get(Int32Ty, Roots.size(), false), + ConstantInt::get(Int32Ty, NumMeta, false), }; Constant *DescriptorElts[] = { - ConstantStruct::get(F.getContext(), BaseElts, 2, false), - ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), - Metadata.begin(), NumMeta) + ConstantStruct::get(FrameMapTy, BaseElts), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata) }; - Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2, - false); - - std::string TypeName("gc_map."); - TypeName += utostr(NumMeta); - F.getParent()->addTypeName(TypeName, FrameMap->getType()); + Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()}; + StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems // that, short of multithreaded LLVM, it should be safe; all that is @@ -246,17 +246,12 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { // doInitialization creates the generic version of this type. 
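// Sketch of the concrete type built below for a function "foo" with
// two pointer-typed root allocas (names and types illustrative):
//   %gc_stackentry.foo = type { %gc_stackentry, i8*, i32* }
// i.e. the generic link/map header followed by one slot per root.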
- std::vector<const Type*> EltTys; + std::vector<Type*> EltTys; EltTys.push_back(StackEntryTy); for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - Type *Ty = StructType::get(F.getContext(), EltTys); - - std::string TypeName("gc_stackentry."); - TypeName += F.getName(); - F.getParent()->addTypeName(TypeName, Ty); - - return Ty; + + return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys); } /// doInitialization - If this module uses the GC intrinsics, find them now. If @@ -267,13 +262,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. // void *Meta[]; // May be absent for roots without metadata. // }; - std::vector<const Type*> EltTys; + std::vector<Type*> EltTys; // 32 bits is ok up to a 32GB stack frame. :) EltTys.push_back(Type::getInt32Ty(M.getContext())); // Specifies length of variable length array. EltTys.push_back(Type::getInt32Ty(M.getContext())); - StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); - M.addTypeName("gc_map", FrameMapTy); + FrameMapTy = StructType::createNamed("gc_map", EltTys); PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); // struct StackEntry { @@ -281,18 +275,14 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // FrameMap *Map; // Pointer to constant FrameMap. // void *Roots[]; // Stack roots (in-place array, so we pretend). // }; - OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); - + + StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry"); + EltTys.clear(); - EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); EltTys.push_back(FrameMapPtrTy); - PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); - - RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); - StackEntryTy = cast<StructType>(LinkTyH.get()); + StackEntryTy->setBody(EltTys); const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); - M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from - // a FunctionPass? // Get the root chain if it already exists. Head = M.getGlobalVariable("llvm_gc_root_chain"); @@ -399,7 +389,7 @@ bool ShadowStackGC::performCustomLowering(Function &F) { Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, 0,1,"gc_frame.map"); - AtEntry.CreateStore(FrameMap, EntryMapPtr); + AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp deleted file mode 100644 index 221bec50d850..000000000000 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ /dev/null @@ -1,1539 +0,0 @@ -//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple register coalescing pass that attempts to -// aggressively coalesce every register copy that it can. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regcoalescing" -#include "SimpleRegisterCoalescing.h" -#include "VirtRegMap.h" -#include "LiveDebugVariables.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/Value.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterCoalescer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include <algorithm> -#include <cmath> -using namespace llvm; - -STATISTIC(numJoins , "Number of interval joins performed"); -STATISTIC(numCrossRCs , "Number of cross class joins performed"); -STATISTIC(numCommutes , "Number of instruction commuting performed"); -STATISTIC(numExtends , "Number of copies extended"); -STATISTIC(NumReMats , "Number of instructions re-materialized"); -STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); -STATISTIC(numAborts , "Number of times interval joining aborted"); - -char SimpleRegisterCoalescing::ID = 0; -static cl::opt<bool> -EnableJoining("join-liveintervals", - cl::desc("Coalesce copies (default=true)"), - cl::init(true)); - -static cl::opt<bool> -DisableCrossClassJoin("disable-cross-class-join", - cl::desc("Avoid coalescing cross register class copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -EnablePhysicalJoin("join-physregs", - cl::desc("Join physical register copies"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -VerifyCoalescing("verify-coalescing", - cl::desc("Verify machine instrs before and after register coalescing"), - cl::Hidden); - -INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer, - "simple-register-coalescing", "Simple Register Coalescing", - false, false, true) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer, - "simple-register-coalescing", "Simple Register Coalescing", - false, false, true) - -char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; - -void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addRequired<LiveIntervals>(); - AU.addPreserved<LiveIntervals>(); - AU.addRequired<LiveDebugVariables>(); - AU.addPreserved<LiveDebugVariables>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<MachineLoopInfo>(); - AU.addPreserved<MachineLoopInfo>(); - AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addPreservedID(TwoAddressInstructionPassID); - MachineFunctionPass::getAnalysisUsage(AU); -} - -void 
SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) { - /// Joined copies are not deleted immediately, but kept in JoinedCopies. - JoinedCopies.insert(CopyMI); - - /// Mark all register operands of CopyMI as <undef> so they won't affect dead - /// code elimination. - for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), - E = CopyMI->operands_end(); I != E; ++I) - if (I->isReg()) - I->setIsUndef(true); -} - -/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA -/// being the source and IntB being the dest, thus this defines a value number -/// in IntB. If the source value number (in IntA) is defined by a copy from B, -/// see if we can merge these two pieces of B into a single value number, -/// eliminating a copy. For example: -/// -/// A3 = B0 -/// ... -/// B1 = A3 <- this copy -/// -/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 -/// value number to be replaced with B0 (which simplifies the B liveinterval). -/// -/// This returns true if an interval was modified. -/// -bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // Bail if there is no dst interval - can happen when merging physical subreg - // operations. - if (!li_->hasInterval(CP.getDstReg())) - return false; - - LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); - LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - - // BValNo is a value number in B that is defined by a copy from A. 'B3' in - // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; - - // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we - // can't process it. - if (!BValNo->isDefByCopy()) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - - // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - // The live range might not exist after fun with physreg coalescing. - if (ALR == IntA.end()) return false; - VNInfo *AValNo = ALR->valno; - // If it's re-defined by an early clobber somewhere in the live range, then - // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. - // See PR3149: - // 172 %ECX<def> = MOV32rr %reg1039<kill> - // 180 INLINEASM <es:subl $5,$1 - // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, - // %EAX<kill>, - // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 - // 188 %EAX<def> = MOV32rr %EAX<kill> - // 196 %ECX<def> = MOV32rr %ECX<kill> - // 204 %ECX<def> = MOV32rr %ECX<kill> - // 212 %EAX<def> = MOV32rr %EAX<kill> - // 220 %EAX<def> = MOV32rr %EAX - // 228 %reg1039<def> = MOV32rr %ECX<kill> - // The early clobber operand ties ECX input to the ECX def. - // - // The live interval of ECX is represented as this: - // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) - // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->hasRedefByEC()) - return false; - - // If AValNo is defined as a copy from IntB, we can potentially process this. - // Get the instruction that defines this value number. 
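// In the "A3 = B0 ... B1 = A3" example above, AValNo is A3's value
// number and its defining instruction is the "A3 = B0" copy, so the
// test below asks whether that earlier copy moves the same B register
// that the current coalescer pair is joining.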
- if (!CP.isCoalescable(AValNo->getCopy())) - return false; - - // Get the LiveRange in IntB that this value number starts with. - LiveInterval::iterator ValLR = - IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - if (ValLR == IntB.end()) - return false; - - // Make sure that the end of the live range is inside the same block as - // CopyMI. - MachineInstr *ValLREndInst = - li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) - return false; - - // Okay, we now know that ValLR ends in the same block that the CopyMI - // live-range starts. If there are no intervening live ranges between them in - // IntB, we can merge them. - if (ValLR+1 != BLR) return false; - - // If a live interval is a physical register, conservatively check if any - // of its aliases is overlapping the live interval of the virtual register. - // If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { - DEBUG({ - dbgs() << "\t\tInterfere with alias "; - li_->getInterval(*AS).print(dbgs(), tri_); - }); - return false; - } - } - - DEBUG({ - dbgs() << "Extending: "; - IntB.print(dbgs(), tri_); - }); - - SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; - // We are about to delete CopyMI, so need to remove it as the 'instruction - // that defines this value #'. Update the valnum with the new defining - // instruction #. - BValNo->def = FillerStart; - BValNo->setCopy(0); - - // Okay, we can merge them. We need to insert a new liverange: - // [ValLR.end, BLR.begin) of either value number, then we merge the - // two value numbers. - IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); - - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, - li_->getVNInfoAllocator()))); - } - } - - // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); - IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } - DEBUG({ - dbgs() << " result = "; - IntB.print(dbgs(), tri_); - dbgs() << "\n"; - }); - - // If the source instruction was killing the source register before the - // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); - if (UIdx != -1) { - ValLREndInst->getOperand(UIdx).setIsKill(false); - } - - // If the copy instruction was killing the destination register before the - // merge, find the last use and trim the live range. That will also add the - // isKill marker. - if (ALR->end == CopyIdx) - li_->shrinkToUses(&IntA); - - ++numExtends; - return true; -} - -/// HasOtherReachingDefs - Return true if there are definitions of IntB -/// other than BValNo val# that can reach uses of AValno val# of IntA. 
-bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, - LiveInterval &IntB, - VNInfo *AValNo, - VNInfo *BValNo) { - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; - LiveInterval::Ranges::iterator BI = - std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); - if (BI != IntB.ranges.begin()) - --BI; - for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { - if (BI->valno == BValNo) - continue; - if (BI->start <= AI->start && BI->end > AI->start) - return true; - if (BI->start > AI->start && BI->start < AI->end) - return true; - } - } - return false; -} - -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with -/// IntA being the source and IntB being the dest, thus this defines a value -/// number in IntB. If the source value number (in IntA) is defined by a -/// commutable instruction and its other operand is coalesced to the copy dest -/// register, see if we can transform the copy into a noop by commuting the -/// definition. For example, -/// -/// A3 = op A2 B0<kill> -/// ... -/// B1 = A3 <- this copy -/// ... -/// = op A3 <- more uses -/// -/// ==> -/// -/// B2 = op B0 A2<kill> -/// ... -/// B1 = B2 <- now an identity copy -/// ... -/// = op B2 <- more uses -/// -/// This returns true if an interval was modified. -/// -bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // FIXME: For now, only eliminate the copy by commuting its def when the - // source register is a virtual register. We want to guard against cases - // where the copy is a back edge copy and commuting the def lengthens the - // live interval of the source register to the entire loop. - if (CP.isPhys() && CP.isFlipped()) - return false; - - // Bail if there is no dst interval. - if (!li_->hasInterval(CP.getDstReg())) - return false; - - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - - LiveInterval &IntA = - li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); - LiveInterval &IntB = - li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - - // BValNo is a value number in B that is defined by a copy from A. 'B3' in - // the example above. - VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || !BValNo->isDefByCopy()) - return false; - - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - - // AValNo is the value number in A that defines the copy, A3 in the example. - VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); - assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) - return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); - if (!DefMI) - return false; - const TargetInstrDesc &TID = DefMI->getDesc(); - if (!TID.isCommutable()) - return false; - // If DefMI is a two-address instruction then commuting it will change the - // destination register.
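// Continuing the example above: in "A3 = op A2 B0<kill>", the def A3
// is tied to the use of A2, so commuting swaps A2 and B0, and the
// checks below require that the operand taking over the tied slot is
// exactly IntB.reg and is killed here; only then can the result be
// rewritten to define B directly ("B2 = op B0 A2<kill>").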
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); - assert(DefIdx != -1); - unsigned UseOpIdx; - if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) - return false; - unsigned Op1, Op2, NewDstIdx; - if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) - return false; - if (Op1 == UseOpIdx) - NewDstIdx = Op2; - else if (Op2 == UseOpIdx) - NewDstIdx = Op1; - else - return false; - - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !NewDstMO.isKill()) - return false; - - // Make sure there are no other definitions of IntB that would reach the - // uses which the new definition can reach. - if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) - return false; - - // Abort if the aliases of IntB.reg have values that are not simply the - // clobbers from the superreg. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) - if (li_->hasInterval(*AS) && - HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) - return false; - - // If some of the uses of IntA.reg is already coalesced away, return false. - // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineRegisterInfo::use_nodbg_iterator UI = - mri_->use_nodbg_begin(IntA.reg), - UE = mri_->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - SlotIndex UseIdx = li_->getInstructionIndex(UseMI); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end()) - continue; - if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) - return false; - } - - DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' - << *DefMI); - - // At this point we have decided that it is legal to do this - // transformation. Start by commuting the instruction. - MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = tii_->commuteInstruction(DefMI); - if (!NewMI) - return false; - if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && - TargetRegisterInfo::isVirtualRegister(IntB.reg) && - !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg))) - return false; - if (NewMI != DefMI) { - li_->ReplaceMachineInstrInMaps(DefMI, NewMI); - MBB->insert(DefMI, NewMI); - MBB->erase(DefMI); - } - unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); - NewMI->getOperand(OpIdx).setIsKill(); - - // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. - // A = or A, B - // ... - // B = A - // ... - // C = A<kill> - // ... - // = B - - // Update uses of IntA of the specific Val# with IntB. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), - UE = mri_->use_end(); UI != UE;) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; - ++UI; - if (JoinedCopies.count(UseMI)) - continue; - if (UseMI->isDebugValue()) { - // FIXME These don't have an instruction index. Not clear we have enough - // info to decide whether to do this replacement or not. For now do it. 
- UseMO.setReg(NewReg); - continue; - } - SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) - continue; - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) - UseMO.substPhysReg(NewReg, *tri_); - else - UseMO.setReg(NewReg); - if (UseMI == CopyMI) - continue; - if (!UseMI->isCopy()) - continue; - if (UseMI->getOperand(0).getReg() != IntB.reg || - UseMI->getOperand(0).getSubReg()) - continue; - - // This copy will become a noop. If it's defining a new val#, merge it into - // BValNo. - SlotIndex DefIdx = UseIdx.getDefIndex(); - VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); - if (!DVNI) - continue; - DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); - assert(DVNI->def == DefIdx); - BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - markAsJoined(UseMI); - } - - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition - // is updated. - VNInfo *ValNo = BValNo; - ValNo->def = AValNo->def; - ValNo->setCopy(0); - for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); - AI != AE; ++AI) { - if (AI->valno != AValNo) continue; - IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); - } - DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); - - IntA.removeValNo(AValNo); - DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); - ++numCommutes; - return true; -} - -/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial -/// computation, replace the copy by rematerialize the definition. -bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, - bool preserveSrcInt, - unsigned DstReg, - unsigned DstSubIdx, - MachineInstr *CopyMI) { - SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); - LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); - assert(SrcLR != SrcInt.end() && "Live range not found!"); - VNInfo *ValNo = SrcLR->valno; - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) - return false; - MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); - if (!DefMI) - return false; - assert(DefMI && "Defining instruction disappeared"); - const TargetInstrDesc &TID = DefMI->getDesc(); - if (!TID.isAsCheapAsAMove()) - return false; - if (!tii_->isTriviallyReMaterializable(DefMI, AA)) - return false; - bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, AA, SawStore)) - return false; - if (TID.getNumDefs() != 1) - return false; - if (!DefMI->isImplicitDef()) { - // Make sure the copy destination register class fits the instruction - // definition register class. The mismatch can happen as a result of earlier - // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (mri_->getRegClass(DstReg) != RC) - return false; - } else if (!RC->contains(DstReg)) - return false; - } - - // If destination register has a sub-register index on it, make sure it - // matches the instruction register class. 
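// Illustrative case: the def may have been narrowed to a sub-register
// (say a 32-bit def feeding a copy into the low half of a 64-bit
// register); the instruction's def register class must then agree
// with the sub-register class being written, or we bail out below.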
- if (DstSubIdx) { - const TargetInstrDesc &TID = DefMI->getDesc(); - if (TID.getNumDefs() != 1) - return false; - const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); - const TargetRegisterClass *DstSubRC = - DstRC->getSubRegisterRegClass(DstSubIdx); - const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_); - if (DefRC == DstRC) - DstSubIdx = 0; - else if (DefRC != DstSubRC) - return false; - } - - RemoveCopyFlag(DstReg, CopyMI); - - MachineBasicBlock *MBB = CopyMI->getParent(); - MachineBasicBlock::iterator MII = - llvm::next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); - MachineInstr *NewMI = prior(MII); - - // CopyMI may have implicit operands, transfer them over to the newly - // rematerialized instruction. And update implicit def interval valnos. - for (unsigned i = CopyMI->getDesc().getNumOperands(), - e = CopyMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = CopyMI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - if (MO.isDef()) - RemoveCopyFlag(MO.getReg(), CopyMI); - } - - NewMI->copyImplicitOps(CopyMI); - li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); - CopyMI->eraseFromParent(); - ReMatCopies.insert(CopyMI); - ReMatDefs.insert(DefMI); - DEBUG(dbgs() << "Remat: " << *NewMI); - ++NumReMats; - - // The source interval can become smaller because we removed a use. - if (preserveSrcInt) - li_->shrinkToUses(&SrcInt); - - return true; -} - -/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and -/// update the subregister number if it is not zero. If DstReg is a -/// physical register and the existing subregister number of the def / use -/// being updated is not zero, make sure to set it to the correct physical -/// subregister. -void -SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { - bool DstIsPhys = CP.isPhys(); - unsigned SrcReg = CP.getSrcReg(); - unsigned DstReg = CP.getDstReg(); - unsigned SubIdx = CP.getSubIdx(); - - // Update LiveDebugVariables. - ldv_->renameRegister(SrcReg, DstReg, SubIdx); - - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); - MachineInstr *UseMI = I.skipInstruction();) { - // A PhysReg copy that won't be coalesced can perhaps be rematerialized - // instead. - if (DstIsPhys) { - if (UseMI->isCopy() && - !UseMI->getOperand(1).getSubReg() && - !UseMI->getOperand(0).getSubReg() && - UseMI->getOperand(1).getReg() == SrcReg && - UseMI->getOperand(0).getReg() != SrcReg && - UseMI->getOperand(0).getReg() != DstReg && - !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, - UseMI->getOperand(0).getReg(), 0, UseMI)) - continue; - } - - SmallVector<unsigned,8> Ops; - bool Reads, Writes; - tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); - bool Kills = false, Deads = false; - - // Replace SrcReg with DstReg in all UseMI operands. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(Ops[i]); - Kills |= MO.isKill(); - Deads |= MO.isDead(); - - if (DstIsPhys) - MO.substPhysReg(DstReg, *tri_); - else - MO.substVirtReg(DstReg, SubIdx, *tri_); - } - - // This instruction is a copy that will be removed. - if (JoinedCopies.count(UseMI)) - continue; - - if (SubIdx) { - // If UseMI was a simple SrcReg def, make sure we didn't turn it into a - // read-modify-write of DstReg. 
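// Illustration: a plain def of the virtual SrcReg rewritten as a def
// of DstReg:sub_x now implicitly reads the other lanes of DstReg, so
// the code below restores <dead> or plain-def markers on DstReg (and,
// for physregs, widens kill flags to the whole register).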
- if (Deads) - UseMI->addRegisterDead(DstReg, tri_); - else if (!Reads && Writes) - UseMI->addRegisterDefined(DstReg, tri_); - - // Kill flags apply to the whole physical register. - if (DstIsPhys && Kills) - UseMI->addRegisterKilled(DstReg, tri_); - } - - DEBUG({ - dbgs() << "\t\tupdated: "; - if (!UseMI->isDebugValue()) - dbgs() << li_->getInstructionIndex(UseMI) << "\t"; - dbgs() << *UseMI; - }); - } -} - -/// removeIntervalIfEmpty - Check if the live interval of a physical register -/// is empty, if so remove it and also remove the empty intervals of its -/// sub-registers. Return true if live interval is removed. -static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, - const TargetRegisterInfo *tri_) { - if (li.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - LiveInterval &sli = li_->getInterval(*SR); - if (sli.empty()) - li_->removeInterval(*SR); - } - li_->removeInterval(li.reg); - return true; - } - return false; -} - -/// RemoveDeadDef - If a def of a live interval is now determined dead, remove -/// the val# it defines. If the live interval becomes empty, remove it as well. -bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, - MachineInstr *DefMI) { - SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); - LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); - if (DefIdx != MLR->valno->def) - return false; - li.removeValNo(MLR->valno); - return removeIntervalIfEmpty(li, li_, tri_); -} - -void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, - const MachineInstr *CopyMI) { - SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); - if (li_->hasInterval(DstReg)) { - LiveInterval &LI = li_->getInterval(DstReg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } - if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) - return; - for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { - if (!li_->hasInterval(*AS)) - continue; - LiveInterval &LI = li_->getInterval(*AS); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } -} - -/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. -/// We need to be careful about coalescing a source physical register with a -/// virtual register. Once the coalescing is done, it cannot be broken and these -/// are not spillable! If the destination interval uses are far away, think -/// twice about coalescing them! -bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { - bool Allocatable = li_->isAllocatable(CP.getDstReg()); - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); - - /// Always join simple intervals that are defined by a single copy from a - /// reserved register. This doesn't increase register pressure, so it is - /// always beneficial. - if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) - return true; - - if (!EnablePhysicalJoin) { - DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); - return false; - } - - // Only coalesce to allocatable physreg, we don't want to risk modifying - // reserved registers. - if (!Allocatable) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // Don't join with physregs that have a ridiculous number of live - // ranges. 
The data structure performance is really bad when that - // happens. - if (li_->hasInterval(CP.getDstReg()) && - li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } - - // FIXME: Why are we skipping this test for partial copies? - // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. - if (!CP.isPartial()) { - const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); - unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold) { - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - return false; - } - } - return true; -} - -/// isWinToJoinCrossClass - Return true if it's profitable to coalesce -/// two virtual registers from different register classes. -bool -SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, - unsigned DstReg, - const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *NewRC) { - unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); - // This heuristic is good enough in practice, but it's obviously not *right*. - // 4 is a magic number that works well enough for x86, ARM, etc. It filters - // out all but the most restrictive register classes. - if (NewRCCount > 4 || - // Early exit if the function is fairly small, coalesce aggressively if - // that's the case. For really special register classes with 3 or - // fewer registers, be a bit more careful. - (li_->getFuncInstructionCount() / NewRCCount) < 8) - return true; - LiveInterval &SrcInt = li_->getInterval(SrcReg); - LiveInterval &DstInt = li_->getInterval(DstReg); - unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); - unsigned DstSize = li_->getApproximateInstructionCount(DstInt); - - // Coalesce aggressively if the intervals are small compared to the number of - // registers in the new class. The number 4 is fairly arbitrary, chosen to be - // less aggressive than the 8 used for the whole function size. - const unsigned ThresSize = 4 * NewRCCount; - if (SrcSize <= ThresSize && DstSize <= ThresSize) - return true; - - // Estimate *register use density*. If it doubles or more, abort. - unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), - mri_->use_nodbg_end()); - unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), - mri_->use_nodbg_end()); - unsigned NewUses = SrcUses + DstUses; - unsigned NewSize = SrcSize + DstSize; - if (SrcRC != NewRC && SrcSize > ThresSize) { - unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); - if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) - return false; - } - if (DstRC != NewRC && DstSize > ThresSize) { - unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); - if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) - return false; - } - return true; -} - - -/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, -/// which are the src/dst of the copy instruction CopyMI. This returns true -/// if the copy was successfully coalesced away. If it is not currently -/// possible to coalesce this interval, but it may be possible if other -/// things get coalesced, then it returns true by reference in 'Again'.
-bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { - MachineInstr *CopyMI = TheCopy.MI; - - Again = false; - if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) - return false; // Already done. - - DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - - CoalescerPair CP(*tii_, *tri_); - if (!CP.setRegisters(CopyMI)) { - DEBUG(dbgs() << "\tNot coalescable.\n"); - return false; - } - - // If they are already joined we continue. - if (CP.getSrcReg() == CP.getDstReg()) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tCopy already coalesced.\n"); - return false; // Not coalescable. - } - - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) - << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) - << "\n"); - - // Enforce policies. - if (CP.isPhys()) { - if (!shouldJoinPhys(CP)) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) - return true; - return false; - } - } else { - // Avoid constraining virtual register regclass too much. - if (CP.isCrossClass()) { - DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); - if (DisableCrossClassJoin) { - DEBUG(dbgs() << "\tCross-class joins disabled.\n"); - return false; - } - if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), - mri_->getRegClass(CP.getSrcReg()), - mri_->getRegClass(CP.getDstReg()), - CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - - // When possible, let DstReg be the larger interval. - if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > - li_->getInterval(CP.getDstReg()).ranges.size()) - CP.flip(); - } - - // Okay, attempt to join these two intervals. On failure, this returns false. - // Otherwise, if one of the intervals being joined is a physreg, this method - // always canonicalizes DstInt to be it. The output "SrcInt" will not have - // been modified, so we can use this information below to update aliases. - if (!JoinIntervals(CP)) { - // Coalescing failed. - - // If definition of source is defined by trivial computation, try - // rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, - CP.getDstReg(), 0, CopyMI)) - return true; - - // If we can eliminate the copy without merging the live ranges, do so now. - if (!CP.isPartial()) { - if (AdjustCopiesBackFrom(CP, CopyMI) || - RemoveCopyByCommutingDef(CP, CopyMI)) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tTrivial!\n"); - return true; - } - } - - // Otherwise, we are unable to join the intervals. - DEBUG(dbgs() << "\tInterference!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - - // Coalescing to a virtual register that is of a sub-register class of the - // other. Make sure the resulting register is set to the right register class. - if (CP.isCrossClass()) { - ++numCrossRCs; - mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); - } - - // Remember to delete the copy instruction. - markAsJoined(CopyMI); - - UpdateRegDefsUses(CP); - - // If we have extended the live range of a physical register, make sure we - // update live-in lists as well. 
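// Sketch of the invariant maintained below: every basic block that
// the merged physreg interval now passes through live must list that
// physreg as a live-in, which the findLiveInMBBs walk re-establishes
// range by range.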
- if (CP.isPhys()) { - SmallVector<MachineBasicBlock*, 16> BlockSeq; - // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the - // ranges for this, and they are preserved. - LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); - for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); - I != E; ++I ) { - li_->findLiveInMBBs(I->start, I->end, BlockSeq); - for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { - MachineBasicBlock &block = *BlockSeq[idx]; - if (!block.isLiveIn(CP.getDstReg())) - block.addLiveIn(CP.getDstReg()); - } - BlockSeq.clear(); - } - } - - // SrcReg is guaranteed to be the register whose live interval is - // being merged. - li_->removeInterval(CP.getSrcReg()); - - // Update regalloc hint. - tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); - - DEBUG({ - LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); - dbgs() << "\tJoined. Result = "; - DstInt.print(dbgs(), tri_); - dbgs() << "\n"; - }); - - ++numJoins; - return true; -} - -/// ComputeUltimateVN - Assuming we are going to join two live intervals, -/// compute what the resultant value numbers for each value in the input two -/// ranges will be. This is complicated by copies between the two which can -/// and will commonly cause multiple value numbers to be merged into one. -/// -/// VN is the value number that we're trying to resolve. InstDefiningValue -/// keeps track of the new InstDefiningValue assignment for the result -/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of -/// whether a value in this or other is a copy from the opposite set. -/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have -/// already been assigned. -/// -/// ThisFromOther[x] - If x is defined as a copy from the other interval, this -/// contains the value number the copy is from. -/// -static unsigned ComputeUltimateVN(VNInfo *VNI, - SmallVector<VNInfo*, 16> &NewVNInfo, - DenseMap<VNInfo*, VNInfo*> &ThisFromOther, - DenseMap<VNInfo*, VNInfo*> &OtherFromThis, - SmallVector<int, 16> &ThisValNoAssignments, - SmallVector<int, 16> &OtherValNoAssignments) { - unsigned VN = VNI->id; - - // If the VN has already been computed, just return it. - if (ThisValNoAssignments[VN] >= 0) - return ThisValNoAssignments[VN]; - assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); - - // If this val is not a copy from the other val, then it must be a new value - // number in the destination. - DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); - if (I == ThisFromOther.end()) { - NewVNInfo.push_back(VNI); - return ThisValNoAssignments[VN] = NewVNInfo.size()-1; - } - VNInfo *OtherValNo = I->second; - - // Otherwise, this *is* a copy from the RHS. If the other side has already - // been computed, return it. - if (OtherValNoAssignments[OtherValNo->id] >= 0) - return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; - - // Mark this value number as currently being computed, then ask what the - // ultimate value # of the other value is. - ThisValNoAssignments[VN] = -2; - unsigned UltimateVN = - ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, - OtherValNoAssignments, ThisValNoAssignments); - return ThisValNoAssignments[VN] = UltimateVN; -} - -/// JoinIntervals - Attempt to join these two intervals. On failure, this /// returns false.
-bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { - LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); - DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); - - // If a live interval is a physical register, check for interference with any - // aliases. The interference check implemented here is a bit more conservative - // than the full interference check below. We allow overlapping live ranges - // only when one is a copy of the other. - if (CP.isPhys()) { - for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){ - if (!li_->hasInterval(*AS)) - continue; - const LiveInterval &LHS = li_->getInterval(*AS); - LiveInterval::const_iterator LI = LHS.begin(); - for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); - RI != RE; ++RI) { - LI = std::lower_bound(LI, LHS.end(), RI->start); - // Does LHS have an overlapping live range starting before RI? - if ((LI != LHS.begin() && LI[-1].end > RI->start) && - (RI->start != RI->valno->def || - !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), tri_); - dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; - }); - return false; - } - - // Check that LHS ranges beginning in this range are copies. - for (; LI != LHS.end() && LI->start < RI->end; ++LI) { - if (LI->start != LI->valno->def || - !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), tri_); - dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; - }); - return false; - } - } - } - } - } - - // Compute the final value assignment, assuming that the live ranges can be - // coalesced. - SmallVector<int, 16> LHSValNoAssignments; - SmallVector<int, 16> RHSValNoAssignments; - DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; - DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; - SmallVector<VNInfo*, 16> NewVNInfo; - - LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); - DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); - - // Loop over the value numbers of the LHS, seeing if any are defined from - // the RHS. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? - continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; - - // DstReg is known to be a register in the LHS interval. If the src is - // from the RHS interval, we can use its value #. - if (!CP.isCoalescable(VNI->getCopy())) - continue; - - // Figure out the value # from the RHS. - LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - // The copy could be to an aliased physreg. - if (!lr) continue; - LHSValsDefinedFromRHS[VNI] = lr->valno; - } - - // Loop over the value numbers of the RHS, seeing if any are defined from - // the LHS. - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? - continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; - - // DstReg is known to be a register in the RHS interval. If the src is - // from the LHS interval, we can use its value #.
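// Illustration of the mapping being built (value names hypothetical):
// if an RHS value r1 is defined by a copy whose source carries LHS
// value l0, then RHSValsDefinedFromLHS[r1] = l0, letting
// ComputeUltimateVN later fold r1 and l0 into one value number of the
// joined interval.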
- if (!CP.isCoalescable(VNI->getCopy())) - continue; - - // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - // The copy could be to an aliased physreg. - if (!lr) continue; - RHSValsDefinedFromLHS[VNI] = lr->valno; - } - - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); - } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - // If this value number isn't a copy from the LHS, it's a new number. - if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; - continue; - } - - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } - - // Armed with the mappings of LHS/RHS values to ultimate values, walk the - // interval lists to see if these intervals are coalescable. - LiveInterval::const_iterator I = LHS.begin(); - LiveInterval::const_iterator IE = LHS.end(); - LiveInterval::const_iterator J = RHS.begin(); - LiveInterval::const_iterator JE = RHS.end(); - - // Skip ahead until the first place of potential sharing. - if (I != IE && J != JE) { - if (I->start < J->start) { - I = std::upper_bound(I, IE, J->start); - if (I != LHS.begin()) --I; - } else if (J->start < I->start) { - J = std::upper_bound(J, JE, I->start); - if (J != RHS.begin()) --J; - } - } - - while (I != IE && J != JE) { - // Determine if these two live ranges overlap. - bool Overlaps; - if (I->start < J->start) { - Overlaps = I->end > J->start; - } else { - Overlaps = J->end > I->start; - } - - // If so, check value # info to determine if they are really different. - if (Overlaps) { - // If the live range overlap will map to the same value number in the - // result liverange, we can still coalesce them. If not, we can't. - if (LHSValNoAssignments[I->valno->id] != - RHSValNoAssignments[J->valno->id]) - return false; - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. - if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) - return false; - } - - if (I->end < J->end) - ++I; - else - ++J; - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. 
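// A standalone sketch, not from the patch, of the two-pointer sweep that the
// "walk the interval lists" loop above performs. Range is a hypothetical
// stand-in for LiveRange: half-open [start, end) intervals, with each list
// sorted and internally non-overlapping.

#include <cstddef>
#include <vector>

struct Range { int start, end; };

static bool anyOverlap(const std::vector<Range> &A,
                       const std::vector<Range> &B) {
  std::size_t i = 0, j = 0;
  while (i != A.size() && j != B.size()) {
    if (A[i].end <= B[j].start)
      ++i;                      // A[i] ends before B[j] begins
    else if (B[j].end <= A[i].start)
      ++j;                      // B[j] ends before A[i] begins
    else
      return true;              // the two ranges intersect
  }
  return false;
}

// JoinIntervals tolerates such an intersection only when both ranges map to
// the same value number in the merged interval (and no early clobber
// redefines it); otherwise the join is rejected.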
-  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
-         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
-    VNInfo *VNI = I->first;
-    unsigned RHSValID = RHSValNoAssignments[VNI->id];
-    if (VNI->hasPHIKill())
-      NewVNInfo[RHSValID]->setHasPHIKill(true);
-  }
-
-  if (LHSValNoAssignments.empty())
-    LHSValNoAssignments.push_back(-1);
-  if (RHSValNoAssignments.empty())
-    RHSValNoAssignments.push_back(-1);
-
-  // If we get here, we know that we can coalesce the live ranges. Ask the
-  // intervals to coalesce themselves now.
-  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
-           mri_);
-  return true;
-}
-
-namespace {
-  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
-  // depth of the basic block (the unsigned), and then on the MBB number.
-  struct DepthMBBCompare {
-    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
-    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
-      // Deeper loops first
-      if (LHS.first != RHS.first)
-        return LHS.first > RHS.first;
-
-      // Prefer blocks that are more connected in the CFG. This takes care of
-      // the most difficult copies first while intervals are short.
-      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
-      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
-      if (cl != cr)
-        return cl > cr;
-
-      // As a last resort, sort by block number.
-      return LHS.second->getNumber() < RHS.second->getNumber();
-    }
-  };
-}
-
-void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
-                                                 std::vector<CopyRec> &TryAgain) {
-  DEBUG(dbgs() << MBB->getName() << ":\n");
-
-  SmallVector<CopyRec, 8> VirtCopies;
-  SmallVector<CopyRec, 8> PhysCopies;
-  SmallVector<CopyRec, 8> ImpDefCopies;
-  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
-       MII != E;) {
-    MachineInstr *Inst = MII++;
-
-    // If this is neither a copy nor an extract_subreg, we can't join intervals.
-    unsigned SrcReg, DstReg;
-    if (Inst->isCopy()) {
-      DstReg = Inst->getOperand(0).getReg();
-      SrcReg = Inst->getOperand(1).getReg();
-    } else if (Inst->isSubregToReg()) {
-      DstReg = Inst->getOperand(0).getReg();
-      SrcReg = Inst->getOperand(2).getReg();
-    } else
-      continue;
-
-    bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
-    bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
-    if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
-      ImpDefCopies.push_back(CopyRec(Inst, 0));
-    else if (SrcIsPhys || DstIsPhys)
-      PhysCopies.push_back(CopyRec(Inst, 0));
-    else
-      VirtCopies.push_back(CopyRec(Inst, 0));
-  }
-
-  // Try coalescing implicit copies and insert_subreg <undef> first,
-  // followed by copies to / from physical registers, then finally copies
-  // from virtual registers to virtual registers.
-  for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
-    CopyRec &TheCopy = ImpDefCopies[i];
-    bool Again = false;
-    if (!JoinCopy(TheCopy, Again))
-      if (Again)
-        TryAgain.push_back(TheCopy);
-  }
-  for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
-    CopyRec &TheCopy = PhysCopies[i];
-    bool Again = false;
-    if (!JoinCopy(TheCopy, Again))
-      if (Again)
-        TryAgain.push_back(TheCopy);
-  }
-  for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
-    CopyRec &TheCopy = VirtCopies[i];
-    bool Again = false;
-    if (!JoinCopy(TheCopy, Again))
-      if (Again)
-        TryAgain.push_back(TheCopy);
-  }
-}
-
-void SimpleRegisterCoalescing::joinIntervals() {
-  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
-
-  std::vector<CopyRec> TryAgainList;
-  if (loopInfo->empty()) {
-    // If there are no loops in the function, join intervals in function order.
-    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
-         I != E; ++I)
-      CopyCoalesceInMBB(I, TryAgainList);
-  } else {
-    // Otherwise, join intervals in inner loops before other intervals.
-    // Unfortunately we can't just iterate over the loop hierarchy here because
-    // there may be more MBB's than BB's. Collect MBB's for sorting.
-
-    // Join intervals in the function prolog first. We want to join physical
-    // registers with virtual registers before the intervals get too long.
-    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
-    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
-      MachineBasicBlock *MBB = I;
-      MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
-    }
-
-    // Sort by loop depth.
-    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
-
-    // Finally, join intervals in loop nest order.
-    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
-      CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
-  }
-
-  // Joining intervals can allow other intervals to be joined. Iteratively join
-  // until we make no progress.
-  bool ProgressMade = true;
-  while (ProgressMade) {
-    ProgressMade = false;
-
-    for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
-      CopyRec &TheCopy = TryAgainList[i];
-      if (!TheCopy.MI)
-        continue;
-
-      bool Again = false;
-      bool Success = JoinCopy(TheCopy, Again);
-      if (Success || !Again) {
-        TheCopy.MI = 0;   // Mark this one as done.
-        ProgressMade = true;
-      }
-    }
-  }
-}
-
-void SimpleRegisterCoalescing::releaseMemory() {
-  JoinedCopies.clear();
-  ReMatCopies.clear();
-  ReMatDefs.clear();
-}
-
-bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
-  mf_ = &fn;
-  mri_ = &fn.getRegInfo();
-  tm_ = &fn.getTarget();
-  tri_ = tm_->getRegisterInfo();
-  tii_ = tm_->getInstrInfo();
-  li_ = &getAnalysis<LiveIntervals>();
-  ldv_ = &getAnalysis<LiveDebugVariables>();
-  AA = &getAnalysis<AliasAnalysis>();
-  loopInfo = &getAnalysis<MachineLoopInfo>();
-
-  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
-               << "********** Function: "
-               << ((Value*)mf_->getFunction())->getName() << '\n');
-
-  if (VerifyCoalescing)
-    mf_->verify(this, "Before register coalescing");
-
-  RegClassInfo.runOnMachineFunction(fn);
-
-  // Join (coalesce) intervals if requested.
-  if (EnableJoining) {
-    joinIntervals();
-    DEBUG({
-      dbgs() << "********** INTERVALS POST JOINING **********\n";
-      for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
-           I != E; ++I){
-        I->second->print(dbgs(), tri_);
-        dbgs() << "\n";
-      }
-    });
-  }
-
-  // Perform a final pass over the instructions and compute spill weights
-  // and remove identity moves.
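// The retry loop in joinIntervals above is a generic fixed-point worklist:
// keep sweeping until a whole pass makes no progress. A minimal sketch with
// hypothetical stand-ins (Item for CopyRec, tryProcess for JoinCopy); the
// real loop additionally retires copies whose Again flag says a retry can
// never help.

#include <cstddef>
#include <vector>

struct Item {
  bool Done;
  Item() : Done(false) {}
};

static bool tryProcess(Item &) { return true; } // stand-in for JoinCopy

static void runToFixedPoint(std::vector<Item> &Work) {
  bool Progress = true;
  while (Progress) {
    Progress = false;
    for (std::size_t i = 0, e = Work.size(); i != e; ++i) {
      if (Work[i].Done)
        continue;
      if (tryProcess(Work[i])) {
        Work[i].Done = true;  // a success may unblock later items,
        Progress = true;      // so sweep the list again
      }
    }
  }
}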
- SmallVector<unsigned, 4> DeadDefs; - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* mbb = mbbi; - for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); - mii != mie; ) { - MachineInstr *MI = mii; - if (JoinedCopies.count(MI)) { - // Delete all coalesced copies. - bool DoDelete = true; - assert(MI->isCopyLike() && "Unrecognized copy instruction"); - unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - MI->getNumOperands() > 2) - // Do not delete extract_subreg, insert_subreg of physical - // registers unless the definition is dead. e.g. - // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 - // or else the scavenger may complain. LowerSubregs will - // delete them later. - DoDelete = false; - - if (MI->allDefsAreDead()) { - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - li_->hasInterval(SrcReg)) - li_->shrinkToUses(&li_->getInterval(SrcReg)); - DoDelete = true; - } - if (!DoDelete) { - // We need the instruction to adjust liveness, so make it a KILL. - if (MI->isSubregToReg()) { - MI->RemoveOperand(3); - MI->RemoveOperand(1); - } - MI->setDesc(tii_->get(TargetOpcode::KILL)); - mii = llvm::next(mii); - } else { - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; - } - continue; - } - - // Now check if this is a remat'ed def instruction which is now dead. - if (ReMatDefs.count(MI)) { - bool isDead = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - DeadDefs.push_back(Reg); - if (MO.isDead()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !mri_->use_nodbg_empty(Reg)) { - isDead = false; - break; - } - } - if (isDead) { - while (!DeadDefs.empty()) { - unsigned DeadDef = DeadDefs.back(); - DeadDefs.pop_back(); - RemoveDeadDef(li_->getInterval(DeadDef), MI); - } - li_->RemoveMachineInstrFromMaps(mii); - mii = mbbi->erase(mii); - continue; - } else - DeadDefs.clear(); - } - - ++mii; - - // Check for now unnecessary kill flags. - if (li_->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - unsigned reg = MO.getReg(); - if (!reg || !li_->hasInterval(reg)) continue; - if (!li_->getInterval(reg).killedAt(DefIdx)) { - MO.setIsKill(false); - continue; - } - // When leaving a kill flag on a physreg, check if any subregs should - // remain alive. - if (!TargetRegisterInfo::isPhysicalRegister(reg)) - continue; - for (const unsigned *SR = tri_->getSubRegisters(reg); - unsigned S = *SR; ++SR) - if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) - MI->addRegisterDefined(S, tri_); - } - } - } - - DEBUG(dump()); - DEBUG(ldv_->dump()); - if (VerifyCoalescing) - mf_->verify(this, "After register coalescing"); - return true; -} - -/// print - Implement the dump method. -void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const { - li_->print(O, m); -} - -RegisterCoalescer* llvm::createSimpleRegisterCoalescer() { - return new SimpleRegisterCoalescing(); -} - -// Make sure that anything that uses RegisterCoalescer pulls in this file... 
-DEFINING_FILE_FOR(SimpleRegisterCoalescing) diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 92970e496c25..65a33da93afe 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -87,12 +87,10 @@ FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { bool SjLjEHPass::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf - const Type *VoidPtrTy = - Type::getInt8PtrTy(M.getContext()); - const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); FunctionContextTy = - StructType::get(M.getContext(), - VoidPtrTy, // __prev + StructType::get(VoidPtrTy, // __prev Int32Ty, // call_site ArrayType::get(Int32Ty, 4), // __data VoidPtrTy, // __personality diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index bf27cc86574f..761cab7ce850 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -76,12 +76,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { return LSP.first; // There may not be a call instruction (?) in which case we ignore LPad. LSP.second = LSP.first; - for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin(); - I != E; --I) + for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); + I != E;) { + --I; if (I->getDesc().isCall()) { LSP.second = LIS.getInstructionIndex(I); break; } + } } // If CurLI is live into a landing pad successor, move the last split point @@ -122,7 +124,7 @@ void SplitAnalysis::analyzeUses() { // Compute per-live block info. if (!calcLiveBlockInfo()) { // FIXME: calcLiveBlockInfo found inconsistencies in the live range. - // I am looking at you, SimpleRegisterCoalescing! + // I am looking at you, RegisterCoalescer! DidRepairRange = true; ++NumRepairs; DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); @@ -165,7 +167,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); // If the block contains no uses, the range must be live through. At one - // point, SimpleRegisterCoalescing could create dangling ranges that ended + // point, RegisterCoalescer could create dangling ranges that ended // mid-block. 
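// The computeLastSplitPoint hunk above fixes a reverse scan: the old loop
// decremented in the for-header and stopped at MBB->begin() without ever
// examining the first instruction. A standalone sketch, on a plain container,
// of the canonical decrement-inside-the-body idiom it switches to:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> v;
  v.push_back(1);
  v.push_back(2);
  v.push_back(3);
  // Visit elements last-to-first, including the first one, without ever
  // decrementing an iterator that already equals begin().
  for (std::vector<int>::iterator I = v.end(), E = v.begin(); I != E;) {
    --I;                      // step first, then inspect *I
    std::printf("%d\n", *I);  // prints 3, 2, 1
  }
  return 0;
}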
if (UseI == UseE || *UseI >= Stop) { ++NumThroughBlocks; @@ -634,6 +636,7 @@ unsigned SplitEditor::openIntv() { void SplitEditor::selectIntv(unsigned Idx) { assert(Idx != 0 && "Cannot select the complement interval"); assert(Idx < Edit->size() && "Can only select previously opened interval"); + DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n'); OpenIdx = Idx; } @@ -654,6 +657,24 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { return VNI->def; } +SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvAfter"); + DEBUG(dbgs() << " enterIntvAfter " << Idx); + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvAfter called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); SlotIndex End = LIS.getMBBEndIdx(&MBB); @@ -1005,12 +1026,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { markComplexMapped(i, ParentVNI); } -#ifndef NDEBUG - // Every new interval must have a def by now, otherwise the split is bogus. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) - assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); -#endif - // Transfer the simply mapped values, check if any are skipped. bool Skipped = transferValues(); if (Skipped) @@ -1109,3 +1124,263 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { } finish(); } + + +//===----------------------------------------------------------------------===// +// Global Live Range Splitting Support +//===----------------------------------------------------------------------===// + +// These methods support a method of global live range splitting that uses a +// global algorithm to decide intervals for CFG edges. They will insert split +// points and color intervals in basic blocks while avoiding interference. +// +// Note that splitSingleBlock is also useful for blocks where both CFG edges +// are on the stack. + +void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, + unsigned IntvIn, SlotIndex LeaveBefore, + unsigned IntvOut, SlotIndex EnterAfter){ + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); + + DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop + << ") intf " << LeaveBefore << '-' << EnterAfter + << ", live-through " << IntvIn << " -> " << IntvOut); + + assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); + + if (!IntvOut) { + DEBUG(dbgs() << ", spill on entry.\n"); + // + // <<<<<<<<< Possible LeaveBefore interference. + // |-----------| Live through. + // -____________ Spill on entry. + // + selectIntv(IntvIn); + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + SlotIndex Idx = leaveIntvAtTop(*MBB); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + (void)Idx; + return; + } + + if (!IntvIn) { + DEBUG(dbgs() << ", reload on exit.\n"); + // + // >>>>>>> Possible EnterAfter interference. + // |-----------| Live through. + // ___________-- Reload on exit. 
+ // + selectIntv(IntvOut); + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + SlotIndex Idx = enterIntvAtEnd(*MBB); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + (void)Idx; + return; + } + + if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) { + DEBUG(dbgs() << ", straight through.\n"); + // + // |-----------| Live through. + // ------------- Straight through, same intv, no interference. + // + selectIntv(IntvOut); + useIntv(Start, Stop); + return; + } + + // We cannot legally insert splits after LSP. + SlotIndex LSP = SA.getLastSplitPoint(MBBNum); + + if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter || + LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { + DEBUG(dbgs() << ", switch avoiding interference.\n"); + // + // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ------======= Switch intervals between interference. + // + SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP; + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(Cut); + useIntv(Idx, Stop); + selectIntv(IntvIn); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + DEBUG(dbgs() << ", create local intv for interference.\n"); + // + // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ==---------== Switch intervals before/after interference. + // + assert(LeaveBefore <= EnterAfter && "Missed case"); + + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + selectIntv(IntvIn); + Idx = leaveIntvBefore(LeaveBefore); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); +} + + +void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvIn, SlotIndex LeaveBefore) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstUse << '-' << BI.LastUse + << ", reg-in " << IntvIn << ", leave before " << LeaveBefore + << (BI.LiveOut ? ", stack-out" : ", killed in block")); + + assert(IntvIn && "Must have register in"); + assert(BI.LiveIn && "Must be live-in"); + assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference"); + + if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) { + DEBUG(dbgs() << " before interference.\n"); + // + // <<< Interference after kill. + // |---o---x | Killed in block. + // ========= Use IntvIn everywhere. + // + selectIntv(IntvIn); + useIntv(Start, BI.LastUse); + return; + } + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) { + // + // <<< Possible interference after last use. + // |---o---o---| Live-out on stack. + // =========____ Leave IntvIn after last use. + // + // < Interference after last use. + // |---o---o--o| Live-out on stack, late last use. + // ============ Copy to stack after LSP, overlap IntvIn. + // \_____ Stack interval is live-out. 
+ // + if (BI.LastUse < LSP) { + DEBUG(dbgs() << ", spill after last use before interference.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvAfter(BI.LastUse); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } else { + DEBUG(dbgs() << ", spill before last split point.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvBefore(LSP); + overlapIntv(Idx, BI.LastUse); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } + return; + } + + // The interference is overlapping somewhere we wanted to use IntvIn. That + // means we need to create a local interval that can be allocated a + // different register. + unsigned LocalIntv = openIntv(); + (void)LocalIntv; + DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); + + if (!BI.LiveOut || BI.LastUse < LSP) { + // + // <<<<<<< Interference overlapping uses. + // |---o---o---| Live-out on stack. + // =====----____ Leave IntvIn before interference, then spill. + // + SlotIndex To = leaveIntvAfter(BI.LastUse); + SlotIndex From = enterIntvBefore(LeaveBefore); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); + return; + } + + // <<<<<<< Interference overlapping uses. + // |---o---o--o| Live-out on stack, late last use. + // =====------- Copy to stack before LSP, overlap LocalIntv. + // \_____ Stack interval is live-out. + // + SlotIndex To = leaveIntvBefore(LSP); + overlapIntv(To, BI.LastUse); + SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore)); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); +} + +void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter) { + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstUse << '-' << BI.LastUse + << ", reg-out " << IntvOut << ", enter after " << EnterAfter + << (BI.LiveIn ? ", stack-in" : ", defined in block")); + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + assert(IntvOut && "Must have register out"); + assert(BI.LiveOut && "Must be live-out"); + assert((!EnterAfter || EnterAfter < LSP) && "Bad interference"); + + if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) { + DEBUG(dbgs() << " after interference.\n"); + // + // >>>> Interference before def. + // | o---o---| Defined in block. + // ========= Use IntvOut everywhere. + // + selectIntv(IntvOut); + useIntv(BI.FirstUse, Stop); + return; + } + + if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) { + DEBUG(dbgs() << ", reload after interference.\n"); + // + // >>>> Interference before def. + // |---o---o---| Live-through, stack-in. + // ____========= Enter IntvOut before first use. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse)); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + // The interference is overlapping somewhere we wanted to use IntvOut. That + // means we need to create a local interval that can be allocated a + // different register. + DEBUG(dbgs() << ", interference overlaps uses.\n"); + // + // >>>>>>> Interference overlapping uses. + // |---o---o---| Live-through, stack-in. + // ____---====== Create local interval for interference range. 
+  //
+  selectIntv(IntvOut);
+  SlotIndex Idx = enterIntvAfter(EnterAfter);
+  useIntv(Idx, Stop);
+  assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+  openIntv();
+  SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse));
+  useIntv(From, Idx);
+}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 7174c0b55f23..7948b725f856 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -81,6 +81,12 @@ public:
     bool LiveThrough;   ///< Live in whole block (Templ 5. above).
     bool LiveIn;        ///< Current reg is live in.
     bool LiveOut;       ///< Current reg is live out.
+
+    /// isOneInstr - Returns true when this BlockInfo describes a single
+    /// instruction.
+    bool isOneInstr() const {
+      return SlotIndex::isSameInstr(FirstUse, LastUse);
+    }
   };
 
 private:
@@ -360,6 +366,10 @@ public:
   /// Return the beginning of the new live range.
   SlotIndex enterIntvBefore(SlotIndex Idx);
 
+  /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvAfter(SlotIndex Idx);
+
   /// enterIntvAtEnd - Enter the open interval at the end of MBB.
   /// Use the open interval from the inserted copy to the MBB end.
   /// Return the beginning of the new live range.
@@ -416,6 +426,42 @@ public:
   /// splitSingleBlocks - Split CurLI into a separate live interval inside each
   /// basic block in Blocks.
   void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+
+  /// splitLiveThroughBlock - Split CurLI in the given block such that it
+  /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+  /// the block, but they will be ignored when placing split points.
+  ///
+  /// @param MBBNum      Block number.
+  /// @param IntvIn      Interval index entering the block.
+  /// @param LeaveBefore When set, leave IntvIn before this point.
+  /// @param IntvOut     Interval index leaving the block.
+  /// @param EnterAfter  When set, enter IntvOut after this point.
+  void splitLiveThroughBlock(unsigned MBBNum,
+                             unsigned IntvIn, SlotIndex LeaveBefore,
+                             unsigned IntvOut, SlotIndex EnterAfter);
+
+  /// splitRegInBlock - Split CurLI in the given block such that it enters the
+  /// block in IntvIn and leaves it on the stack (or not at all). Split points
+  /// are placed in a way that avoids putting uses in the stack interval. This
+  /// may require creating a local interval when there is interference.
+  ///
+  /// @param BI          Block descriptor.
+  /// @param IntvIn      Interval index entering the block. Not 0.
+  /// @param LeaveBefore When set, leave IntvIn before this point.
+  void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+                       unsigned IntvIn, SlotIndex LeaveBefore);
+
+  /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+  /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+  /// Split points are placed to avoid interference and such that the uses are
+  /// not in the stack interval. This may require creating a local interval
+  /// when there is interference.
+  ///
+  /// @param BI          Block descriptor.
+  /// @param IntvOut     Interval index leaving the block.
+  /// @param EnterAfter  When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter); }; } diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp index 08aee82b8c5c..ec75df4b7d1f 100644 --- a/lib/CodeGen/Splitter.cpp +++ b/lib/CodeGen/Splitter.cpp @@ -11,7 +11,7 @@ #include "Splitter.h" -#include "SimpleRegisterCoalescing.h" +#include "RegisterCoalescer.h" #include "llvm/Module.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index f0a44abaf5cd..d3cbd15b64e8 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -186,7 +186,7 @@ bool StackProtector::InsertStackProtectors() { Value *Args[] = { LI, AI }; CallInst:: Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - &Args[0], array_endof(Args), "", InsPt); + Args, "", InsPt); // Create the basic block to jump to when the guard check fails. FailBB = CreateFailBB(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 01f5b5627f4f..57cbe1ba5960 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -504,7 +504,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, bool FoundDef = false; // Not counting 2address def. Uses.clear(); - const TargetInstrDesc &TID = MII->getDesc(); + const MCInstrDesc &MCID = MII->getDesc(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { MachineOperand &MO = MII->getOperand(i); if (!MO.isReg()) @@ -521,7 +521,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, if (MO.getSubReg() || MII->isSubregToReg()) return false; - const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); if (RC && !RC->contains(NewReg)) return false; @@ -566,7 +566,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, SmallVector<MachineOperand*, 4> Uses; while (++MII != MBB->end()) { bool FoundKill = false; - const TargetInstrDesc &TID = MII->getDesc(); + const MCInstrDesc &MCID = MII->getDesc(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { MachineOperand &MO = MII->getOperand(i); if (!MO.isReg()) @@ -583,7 +583,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, if (MO.getSubReg()) return false; - const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); + const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); if (RC && !RC->contains(NewReg)) return false; if (MO.isKill()) diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index e8eab8f5cf61..6b801cbf6e1e 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -95,10 +95,22 @@ namespace { SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); bool shouldTailDuplicate(const MachineFunction &MF, - MachineBasicBlock &TailBB); - bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + bool IsSimple, MachineBasicBlock &TailBB); + bool isSimpleBB(MachineBasicBlock *TailBB); + bool canCompletelyDuplicateBB(MachineBasicBlock &BB); + bool duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + const DenseSet<unsigned> &RegsUsedByPhi, + SmallVector<MachineInstr*, 16> &Copies); + bool TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, 
                       SmallVector<MachineBasicBlock*, 8> &TDBBs,
                       SmallVector<MachineInstr*, 16> &Copies);
+    bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+                                bool IsSimple,
+                                MachineFunction &MF);
+
     void RemoveDeadBlock(MachineBasicBlock *MBB);
   };
 
@@ -169,6 +181,109 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
   }
 }
 
+/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+                                          bool IsSimple,
+                                          MachineFunction &MF) {
+  // Save the successors list.
+  SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+                                              MBB->succ_end());
+
+  SmallVector<MachineBasicBlock*, 8> TDBBs;
+  SmallVector<MachineInstr*, 16> Copies;
+  if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+    return false;
+
+  ++NumTails;
+
+  SmallVector<MachineInstr*, 8> NewPHIs;
+  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+  // TailBB's immediate successors are now successors of those predecessors
+  // which duplicated TailBB. Add the predecessors as sources to the PHI
+  // instructions.
+  bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+  if (PreRegAlloc)
+    UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+  // If it is dead, remove it.
+  if (isDead) {
+    NumInstrDups -= MBB->size();
+    RemoveDeadBlock(MBB);
+    ++NumDeadBlocks;
+  }
+
+  // Update SSA form.
+  if (!SSAUpdateVRs.empty()) {
+    for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+      unsigned VReg = SSAUpdateVRs[i];
+      SSAUpdate.Initialize(VReg);
+
+      // If the original definition is still around, add it as an available
+      // value.
+      MachineInstr *DefMI = MRI->getVRegDef(VReg);
+      MachineBasicBlock *DefBB = 0;
+      if (DefMI) {
+        DefBB = DefMI->getParent();
+        SSAUpdate.AddAvailableValue(DefBB, VReg);
+      }
+
+      // Add the new vregs as available values.
+      DenseMap<unsigned, AvailableValsTy>::iterator LI =
+        SSAUpdateVals.find(VReg);
+      for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+        MachineBasicBlock *SrcBB = LI->second[j].first;
+        unsigned SrcReg = LI->second[j].second;
+        SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+      }
+
+      // Rewrite uses that are outside of the original def's block.
+      MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+      while (UI != MRI->use_end()) {
+        MachineOperand &UseMO = UI.getOperand();
+        MachineInstr *UseMI = &*UI;
+        ++UI;
+        if (UseMI->isDebugValue()) {
+          // SSAUpdate can replace the use with an undef. That creates
+          // a debug instruction that is a kill.
+          // FIXME: Should it be SSAUpdate's job to delete debug instructions
+          // instead of replacing the use with undef?
+          UseMI->eraseFromParent();
+          continue;
+        }
+        if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+          continue;
+        SSAUpdate.RewriteUse(UseMO);
+      }
+    }
+
+    SSAUpdateVRs.clear();
+    SSAUpdateVals.clear();
+  }
+
+  // Eliminate some of the copies inserted by tail duplication to maintain
+  // SSA form.
+  for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+    MachineInstr *Copy = Copies[i];
+    if (!Copy->isCopy())
+      continue;
+    unsigned Dst = Copy->getOperand(0).getReg();
+    unsigned Src = Copy->getOperand(1).getReg();
+    MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+    if (++UI == MRI->use_end()) {
+      // Copy is the only use. Do trivial copy propagation here.
+      MRI->replaceRegWith(Dst, Src);
+      Copy->eraseFromParent();
+    }
+  }
+
+  if (NewPHIs.size())
+    NumAddedPHIs += NewPHIs.size();
+
+  return true;
+}
+
 /// TailDuplicateBlocks - Look for small blocks that are unconditionally
 /// branched to and do not fall through.
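// The SSA repair loop above follows the usual MachineSSAUpdater recipe; a
// condensed sketch of that recipe (registers and blocks here are
// hypothetical, and the per-register iteration and bookkeeping are omitted):
//
//   MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
//   SSAUpdate.Initialize(VReg);                   // register being repaired
//   SSAUpdate.AddAvailableValue(DefBB, VReg);     // surviving original def
//   SSAUpdate.AddAvailableValue(CopyBB, NewVReg); // def made by duplication
//   SSAUpdate.RewriteUse(UseMO);                  // per out-of-block use
//
// RewriteUse inserts whatever PHIs are needed to merge the available values,
// which is why NewPHIs can be non-empty afterwards.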
Tail-duplicate their instructions /// into their predecessors to eliminate (dynamic) branches. @@ -180,100 +295,22 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { VerifyPHIs(MF, true); } - SmallVector<MachineInstr*, 8> NewPHIs; - MachineSSAUpdater SSAUpdate(MF, &NewPHIs); - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; if (NumTails == TailDupLimit) break; - // Save the successors list. - SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), - MBB->succ_end()); - - SmallVector<MachineBasicBlock*, 8> TDBBs; - SmallVector<MachineInstr*, 16> Copies; - if (TailDuplicate(MBB, MF, TDBBs, Copies)) { - ++NumTails; - - // TailBB's immediate successors are now successors of those predecessors - // which duplicated TailBB. Add the predecessors as sources to the PHI - // instructions. - bool isDead = MBB->pred_empty(); - if (PreRegAlloc) - UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); - - // If it is dead, remove it. - if (isDead) { - NumInstrDups -= MBB->size(); - RemoveDeadBlock(MBB); - ++NumDeadBlocks; - } - - // Update SSA form. - if (!SSAUpdateVRs.empty()) { - for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { - unsigned VReg = SSAUpdateVRs[i]; - SSAUpdate.Initialize(VReg); - - // If the original definition is still around, add it as an available - // value. - MachineInstr *DefMI = MRI->getVRegDef(VReg); - MachineBasicBlock *DefBB = 0; - if (DefMI) { - DefBB = DefMI->getParent(); - SSAUpdate.AddAvailableValue(DefBB, VReg); - } - - // Add the new vregs as available values. - DenseMap<unsigned, AvailableValsTy>::iterator LI = - SSAUpdateVals.find(VReg); - for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = LI->second[j].first; - unsigned SrcReg = LI->second[j].second; - SSAUpdate.AddAvailableValue(SrcBB, SrcReg); - } - - // Rewrite uses that are outside of the original def's block. - MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); - while (UI != MRI->use_end()) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; - ++UI; - if (UseMI->getParent() == DefBB && !UseMI->isPHI()) - continue; - SSAUpdate.RewriteUse(UseMO); - } - } + bool IsSimple = isSimpleBB(MBB); - SSAUpdateVRs.clear(); - SSAUpdateVals.clear(); - } - - // Eliminate some of the copies inserted by tail duplication to maintain - // SSA form. - for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - MachineInstr *Copy = Copies[i]; - if (!Copy->isCopy()) - continue; - unsigned Dst = Copy->getOperand(0).getReg(); - unsigned Src = Copy->getOperand(1).getReg(); - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); - if (++UI == MRI->use_end()) { - // Copy is the only use. Do trivial copy propagation here. 
- MRI->replaceRegWith(Dst, Src); - Copy->eraseFromParent(); - } - } + if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + continue; - if (PreRegAlloc && TailDupVerify) - VerifyPHIs(MF, false); - MadeChange = true; - } + MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); } - NumAddedPHIs += NewPHIs.size(); + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); return MadeChange; } @@ -283,6 +320,8 @@ static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), UE = MRI->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; + if (UseMI->isDebugValue()) + continue; if (UseMI->getParent() != BB) return true; } @@ -485,11 +524,16 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, /// shouldTailDuplicate - Determine if it is profitable to duplicate this block. bool TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, + bool IsSimple, MachineBasicBlock &TailBB) { // Only duplicate blocks that end with unconditional branches. if (TailBB.canFallThrough()) return false; + // Don't try to tail-duplicate single-block loops. + if (TailBB.isSuccessor(&TailBB)) + return false; + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. @@ -500,73 +544,208 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, else MaxDuplicateCount = TailDuplicateSize; - if (PreRegAlloc) { - if (TailBB.empty()) - return false; - const TargetInstrDesc &TID = TailBB.back().getDesc(); - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches. - if (!TID.isIndirectBranch()) - return false; - // If the target has hardware branch prediction that can handle indirect - // branches, duplicating them can often make them predictable when there - // are common paths through the code. The limit needs to be high enough - // to allow undoing the effects of tail merging and other optimizations - // that rearrange the predecessors of the indirect branch. - MaxDuplicateCount = 20; - } + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code. The limit needs to be high enough + // to allow undoing the effects of tail merging and other optimizations + // that rearrange the predecessors of the indirect branch. - // Don't try to tail-duplicate single-block loops. - if (TailBB.isSuccessor(&TailBB)) - return false; + bool HasIndirectbr = false; + if (!TailBB.empty()) + HasIndirectbr = TailBB.back().getDesc().isIndirectBranch(); + + if (HasIndirectbr && PreRegAlloc) + MaxDuplicateCount = 20; // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - bool HasCall = false; for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) return false; + if (I->getDesc().isNotDuplicable()) + return false; + // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) return false; - // Don't duplicate more than the threshold. 
-    if (InstrCount == MaxDuplicateCount) return false;
-    // Remember if we saw a call.
-    if (I->getDesc().isCall()) HasCall = true;
+    if (PreRegAlloc && I->getDesc().isReturn())
+      return false;
+
+    // Avoid duplicating calls before register allocation. Calls present a
+    // barrier to register allocation so duplicating them may end up increasing
+    // spills.
+    if (PreRegAlloc && I->getDesc().isCall())
+      return false;
+
     if (!I->isPHI() && !I->isDebugValue())
       InstrCount += 1;
+
+    if (InstrCount > MaxDuplicateCount)
+      return false;
   }
-  // Don't tail-duplicate calls before register allocation. Calls present a
-  // barrier to register allocation so duplicating them may end up increasing
-  // spills.
-  if (InstrCount > 1 && (PreRegAlloc && HasCall))
+
+  if (HasIndirectbr && PreRegAlloc)
+    return true;
+
+  if (IsSimple)
+    return true;
+
+  if (!PreRegAlloc)
+    return true;
+
+  return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+  if (TailBB->succ_size() != 1)
+    return false;
+  if (TailBB->pred_empty())
     return false;
+  MachineBasicBlock::iterator I = TailBB->begin();
+  MachineBasicBlock::iterator E = TailBB->end();
+  while (I != E && I->isDebugValue())
+    ++I;
+  if (I == E)
+    return true;
+  return I->getDesc().isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+              SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+  for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+       SE = A.succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock *BB = *SI;
+    if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+      return true;
+  }
+
+  return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+  SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+  for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+       PE = BB.pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    if (PredBB->succ_size() > 1)
+      return false;
+
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      return false;
+    if (!PredCond.empty())
+      return false;
+  }
   return true;
 }
 
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+                                     SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                     const DenseSet<unsigned> &UsedByPhi,
+                                     SmallVector<MachineInstr*, 16> &Copies) {
+  SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+                                           TailBB->succ_end());
+  SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+                                           TailBB->pred_end());
+  bool Changed = false;
+  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+       PE = Preds.end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    if (PredBB->getLandingPadSuccessor())
+      continue;
+
+    if (bothUsedInPHI(*PredBB, Succs))
+      continue;
+
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      continue;
+
+    Changed = true;
+    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+                 << "From simple Succ: " << *TailBB);
+
+    MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+    MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+    // Make PredFBB explicit.
+    if (PredCond.empty())
+      PredFBB = PredTBB;
+
+    // Make fall through explicit.
+ if (!PredTBB) + PredTBB = NextBB; + if (!PredFBB) + PredFBB = NextBB; + + // Redirect + if (PredFBB == TailBB) + PredFBB = NewTarget; + if (PredTBB == TailBB) + PredTBB = NewTarget; + + // Make the branch unconditional if possible + if (PredTBB == PredFBB) { + PredCond.clear(); + PredFBB = NULL; + } + + // Avoid adding fall through branches. + if (PredFBB == NextBB) + PredFBB = NULL; + if (PredTBB == NextBB && PredFBB == NULL) + PredTBB = NULL; + + TII->RemoveBranch(*PredBB); + + if (PredTBB) + TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + + PredBB->removeSuccessor(TailBB); + unsigned NumSuccessors = PredBB->succ_size(); + assert(NumSuccessors <= 1); + if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) + PredBB->addSuccessor(NewTarget); + + TDBBs.push_back(PredBB); + } + return Changed; +} + /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool -TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, SmallVector<MachineBasicBlock*, 8> &TDBBs, SmallVector<MachineInstr*, 16> &Copies) { - if (!shouldTailDuplicate(MF, *TailBB)) - return false; - DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + DenseSet<unsigned> UsedByPhi; + getRegsUsedByPHIs(*TailBB, &UsedByPhi); + + if (IsSimple) + return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); + // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), TailBB->pred_end()); - DenseSet<unsigned> UsedByPhi; - getRegsUsedByPHIs(*TailBB, &UsedByPhi); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; @@ -618,6 +797,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, TII->get(TargetOpcode::COPY), CopyInfos[i].first).addReg(CopyInfos[i].second)); } + + // Simplify + TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 34e2b33185b5..86e71d8ccbb6 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -59,8 +59,8 @@ TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // the two operands returned by findCommutedOpIndices. MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool NewMI) const { - const TargetInstrDesc &TID = MI->getDesc(); - bool HasDef = TID.getNumDefs(); + const MCInstrDesc &MCID = MI->getDesc(); + bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. return 0; @@ -81,7 +81,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool ChangeReg0 = false; if (HasDef && MI->getOperand(0).getReg() == Reg1) { // Must be two address instruction! 
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); Reg2IsKill = false; ChangeReg0 = true; @@ -119,12 +119,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isCommutable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isCommutable()) return false; // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this // is not true, then the target must implement this. - SrcOpIdx1 = TID.getNumDefs(); + SrcOpIdx1 = MCID.getNumDefs(); SrcOpIdx2 = SrcOpIdx1 + 1; if (!MI->getOperand(SrcOpIdx1).isReg() || !MI->getOperand(SrcOpIdx2).isReg()) @@ -137,12 +137,12 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { bool MadeChange = false; - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isPredicable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isPredicable()) return false; for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (TID.OpInfo[i].isPredicate()) { + if (MCID.OpInfo[i].isPredicate()) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { MO.setReg(Pred[j].getReg()); @@ -332,10 +332,10 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); // Avoid instructions obviously unsafe for remat. - if (TID.isNotDuplicable() || TID.mayStore() || + if (MCID.isNotDuplicable() || MCID.mayStore() || MI->hasUnmodeledSideEffects()) return false; @@ -345,7 +345,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return false; // Avoid instructions which load from potentially varying memory. 
- if (TID.mayLoad() && !MI->isInvariantLoad(AA)) + if (MCID.mayLoad() && !MI->isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index cdac42d1bf17..a3c562013b59 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -43,6 +43,19 @@ using namespace dwarf; // ELF //===----------------------------------------------------------------------===// +TargetLoweringObjectFileELF::TargetLoweringObjectFileELF() + : TargetLoweringObjectFile(), + TLSDataSection(0), + TLSBSSSection(0), + DataRelSection(0), + DataRelLocalSection(0), + DataRelROSection(0), + DataRelROLocalSection(0), + MergeableConst4Section(0), + MergeableConst8Section(0), + MergeableConst16Section(0) { +} + void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -189,8 +202,8 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, return Mang->getSymbol(GV); break; case dwarf::DW_EH_PE_pcrel: { - Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName(); - return getContext().GetOrCreateSymbol(FullName); + return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + + Mang->getSymbol(GV)->getName()); break; } } @@ -199,13 +212,13 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, const MCSymbol *Sym) const { - Twine FullName = StringRef("DW.ref.") + Sym->getName(); - MCSymbol *Label = getContext().GetOrCreateSymbol(FullName); + SmallString<64> NameData("DW.ref."); + NameData += Sym->getName(); + MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); - Twine SectionName = StringRef(".data.") + Label->getName(); - SmallString<64> NameData; - SectionName.toVector(NameData); + StringRef Prefix = ".data."; + NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; const MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, @@ -480,6 +493,27 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // MachO //===----------------------------------------------------------------------===// +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() + : TargetLoweringObjectFile(), + TLSDataSection(0), + TLSBSSSection(0), + TLSTLVSection(0), + TLSThreadInitSection(0), + CStringSection(0), + UStringSection(0), + TextCoalSection(0), + ConstTextCoalSection(0), + ConstDataSection(0), + DataCoalSection(0), + DataCommonSection(0), + DataBSSSection(0), + FourByteConstantSection(0), + EightByteConstantSection(0), + SixteenByteConstantSection(0), + LazySymbolPointerSection(0), + NonLazySymbolPointerSection(0) { +} + void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { IsFunctionEHFrameSymbolPrivate = false; @@ -605,6 +639,13 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, // Exception Handling. 
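// The two DW.ref hunks above stop binding a Twine to a named local. A Twine
// only references its operands, so one built from temporaries is safe as a
// function argument but can dangle when stored and used later. The safe
// pattern the patch adopts (symbol name hypothetical):
//
//   SmallString<64> Name("DW.ref.");
//   Name += Sym->getName();      // Name owns its bytes; reuse it freely
//
// versus the form the patch removes:
//
//   Twine T = StringRef("DW.ref.") + Sym->getName(); // T aliases temporaries
//   use(T);                                          // may read dead memory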
LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); + + if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + CompactUnwindSection = + getContext().getMachOSection("__LD", "__compact_unwind", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getReadOnly()); + // Debug Information. DwarfAbbrevSection = getContext().getMachOSection("__DWARF", "__debug_abbrev", @@ -884,6 +925,13 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { // COFF //===----------------------------------------------------------------------===// +TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF() + : TargetLoweringObjectFile(), + DrectveSection(0), + PDataSection(0), + XDataSection(0) { +} + void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFile::Initialize(Ctx, TM); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index f54d879759ff..6d6244e4f879 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -280,8 +280,8 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, /// isTwoAddrUse - Return true if the specified MI is using the specified /// register as a two-address operand. static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const TargetInstrDesc &TID = UseMI->getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MCInstrDesc &MCID = UseMI->getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(i); if (MO.isReg() && MO.getReg() == Reg && (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) @@ -443,8 +443,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands(); + const MCInstrDesc &MCID = MI.getDesc(); + unsigned NumOps = MI.isInlineAsm() + ? 
MI.getNumOperands() : MCID.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) @@ -761,10 +762,10 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector<unsigned, 4> &Kills) { - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayStore() || TID.isCall()) + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayStore() || MCID.isCall()) return false; - if (TID.isTerminator() || MI->hasUnmodeledSideEffects()) + if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -854,7 +855,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet<MachineInstr*, 8> &Processed) { - const TargetInstrDesc &TID = mi->getDesc(); + const MCInstrDesc &MCID = mi->getDesc(); unsigned regA = mi->getOperand(DstIdx).getReg(); unsigned regB = mi->getOperand(SrcIdx).getReg(); @@ -876,7 +877,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (TID.isCommutable() && mi->getNumOperands() >= 3 && + if (MCID.isCommutable() && mi->getNumOperands() >= 3 && TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; @@ -907,7 +908,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); - if (TID.isConvertibleTo3Addr()) { + if (MCID.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { @@ -927,7 +928,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // movq (%rax), %rcx // addq %rdx, %rcx // because it's preferable to schedule a load than a register copy. - if (TID.mayLoad() && !regBKilled) { + if (MCID.mayLoad() && !regBKilled) { // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = @@ -936,14 +937,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /*UnfoldStore=*/false, &LoadRegIndex); if (NewOpc != 0) { - const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); - if (UnfoldTID.getNumDefs() == 1) { + const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); + if (UnfoldMCID.getNumDefs() == 1) { MachineFunction &MF = *mbbi->getParent(); // Unfold the load. DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); const TargetRegisterClass *RC = - UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; if (!TII->unfoldMemoryOperand(MF, mi, Reg, @@ -1067,7 +1068,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const TargetInstrDesc &TID = mi->getDesc(); + const MCInstrDesc &MCID = mi->getDesc(); bool FirstTied = true; DistanceMap.insert(std::make_pair(mi, ++Dist)); @@ -1077,7 +1078,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. unsigned NumOps = mi->isInlineAsm() - ? 
mi->getNumOperands() : TID.getNumOperands(); + ? mi->getNumOperands() : MCID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) @@ -1095,12 +1096,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { "two address instruction invalid"); unsigned regB = mi->getOperand(SrcIdx).getReg(); - TiedOperandMap::iterator OI = TiedOperands.find(regB); - if (OI == TiedOperands.end()) { - SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair; - OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; - } - OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); } // Now iterate over the information collected above. diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index ba50f4e42302..03abff356934 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -208,6 +208,11 @@ namespace llvm { /// @brief returns the register allocation preference. unsigned getRegAllocPref(unsigned virtReg); + /// @brief returns true if VirtReg is assigned to its preferred physreg. + bool hasPreferredPhys(unsigned VirtReg) { + return getPhys(VirtReg) == getRegAllocPref(VirtReg); + } + /// @brief records virtReg is a split live interval from SReg. void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { Virt2SplitMap[virtReg] = SReg; diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 185065880581..a5ec797b27db 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -679,8 +679,8 @@ static void ReMaterialize(MachineBasicBlock &MBB, VirtRegMap &VRM) { MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); #ifndef NDEBUG - const TargetInstrDesc &TID = ReMatDefMI->getDesc(); - assert(TID.getNumDefs() == 1 && + const MCInstrDesc &MCID = ReMatDefMI->getDesc(); + assert(MCID.getNumDefs() == 1 && "Don't know how to remat instructions that define > 1 values!"); #endif TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); @@ -1483,11 +1483,11 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, /// where SrcReg is r1 and it is tied to r0. Return true if after /// commuting this instruction it will be r0 = op r2, r1. static bool CommuteChangesDestination(MachineInstr *DefMI, - const TargetInstrDesc &TID, + const MCInstrDesc &MCID, unsigned SrcReg, const TargetInstrInfo *TII, unsigned &DstIdx) { - if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3) + if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) return false; if (!DefMI->getOperand(1).isReg() || DefMI->getOperand(1).getReg() != SrcReg) @@ -1527,11 +1527,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr &MI = *MII; MachineBasicBlock::iterator DefMII = prior(MII); MachineInstr *DefMI = DefMII; - const TargetInstrDesc &TID = DefMI->getDesc(); + const MCInstrDesc &MCID = DefMI->getDesc(); unsigned NewDstIdx; if (DefMII != MBB->begin() && - TID.isCommutable() && - CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) { + MCID.isCommutable() && + CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) @@ -1658,9 +1658,9 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, /// isSafeToDelete - Return true if this instruction doesn't produce any side /// effect and all of its defs are dead. 
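/// The check is deliberately conservative: loads, stores, calls, terminators,
/// barriers, returns, labels, debug values, and instructions with unmodeled
/// side effects are never considered safe to delete.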
static bool isSafeToDelete(MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() || - TID.isCall() || TID.isBarrier() || TID.isReturn() || + const MCInstrDesc &MCID = MI.getDesc(); + if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || + MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || MI.isLabel() || MI.isDebugValue() || MI.hasUnmodeledSideEffects()) return false; diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 062256a2ac73..f7e2a4df951e 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -78,7 +78,6 @@ static char getTypeID(const Type *Ty) { case Type::FunctionTyID:return 'M'; case Type::StructTyID: return 'T'; case Type::ArrayTyID: return 'A'; - case Type::OpaqueTyID: return 'O'; default: return 'U'; } } @@ -282,10 +281,10 @@ GenericValue Interpreter::callExternalFunction(Function *F, if (F->getName() == "__main") errs() << "Tried to execute an unknown external function: " - << F->getType()->getDescription() << " __main\n"; + << *F->getType() << " __main\n"; else report_fatal_error("Tried to execute an unknown external function: " + - F->getType()->getDescription() + " " +F->getName()); + F->getName()); #ifndef USE_LIBFFI errs() << "Recompiling LLVM with --enable-libffi might help.\n"; #endif diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 8fceaf2b4931..445d2d0670c8 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -533,8 +533,7 @@ GenericValue JIT::runFunction(Function *F, Args.push_back(C); } - CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(), - "", StubBB); + CallInst *TheCall = CallInst::Create(F, Args, "", StubBB); TheCall->setCallingConv(F->getCallingConv()); TheCall->setTailCall(); if (!TheCall->getType()->isVoidTy()) diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt index 9e53f8757ec0..59bdfee3db43 100644 --- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -1,3 +1,4 @@ add_llvm_library(LLVMRuntimeDyld RuntimeDyld.cpp + RuntimeDyldMachO.cpp ) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index eda4cbbad52a..33dd70502798 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -1,4 +1,4 @@ -//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// +//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,118 +12,15 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dyld" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Twine.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/Object/MachOObject.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/system_error.h" -#include "llvm/Support/raw_ostream.h" +#include "RuntimeDyldImpl.h" using namespace llvm; using 
namespace llvm::object; // Empty out-of-line virtual destructor as the key function. RTDyldMemoryManager::~RTDyldMemoryManager() {} +RuntimeDyldImpl::~RuntimeDyldImpl() {} namespace llvm { -class RuntimeDyldImpl { - unsigned CPUType; - unsigned CPUSubtype; - - // The MemoryManager to load objects into. - RTDyldMemoryManager *MemMgr; - - // FIXME: This all assumes we're dealing with external symbols for anything - // explicitly referenced. I.e., we can index by name and things - // will work out. In practice, this may not be the case, so we - // should find a way to effectively generalize. - - // For each function, we have a MemoryBlock of it's instruction data. - StringMap<sys::MemoryBlock> Functions; - - // Master symbol table. As modules are loaded and external symbols are - // resolved, their addresses are stored here. - StringMap<uint8_t*> SymbolTable; - - // For each symbol, keep a list of relocations based on it. Anytime - // its address is reassigned (the JIT re-compiled the function, e.g.), - // the relocations get re-resolved. - struct RelocationEntry { - std::string Target; // Object this relocation is contained in. - uint64_t Offset; // Offset into the object for the relocation. - uint32_t Data; // Second word of the raw macho relocation entry. - int64_t Addend; // Addend encoded in the instruction itself, if any. - bool isResolved; // Has this relocation been resolved previously? - - RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend) - : Target(t), Offset(offset), Data(data), Addend(addend), - isResolved(false) {} - }; - typedef SmallVector<RelocationEntry, 4> RelocationList; - StringMap<RelocationList> Relocations; - - // FIXME: Also keep a map of all the relocations contained in an object. Use - // this to dynamically answer whether all of the relocations in it have - // been resolved or not. - - bool HasError; - std::string ErrorStr; - - // Set the error state and record an error string. - bool Error(const Twine &Msg) { - ErrorStr = Msg.str(); - HasError = true; - return true; - } - - void extractFunction(StringRef Name, uint8_t *StartAddress, - uint8_t *EndAddress); - bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, - unsigned Type, unsigned Size); - bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, - unsigned Type, unsigned Size); - bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, - unsigned Type, unsigned Size); - - bool loadSegment32(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); - bool loadSegment64(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); - -public: - RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {} - - bool loadObject(MemoryBuffer *InputBuffer); - - void *getSymbolAddress(StringRef Name) { - // FIXME: Just look up as a function for now. Overly simple of course. - // Work in progress. - return SymbolTable.lookup(Name); - } - - void resolveRelocations(); - - void reassignSymbolAddress(StringRef Name, uint8_t *Addr); - - // Is the linker in an error state? - bool hasError() { return HasError; } - - // Mark the error condition as handled and continue. - void clearError() { HasError = false; } - - // Get the error message. 
- StringRef getErrorString() { return ErrorStr; } -}; void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress, uint8_t *EndAddress) { @@ -144,472 +41,6 @@ void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress, DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n"); } -bool RuntimeDyldImpl:: -resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, - unsigned Type, unsigned Size) { - // This just dispatches to the proper target specific routine. - switch (CPUType) { - default: assert(0 && "Unsupported CPU type!"); - case mach::CTM_x86_64: - return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value, - isPCRel, Type, Size); - case mach::CTM_ARM: - return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value, - isPCRel, Type, Size); - } - llvm_unreachable(""); -} - -bool RuntimeDyldImpl:: -resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, - bool isPCRel, unsigned Type, - unsigned Size) { - // If the relocation is PC-relative, the value to be encoded is the - // pointer difference. - if (isPCRel) - // FIXME: It seems this value needs to be adjusted by 4 for an effective PC - // address. Is that expected? Only for branches, perhaps? - Value -= Address + 4; - - switch(Type) { - default: - llvm_unreachable("Invalid relocation type!"); - case macho::RIT_X86_64_Unsigned: - case macho::RIT_X86_64_Branch: { - // Mask in the target value a byte at a time (we don't have an alignment - // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t*)Address; - for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)Value; - Value >>= 8; - } - return false; - } - case macho::RIT_X86_64_Signed: - case macho::RIT_X86_64_GOTLoad: - case macho::RIT_X86_64_GOT: - case macho::RIT_X86_64_Subtractor: - case macho::RIT_X86_64_Signed1: - case macho::RIT_X86_64_Signed2: - case macho::RIT_X86_64_Signed4: - case macho::RIT_X86_64_TLV: - return Error("Relocation type not implemented yet!"); - } - return false; -} - -bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value, - bool isPCRel, unsigned Type, - unsigned Size) { - // If the relocation is PC-relative, the value to be encoded is the - // pointer difference. - if (isPCRel) { - Value -= Address; - // ARM PCRel relocations have an effective-PC offset of two instructions - // (four bytes in Thumb mode, 8 bytes in ARM mode). - // FIXME: For now, assume ARM mode. - Value -= 8; - } - - switch(Type) { - default: - llvm_unreachable("Invalid relocation type!"); - case macho::RIT_Vanilla: { - llvm_unreachable("Invalid relocation type!"); - // Mask in the target value a byte at a time (we don't have an alignment - // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t*)Address; - for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)Value; - Value >>= 8; - } - break; - } - case macho::RIT_ARM_Branch24Bit: { - // Mask the value into the target address. We know instructions are - // 32-bit aligned, so we can do it all at once. - uint32_t *p = (uint32_t*)Address; - // The low two bits of the value are not encoded. - Value >>= 2; - // Mask the value to 24 bits. - Value &= 0xffffff; - // FIXME: If the destination is a Thumb function (and the instruction - // is a non-predicated BL instruction), we need to change it to a BLX - // instruction instead. - - // Insert the value into the instruction. 
- *p = (*p & ~0xffffff) | Value; - break; - } - case macho::RIT_ARM_ThumbBranch22Bit: - case macho::RIT_ARM_ThumbBranch32Bit: - case macho::RIT_ARM_Half: - case macho::RIT_ARM_HalfDifference: - case macho::RIT_Pair: - case macho::RIT_Difference: - case macho::RIT_ARM_LocalDifference: - case macho::RIT_ARM_PreboundLazyPointer: - return Error("Relocation type not implemented yet!"); - } - return false; -} - -bool RuntimeDyldImpl:: -loadSegment32(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { - InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; - Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC); - if (!SegmentLC) - return Error("unable to load segment load command"); - - for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { - InMemoryStruct<macho::Section> Sect; - Obj->ReadSection(*SegmentLCI, SectNum, Sect); - if (!Sect) - return Error("unable to load section: '" + Twine(SectNum) + "'"); - - // FIXME: For the time being, we're only loading text segments. - if (Sect->Flags != 0x80000400) - continue; - - // Address and names of symbols in the section. - typedef std::pair<uint64_t, StringRef> SymbolEntry; - SmallVector<SymbolEntry, 64> Symbols; - // Index of all the names, in this section or not. Used when we're - // dealing with relocation entries. - SmallVector<StringRef, 64> SymbolNames; - for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { - InMemoryStruct<macho::SymbolTableEntry> STE; - Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); - if (!STE) - return Error("unable to read symbol: '" + Twine(i) + "'"); - if (STE->SectionIndex > SegmentLC->NumSections) - return Error("invalid section index for symbol: '" + Twine(i) + "'"); - // Get the symbol name. - StringRef Name = Obj->getStringAtIndex(STE->StringIndex); - SymbolNames.push_back(Name); - - // Just skip symbols not defined in this section. - if ((unsigned)STE->SectionIndex - 1 != SectNum) - continue; - - // FIXME: Check the symbol type and flags. - if (STE->Type != 0xF) // external, defined in this section. - continue; - // Flags == 0x8 marks a thumb function for ARM, which is fine as it - // doesn't require any special handling here. - if (STE->Flags != 0x0 && STE->Flags != 0x8) - continue; - - // Remember the symbol. - Symbols.push_back(SymbolEntry(STE->Value, Name)); - - DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << - (Sect->Address + STE->Value) << "\n"); - } - // Sort the symbols by address, just in case they didn't come in that way. - array_pod_sort(Symbols.begin(), Symbols.end()); - - // If there weren't any functions (odd, but just in case...) - if (!Symbols.size()) - continue; - - // Extract the function data. - uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset, - SegmentLC->FileSize).data(); - for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { - uint64_t StartOffset = Sect->Address + Symbols[i].first; - uint64_t EndOffset = Symbols[i + 1].first - 1; - DEBUG(dbgs() << "Extracting function: " << Symbols[i].second - << " from [" << StartOffset << ", " << EndOffset << "]\n"); - extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); - } - // The last symbol we do after since the end address is calculated - // differently because there is no next symbol to reference. 
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first; - uint64_t EndOffset = Sect->Size - 1; - DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second - << " from [" << StartOffset << ", " << EndOffset << "]\n"); - extractFunction(Symbols[Symbols.size()-1].second, - Base + StartOffset, Base + EndOffset); - - // Now extract the relocation information for each function and process it. - for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { - InMemoryStruct<macho::RelocationEntry> RE; - Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); - if (RE->Word0 & macho::RF_Scattered) - return Error("NOT YET IMPLEMENTED: scattered relocations."); - // Word0 of the relocation is the offset into the section where the - // relocation should be applied. We need to translate that into an - // offset into a function since that's our atom. - uint32_t Offset = RE->Word0; - // Look for the function containing the address. This is used for JIT - // code, so the number of functions in section is almost always going - // to be very small (usually just one), so until we have use cases - // where that's not true, just use a trivial linear search. - unsigned SymbolNum; - unsigned NumSymbols = Symbols.size(); - assert(NumSymbols > 0 && Symbols[0].first <= Offset && - "No symbol containing relocation!"); - for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) - if (Symbols[SymbolNum + 1].first > Offset) - break; - // Adjust the offset to be relative to the symbol. - Offset -= Symbols[SymbolNum].first; - // Get the name of the symbol containing the relocation. - StringRef TargetName = SymbolNames[SymbolNum]; - - bool isExtern = (RE->Word1 >> 27) & 1; - // Figure out the source symbol of the relocation. If isExtern is true, - // this relocation references the symbol table, otherwise it references - // a section in the same object, numbered from 1 through NumSections - // (SectionBases is [0, NumSections-1]). - // FIXME: Some targets (ARM) use internal relocations even for - // externally visible symbols, if the definition is in the same - // file as the reference. We need to convert those back to by-name - // references. We can resolve the address based on the section - // offset and see if we have a symbol at that address. If we do, - // use that; otherwise, puke. - if (!isExtern) - return Error("Internal relocations not supported."); - uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value - StringRef SourceName = SymbolNames[SourceNum]; - - // FIXME: Get the relocation addend from the target address. - - // Now store the relocation information. Associate it with the source - // symbol. 
- Relocations[SourceName].push_back(RelocationEntry(TargetName, - Offset, - RE->Word1, - 0 /*Addend*/)); - DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset - << " from '" << SourceName << "(Word1: " - << format("0x%x", RE->Word1) << ")\n"); - } - } - return false; -} - - -bool RuntimeDyldImpl:: -loadSegment64(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { - InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; - Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC); - if (!Segment64LC) - return Error("unable to load segment load command"); - - for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { - InMemoryStruct<macho::Section64> Sect; - Obj->ReadSection64(*SegmentLCI, SectNum, Sect); - if (!Sect) - return Error("unable to load section: '" + Twine(SectNum) + "'"); - - // FIXME: For the time being, we're only loading text segments. - if (Sect->Flags != 0x80000400) - continue; - - // Address and names of symbols in the section. - typedef std::pair<uint64_t, StringRef> SymbolEntry; - SmallVector<SymbolEntry, 64> Symbols; - // Index of all the names, in this section or not. Used when we're - // dealing with relocation entries. - SmallVector<StringRef, 64> SymbolNames; - for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { - InMemoryStruct<macho::Symbol64TableEntry> STE; - Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); - if (!STE) - return Error("unable to read symbol: '" + Twine(i) + "'"); - if (STE->SectionIndex > Segment64LC->NumSections) - return Error("invalid section index for symbol: '" + Twine(i) + "'"); - // Get the symbol name. - StringRef Name = Obj->getStringAtIndex(STE->StringIndex); - SymbolNames.push_back(Name); - - // Just skip symbols not defined in this section. - if ((unsigned)STE->SectionIndex - 1 != SectNum) - continue; - - // FIXME: Check the symbol type and flags. - if (STE->Type != 0xF) // external, defined in this section. - continue; - if (STE->Flags != 0x0) - continue; - - // Remember the symbol. - Symbols.push_back(SymbolEntry(STE->Value, Name)); - - DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << - (Sect->Address + STE->Value) << "\n"); - } - // Sort the symbols by address, just in case they didn't come in that way. - array_pod_sort(Symbols.begin(), Symbols.end()); - - // If there weren't any functions (odd, but just in case...) - if (!Symbols.size()) - continue; - - // Extract the function data. - uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset, - Segment64LC->FileSize).data(); - for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { - uint64_t StartOffset = Sect->Address + Symbols[i].first; - uint64_t EndOffset = Symbols[i + 1].first - 1; - DEBUG(dbgs() << "Extracting function: " << Symbols[i].second - << " from [" << StartOffset << ", " << EndOffset << "]\n"); - extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); - } - // The last symbol we do after since the end address is calculated - // differently because there is no next symbol to reference. 
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first; - uint64_t EndOffset = Sect->Size - 1; - DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second - << " from [" << StartOffset << ", " << EndOffset << "]\n"); - extractFunction(Symbols[Symbols.size()-1].second, - Base + StartOffset, Base + EndOffset); - - // Now extract the relocation information for each function and process it. - for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { - InMemoryStruct<macho::RelocationEntry> RE; - Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); - if (RE->Word0 & macho::RF_Scattered) - return Error("NOT YET IMPLEMENTED: scattered relocations."); - // Word0 of the relocation is the offset into the section where the - // relocation should be applied. We need to translate that into an - // offset into a function since that's our atom. - uint32_t Offset = RE->Word0; - // Look for the function containing the address. This is used for JIT - // code, so the number of functions in section is almost always going - // to be very small (usually just one), so until we have use cases - // where that's not true, just use a trivial linear search. - unsigned SymbolNum; - unsigned NumSymbols = Symbols.size(); - assert(NumSymbols > 0 && Symbols[0].first <= Offset && - "No symbol containing relocation!"); - for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) - if (Symbols[SymbolNum + 1].first > Offset) - break; - // Adjust the offset to be relative to the symbol. - Offset -= Symbols[SymbolNum].first; - // Get the name of the symbol containing the relocation. - StringRef TargetName = SymbolNames[SymbolNum]; - - bool isExtern = (RE->Word1 >> 27) & 1; - // Figure out the source symbol of the relocation. If isExtern is true, - // this relocation references the symbol table, otherwise it references - // a section in the same object, numbered from 1 through NumSections - // (SectionBases is [0, NumSections-1]). - if (!isExtern) - return Error("Internal relocations not supported."); - uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value - StringRef SourceName = SymbolNames[SourceNum]; - - // FIXME: Get the relocation addend from the target address. - - // Now store the relocation information. Associate it with the source - // symbol. - Relocations[SourceName].push_back(RelocationEntry(TargetName, - Offset, - RE->Word1, - 0 /*Addend*/)); - DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset - << " from '" << SourceName << "(Word1: " - << format("0x%x", RE->Word1) << ")\n"); - } - } - return false; -} - -bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) { - // If the linker is in an error state, don't do anything. - if (hasError()) - return true; - // Load the Mach-O wrapper object. - std::string ErrorStr; - OwningPtr<MachOObject> Obj( - MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr)); - if (!Obj) - return Error("unable to load object: '" + ErrorStr + "'"); - - // Get the CPU type information from the header. - const macho::Header &Header = Obj->getHeader(); - - // FIXME: Error checking that the loaded object is compatible with - // the system we're running on. - CPUType = Header.CPUType; - CPUSubtype = Header.CPUSubtype; - - // Validate that the load commands match what we expect. 
- const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0, - *DysymtabLCI = 0; - for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { - const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i); - switch (LCI.Command.Type) { - case macho::LCT_Segment: - case macho::LCT_Segment64: - if (SegmentLCI) - return Error("unexpected input object (multiple segments)"); - SegmentLCI = &LCI; - break; - case macho::LCT_Symtab: - if (SymtabLCI) - return Error("unexpected input object (multiple symbol tables)"); - SymtabLCI = &LCI; - break; - case macho::LCT_Dysymtab: - if (DysymtabLCI) - return Error("unexpected input object (multiple symbol tables)"); - DysymtabLCI = &LCI; - break; - default: - return Error("unexpected input object (unexpected load command"); - } - } - - if (!SymtabLCI) - return Error("no symbol table found in object"); - if (!SegmentLCI) - return Error("no symbol table found in object"); - - // Read and register the symbol table data. - InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; - Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); - if (!SymtabLC) - return Error("unable to load symbol table load command"); - Obj->RegisterStringTable(*SymtabLC); - - // Read the dynamic link-edit information, if present (not present in static - // objects). - if (DysymtabLCI) { - InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC; - Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC); - if (!DysymtabLC) - return Error("unable to load dynamic link-exit load command"); - - // FIXME: We don't support anything interesting yet. -// if (DysymtabLC->LocalSymbolsIndex != 0) -// return Error("NOT YET IMPLEMENTED: local symbol entries"); -// if (DysymtabLC->ExternalSymbolsIndex != 0) -// return Error("NOT YET IMPLEMENTED: non-external symbol entries"); -// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries) -// return Error("NOT YET IMPLEMENTED: undefined symbol entries"); - } - - // Load the segment load command. - if (SegmentLCI->Command.Type == macho::LCT_Segment) { - if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC)) - return true; - } else { - if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC)) - return true; - } - - return false; -} - // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { // Just iterate over the symbols in our symbol table and assign their @@ -620,35 +51,11 @@ void RuntimeDyldImpl::resolveRelocations() { reassignSymbolAddress(i->getKey(), i->getValue()); } -// Assign an address to a symbol name and resolve all the relocations -// associated with it. -void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) { - // Assign the address in our symbol table. - SymbolTable[Name] = Addr; - - RelocationList &Relocs = Relocations[Name]; - for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - RelocationEntry &RE = Relocs[i]; - uint8_t *Target = SymbolTable[RE.Target] + RE.Offset; - bool isPCRel = (RE.Data >> 24) & 1; - unsigned Type = (RE.Data >> 28) & 0xf; - unsigned Size = 1 << ((RE.Data >> 25) & 3); - - DEBUG(dbgs() << "Resolving relocation at '" << RE.Target - << "' + " << RE.Offset << " (" << format("%p", Target) << ")" - << " from '" << Name << " (" << format("%p", Addr) << ")" - << "(" << (isPCRel ? 
"pcrel" : "absolute") - << ", type: " << Type << ", Size: " << Size << ").\n"); - - resolveRelocation(Target, Addr, isPCRel, Type, Size); - RE.isResolved = true; - } -} - //===----------------------------------------------------------------------===// // RuntimeDyld class implementation -RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) { - Dyld = new RuntimeDyldImpl(MM); +RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { + Dyld = 0; + MM = mm; } RuntimeDyld::~RuntimeDyld() { @@ -656,6 +63,16 @@ RuntimeDyld::~RuntimeDyld() { } bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) { + if (!Dyld) { + if (RuntimeDyldMachO::isKnownFormat(InputBuffer)) + Dyld = new RuntimeDyldMachO(MM); + else + report_fatal_error("Unknown object format!"); + } else { + if(!Dyld->isCompatibleFormat(InputBuffer)) + report_fatal_error("Incompatible object format!"); + } + return Dyld->loadObject(InputBuffer); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h new file mode 100644 index 000000000000..bcdfb04801a5 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -0,0 +1,152 @@ +//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface for the implementations of runtime dynamic linker facilities. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_RUNTIME_DYLD_IMPL_H +#define LLVM_RUNTIME_DYLD_IMPL_H + +#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/Object/MachOObject.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/system_error.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; +using namespace llvm::object; + +namespace llvm { +class RuntimeDyldImpl { +protected: + unsigned CPUType; + unsigned CPUSubtype; + + // The MemoryManager to load objects into. + RTDyldMemoryManager *MemMgr; + + // FIXME: This all assumes we're dealing with external symbols for anything + // explicitly referenced. I.e., we can index by name and things + // will work out. In practice, this may not be the case, so we + // should find a way to effectively generalize. + + // For each function, we have a MemoryBlock of it's instruction data. + StringMap<sys::MemoryBlock> Functions; + + // Master symbol table. As modules are loaded and external symbols are + // resolved, their addresses are stored here. + StringMap<uint8_t*> SymbolTable; + + bool HasError; + std::string ErrorStr; + + // Set the error state and record an error string. + bool Error(const Twine &Msg) { + ErrorStr = Msg.str(); + HasError = true; + return true; + } + + void extractFunction(StringRef Name, uint8_t *StartAddress, + uint8_t *EndAddress); + +public: + RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {} + + virtual ~RuntimeDyldImpl(); + + virtual bool loadObject(MemoryBuffer *InputBuffer) = 0; + + void *getSymbolAddress(StringRef Name) { + // FIXME: Just look up as a function for now. 
Overly simple of course. + // Work in progress. + return SymbolTable.lookup(Name); + } + + void resolveRelocations(); + + virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr) = 0; + + // Is the linker in an error state? + bool hasError() { return HasError; } + + // Mark the error condition as handled and continue. + void clearError() { HasError = false; } + + // Get the error message. + StringRef getErrorString() { return ErrorStr; } + + virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0; +}; + + +class RuntimeDyldMachO : public RuntimeDyldImpl { + + // For each symbol, keep a list of relocations based on it. Anytime + // its address is reassigned (the JIT re-compiled the function, e.g.), + // the relocations get re-resolved. + struct RelocationEntry { + std::string Target; // Object this relocation is contained in. + uint64_t Offset; // Offset into the object for the relocation. + uint32_t Data; // Second word of the raw macho relocation entry. + int64_t Addend; // Addend encoded in the instruction itself, if any. + bool isResolved; // Has this relocation been resolved previously? + + RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend) + : Target(t), Offset(offset), Data(data), Addend(addend), + isResolved(false) {} + }; + typedef SmallVector<RelocationEntry, 4> RelocationList; + StringMap<RelocationList> Relocations; + + // FIXME: Also keep a map of all the relocations contained in an object. Use + // this to dynamically answer whether all of the relocations in it have + // been resolved or not. + + bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, + unsigned Type, unsigned Size); + bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, + unsigned Type, unsigned Size); + bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, + unsigned Type, unsigned Size); + + bool loadSegment32(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); + bool loadSegment64(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); + +public: + RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + + bool loadObject(MemoryBuffer *InputBuffer); + + void reassignSymbolAddress(StringRef Name, uint8_t *Addr); + + static bool isKnownFormat(const MemoryBuffer *InputBuffer); + + bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const { + return isKnownFormat(InputBuffer); + }; +}; + +} // end namespace llvm + + +#endif diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp new file mode 100644 index 000000000000..623e9b2acca3 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -0,0 +1,524 @@ +//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the MC-JIT runtime dynamic linker. 
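+//
+// For reference, a client is expected to drive the linker roughly as in the
+// sketch below (hedged: `MB` and `MemMgr` are illustrative names, and the
+// RuntimeDyld wrapper is assumed to forward these calls to the implementation
+// selected by isKnownFormat):
+//
+//   RuntimeDyld Dyld(MemMgr);
+//   if (Dyld.loadObject(MB))                // dispatches to RuntimeDyldMachO
+//     report_fatal_error(Dyld.getErrorString());
+//   Dyld.resolveRelocations();
+//   void *Main = Dyld.getSymbolAddress("_main");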
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dyld" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/STLExtras.h" +#include "RuntimeDyldImpl.h" +using namespace llvm; +using namespace llvm::object; + +namespace llvm { + +bool RuntimeDyldMachO:: +resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, + unsigned Type, unsigned Size) { + // This just dispatches to the proper target specific routine. + switch (CPUType) { + default: assert(0 && "Unsupported CPU type!"); + case mach::CTM_x86_64: + return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value, + isPCRel, Type, Size); + case mach::CTM_ARM: + return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value, + isPCRel, Type, Size); + } + llvm_unreachable(""); +} + +bool RuntimeDyldMachO:: +resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, + bool isPCRel, unsigned Type, + unsigned Size) { + // If the relocation is PC-relative, the value to be encoded is the + // pointer difference. + if (isPCRel) + // FIXME: It seems this value needs to be adjusted by 4 for an effective PC + // address. Is that expected? Only for branches, perhaps? + Value -= Address + 4; + + switch(Type) { + default: + llvm_unreachable("Invalid relocation type!"); + case macho::RIT_X86_64_Unsigned: + case macho::RIT_X86_64_Branch: { + // Mask in the target value a byte at a time (we don't have an alignment + // guarantee for the target address, so this is safest). + uint8_t *p = (uint8_t*)Address; + for (unsigned i = 0; i < Size; ++i) { + *p++ = (uint8_t)Value; + Value >>= 8; + } + return false; + } + case macho::RIT_X86_64_Signed: + case macho::RIT_X86_64_GOTLoad: + case macho::RIT_X86_64_GOT: + case macho::RIT_X86_64_Subtractor: + case macho::RIT_X86_64_Signed1: + case macho::RIT_X86_64_Signed2: + case macho::RIT_X86_64_Signed4: + case macho::RIT_X86_64_TLV: + return Error("Relocation type not implemented yet!"); + } + return false; +} + +bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value, + bool isPCRel, unsigned Type, + unsigned Size) { + // If the relocation is PC-relative, the value to be encoded is the + // pointer difference. + if (isPCRel) { + Value -= Address; + // ARM PCRel relocations have an effective-PC offset of two instructions + // (four bytes in Thumb mode, 8 bytes in ARM mode). + // FIXME: For now, assume ARM mode. + Value -= 8; + } + + switch(Type) { + default: + llvm_unreachable("Invalid relocation type!"); + case macho::RIT_Vanilla: { + llvm_unreachable("Invalid relocation type!"); + // Mask in the target value a byte at a time (we don't have an alignment + // guarantee for the target address, so this is safest). + uint8_t *p = (uint8_t*)Address; + for (unsigned i = 0; i < Size; ++i) { + *p++ = (uint8_t)Value; + Value >>= 8; + } + break; + } + case macho::RIT_ARM_Branch24Bit: { + // Mask the value into the target address. We know instructions are + // 32-bit aligned, so we can do it all at once. + uint32_t *p = (uint32_t*)Address; + // The low two bits of the value are not encoded. + Value >>= 2; + // Mask the value to 24 bits. + Value &= 0xffffff; + // FIXME: If the destination is a Thumb function (and the instruction + // is a non-predicated BL instruction), we need to change it to a BLX + // instruction instead. + + // Insert the value into the instruction. 
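+      // Worked example: a forward branch of 0x1000 bytes (after the PC
+      // adjustment above) encodes as 0x1000 >> 2 = 0x400 in the low 24 bits;
+      // the mask below preserves the top 8 bits of the word, which hold the
+      // condition code and opcode.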
+ *p = (*p & ~0xffffff) | Value; + break; + } + case macho::RIT_ARM_ThumbBranch22Bit: + case macho::RIT_ARM_ThumbBranch32Bit: + case macho::RIT_ARM_Half: + case macho::RIT_ARM_HalfDifference: + case macho::RIT_Pair: + case macho::RIT_Difference: + case macho::RIT_ARM_LocalDifference: + case macho::RIT_ARM_PreboundLazyPointer: + return Error("Relocation type not implemented yet!"); + } + return false; +} + +bool RuntimeDyldMachO:: +loadSegment32(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { + InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; + Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC); + if (!SegmentLC) + return Error("unable to load segment load command"); + + for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { + InMemoryStruct<macho::Section> Sect; + Obj->ReadSection(*SegmentLCI, SectNum, Sect); + if (!Sect) + return Error("unable to load section: '" + Twine(SectNum) + "'"); + + // FIXME: For the time being, we're only loading text segments. + if (Sect->Flags != 0x80000400) + continue; + + // Address and names of symbols in the section. + typedef std::pair<uint64_t, StringRef> SymbolEntry; + SmallVector<SymbolEntry, 64> Symbols; + // Index of all the names, in this section or not. Used when we're + // dealing with relocation entries. + SmallVector<StringRef, 64> SymbolNames; + for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { + InMemoryStruct<macho::SymbolTableEntry> STE; + Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); + if (!STE) + return Error("unable to read symbol: '" + Twine(i) + "'"); + if (STE->SectionIndex > SegmentLC->NumSections) + return Error("invalid section index for symbol: '" + Twine(i) + "'"); + // Get the symbol name. + StringRef Name = Obj->getStringAtIndex(STE->StringIndex); + SymbolNames.push_back(Name); + + // Just skip symbols not defined in this section. + if ((unsigned)STE->SectionIndex - 1 != SectNum) + continue; + + // FIXME: Check the symbol type and flags. + if (STE->Type != 0xF) // external, defined in this section. + continue; + // Flags == 0x8 marks a thumb function for ARM, which is fine as it + // doesn't require any special handling here. + if (STE->Flags != 0x0 && STE->Flags != 0x8) + continue; + + // Remember the symbol. + Symbols.push_back(SymbolEntry(STE->Value, Name)); + + DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << + (Sect->Address + STE->Value) << "\n"); + } + // Sort the symbols by address, just in case they didn't come in that way. + array_pod_sort(Symbols.begin(), Symbols.end()); + + // If there weren't any functions (odd, but just in case...) + if (!Symbols.size()) + continue; + + // Extract the function data. + uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset, + SegmentLC->FileSize).data(); + for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { + uint64_t StartOffset = Sect->Address + Symbols[i].first; + uint64_t EndOffset = Symbols[i + 1].first - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[i].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); + } + // The last symbol we do after since the end address is calculated + // differently because there is no next symbol to reference. 
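+    // Concretely, the last function is bounded by the end of the section
+    // (Sect->Size - 1) rather than by a following symbol's address.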
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first; + uint64_t EndOffset = Sect->Size - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[Symbols.size()-1].second, + Base + StartOffset, Base + EndOffset); + + // Now extract the relocation information for each function and process it. + for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { + InMemoryStruct<macho::RelocationEntry> RE; + Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); + if (RE->Word0 & macho::RF_Scattered) + return Error("NOT YET IMPLEMENTED: scattered relocations."); + // Word0 of the relocation is the offset into the section where the + // relocation should be applied. We need to translate that into an + // offset into a function since that's our atom. + uint32_t Offset = RE->Word0; + // Look for the function containing the address. This is used for JIT + // code, so the number of functions in section is almost always going + // to be very small (usually just one), so until we have use cases + // where that's not true, just use a trivial linear search. + unsigned SymbolNum; + unsigned NumSymbols = Symbols.size(); + assert(NumSymbols > 0 && Symbols[0].first <= Offset && + "No symbol containing relocation!"); + for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) + if (Symbols[SymbolNum + 1].first > Offset) + break; + // Adjust the offset to be relative to the symbol. + Offset -= Symbols[SymbolNum].first; + // Get the name of the symbol containing the relocation. + StringRef TargetName = SymbolNames[SymbolNum]; + + bool isExtern = (RE->Word1 >> 27) & 1; + // Figure out the source symbol of the relocation. If isExtern is true, + // this relocation references the symbol table, otherwise it references + // a section in the same object, numbered from 1 through NumSections + // (SectionBases is [0, NumSections-1]). + // FIXME: Some targets (ARM) use internal relocations even for + // externally visible symbols, if the definition is in the same + // file as the reference. We need to convert those back to by-name + // references. We can resolve the address based on the section + // offset and see if we have a symbol at that address. If we do, + // use that; otherwise, puke. + if (!isExtern) + return Error("Internal relocations not supported."); + uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value + StringRef SourceName = SymbolNames[SourceNum]; + + // FIXME: Get the relocation addend from the target address. + + // Now store the relocation information. Associate it with the source + // symbol. 
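+      // Word1 packs the standard Mach-O relocation_info bitfields: bits 0-23
+      // symbolnum, bit 24 pcrel, bits 25-26 length (log2 of the size in
+      // bytes), bit 27 extern, bits 28-31 type. reassignSymbolAddress decodes
+      // the same fields when it resolves these entries.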
+ Relocations[SourceName].push_back(RelocationEntry(TargetName, + Offset, + RE->Word1, + 0 /*Addend*/)); + DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset + << " from '" << SourceName << "(Word1: " + << format("0x%x", RE->Word1) << ")\n"); + } + } + return false; +} + + +bool RuntimeDyldMachO:: +loadSegment64(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { + InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; + Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC); + if (!Segment64LC) + return Error("unable to load segment load command"); + + for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { + InMemoryStruct<macho::Section64> Sect; + Obj->ReadSection64(*SegmentLCI, SectNum, Sect); + if (!Sect) + return Error("unable to load section: '" + Twine(SectNum) + "'"); + + // FIXME: For the time being, we're only loading text segments. + if (Sect->Flags != 0x80000400) + continue; + + // Address and names of symbols in the section. + typedef std::pair<uint64_t, StringRef> SymbolEntry; + SmallVector<SymbolEntry, 64> Symbols; + // Index of all the names, in this section or not. Used when we're + // dealing with relocation entries. + SmallVector<StringRef, 64> SymbolNames; + for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { + InMemoryStruct<macho::Symbol64TableEntry> STE; + Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); + if (!STE) + return Error("unable to read symbol: '" + Twine(i) + "'"); + if (STE->SectionIndex > Segment64LC->NumSections) + return Error("invalid section index for symbol: '" + Twine(i) + "'"); + // Get the symbol name. + StringRef Name = Obj->getStringAtIndex(STE->StringIndex); + SymbolNames.push_back(Name); + + // Just skip symbols not defined in this section. + if ((unsigned)STE->SectionIndex - 1 != SectNum) + continue; + + // FIXME: Check the symbol type and flags. + if (STE->Type != 0xF) // external, defined in this section. + continue; + if (STE->Flags != 0x0) + continue; + + // Remember the symbol. + Symbols.push_back(SymbolEntry(STE->Value, Name)); + + DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << + (Sect->Address + STE->Value) << "\n"); + } + // Sort the symbols by address, just in case they didn't come in that way. + array_pod_sort(Symbols.begin(), Symbols.end()); + + // If there weren't any functions (odd, but just in case...) + if (!Symbols.size()) + continue; + + // Extract the function data. + uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset, + Segment64LC->FileSize).data(); + for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { + uint64_t StartOffset = Sect->Address + Symbols[i].first; + uint64_t EndOffset = Symbols[i + 1].first - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[i].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); + } + // The last symbol we do after since the end address is calculated + // differently because there is no next symbol to reference. 
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first; + uint64_t EndOffset = Sect->Size - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[Symbols.size()-1].second, + Base + StartOffset, Base + EndOffset); + + // Now extract the relocation information for each function and process it. + for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { + InMemoryStruct<macho::RelocationEntry> RE; + Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); + if (RE->Word0 & macho::RF_Scattered) + return Error("NOT YET IMPLEMENTED: scattered relocations."); + // Word0 of the relocation is the offset into the section where the + // relocation should be applied. We need to translate that into an + // offset into a function since that's our atom. + uint32_t Offset = RE->Word0; + // Look for the function containing the address. This is used for JIT + // code, so the number of functions in section is almost always going + // to be very small (usually just one), so until we have use cases + // where that's not true, just use a trivial linear search. + unsigned SymbolNum; + unsigned NumSymbols = Symbols.size(); + assert(NumSymbols > 0 && Symbols[0].first <= Offset && + "No symbol containing relocation!"); + for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) + if (Symbols[SymbolNum + 1].first > Offset) + break; + // Adjust the offset to be relative to the symbol. + Offset -= Symbols[SymbolNum].first; + // Get the name of the symbol containing the relocation. + StringRef TargetName = SymbolNames[SymbolNum]; + + bool isExtern = (RE->Word1 >> 27) & 1; + // Figure out the source symbol of the relocation. If isExtern is true, + // this relocation references the symbol table, otherwise it references + // a section in the same object, numbered from 1 through NumSections + // (SectionBases is [0, NumSections-1]). + if (!isExtern) + return Error("Internal relocations not supported."); + uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value + StringRef SourceName = SymbolNames[SourceNum]; + + // FIXME: Get the relocation addend from the target address. + + // Now store the relocation information. Associate it with the source + // symbol. + Relocations[SourceName].push_back(RelocationEntry(TargetName, + Offset, + RE->Word1, + 0 /*Addend*/)); + DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset + << " from '" << SourceName << "(Word1: " + << format("0x%x", RE->Word1) << ")\n"); + } + } + return false; +} + +bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) { + // If the linker is in an error state, don't do anything. + if (hasError()) + return true; + // Load the Mach-O wrapper object. + std::string ErrorStr; + OwningPtr<MachOObject> Obj( + MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr)); + if (!Obj) + return Error("unable to load object: '" + ErrorStr + "'"); + + // Get the CPU type information from the header. + const macho::Header &Header = Obj->getHeader(); + + // FIXME: Error checking that the loaded object is compatible with + // the system we're running on. + CPUType = Header.CPUType; + CPUSubtype = Header.CPUSubtype; + + // Validate that the load commands match what we expect. 
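+  // Concretely, the loop below accepts exactly one segment load command
+  // (32- or 64-bit), exactly one symbol table, and at most one dynamic
+  // symbol table; any other load command type is rejected.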
+  const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
+    *DysymtabLCI = 0;
+  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+    const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
+    switch (LCI.Command.Type) {
+    case macho::LCT_Segment:
+    case macho::LCT_Segment64:
+      if (SegmentLCI)
+        return Error("unexpected input object (multiple segments)");
+      SegmentLCI = &LCI;
+      break;
+    case macho::LCT_Symtab:
+      if (SymtabLCI)
+        return Error("unexpected input object (multiple symbol tables)");
+      SymtabLCI = &LCI;
+      break;
+    case macho::LCT_Dysymtab:
+      if (DysymtabLCI)
+        return Error("unexpected input object (multiple symbol tables)");
+      DysymtabLCI = &LCI;
+      break;
+    default:
+      return Error("unexpected input object (unexpected load command)");
+    }
+  }
+
+  if (!SymtabLCI)
+    return Error("no symbol table found in object");
+  if (!SegmentLCI)
+    return Error("no segment load command found in object");
+
+  // Read and register the symbol table data.
+  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+  Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
+  if (!SymtabLC)
+    return Error("unable to load symbol table load command");
+  Obj->RegisterStringTable(*SymtabLC);
+
+  // Read the dynamic link-edit information, if present (not present in static
+  // objects).
+  if (DysymtabLCI) {
+    InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
+    Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
+    if (!DysymtabLC)
+      return Error("unable to load dynamic link-edit load command");
+
+    // FIXME: We don't support anything interesting yet.
+//    if (DysymtabLC->LocalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: local symbol entries");
+//    if (DysymtabLC->ExternalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: non-external symbol entries");
+//    if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
+//      return Error("NOT YET IMPLEMENTED: undefined symbol entries");
+  }
+
+  // Load the segment load command.
+  if (SegmentLCI->Command.Type == macho::LCT_Segment) {
+    if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  } else {
+    if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  }
+
+  return false;
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+  // Assign the address in our symbol table.
+  SymbolTable[Name] = Addr;
+
+  RelocationList &Relocs = Relocations[Name];
+  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+    RelocationEntry &RE = Relocs[i];
+    uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
+    bool isPCRel = (RE.Data >> 24) & 1;
+    unsigned Type = (RE.Data >> 28) & 0xf;
+    unsigned Size = 1 << ((RE.Data >> 25) & 3);
+
+    DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
+          << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
+          << " from '" << Name << " (" << format("%p", Addr) << ")"
+          << "(" << (isPCRel ?
"pcrel" : "absolute") + << ", type: " << Type << ", Size: " << Size << ").\n"); + + resolveRelocation(Target, Addr, isPCRel, Type, Size); + RE.isResolved = true; + } +} + +bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) { + StringRef Magic = InputBuffer->getBuffer().slice(0, 4); + if (Magic == "\xFE\xED\xFA\xCE") return true; + if (Magic == "\xCE\xFA\xED\xFE") return true; + if (Magic == "\xFE\xED\xFA\xCF") return true; + if (Magic == "\xCF\xFA\xED\xFE") return true; + return false; +} + +} // end namespace llvm diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index a8822e58d40f..f51aff3603b8 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -16,10 +16,10 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/Module.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Host.h" -#include "llvm/Target/SubtargetFeature.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; @@ -75,9 +75,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, // Package up features to be passed to target/subtarget std::string FeaturesStr; - if (!MCPU.empty() || !MAttrs.empty()) { + if (!MAttrs.empty()) { SubtargetFeatures Features; - Features.setCPU(MCPU); for (unsigned i = 0; i != MAttrs.size(); ++i) Features.AddFeature(MAttrs[i]); FeaturesStr = Features.getString(); @@ -85,7 +84,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, // Allocate a target... TargetMachine *Target = - TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr); + TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr); assert(Target && "Could not allocate target machine!"); return Target; } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index f372db2403c9..55aa9bf18887 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -9,337 +9,404 @@ // // This file implements the LLVM module linker. // -// Specifically, this: -// * Merges global variables between the two modules -// * Uninit + Uninit = Init, Init + Uninit = Init, Init + Init = Error if != -// * Merges functions between two modules -// //===----------------------------------------------------------------------===// #include "llvm/Linker.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/TypeSymbolTable.h" -#include "llvm/ValueSymbolTable.h" -#include "llvm/Instructions.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Path.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/ADT/DenseMap.h" using namespace llvm; -// Error - Simple wrapper function to conditionally assign to E and return true. -// This just makes error return conditions a little bit simpler... -static inline bool Error(std::string *E, const Twine &Message) { - if (E) *E = Message.str(); - return true; -} - -// Function: ResolveTypes() -// -// Description: -// Attempt to link the two specified types together. -// -// Inputs: -// DestTy - The type to which we wish to resolve. -// SrcTy - The original type which we want to resolve. -// -// Outputs: -// DestST - The symbol table in which the new type should be placed. 
-// -// Return value: -// true - There is an error and the types cannot yet be linked. -// false - No errors. -// -static bool ResolveTypes(const Type *DestTy, const Type *SrcTy) { - if (DestTy == SrcTy) return false; // If already equal, noop - assert(DestTy && SrcTy && "Can't handle null types"); - - if (const OpaqueType *OT = dyn_cast<OpaqueType>(DestTy)) { - // Type _is_ in module, just opaque... - const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(SrcTy); - } else if (const OpaqueType *OT = dyn_cast<OpaqueType>(SrcTy)) { - const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(DestTy); - } else { - return true; // Cannot link types... not-equal and neither is opaque. - } - return false; -} +//===----------------------------------------------------------------------===// +// TypeMap implementation. +//===----------------------------------------------------------------------===// -/// LinkerTypeMap - This implements a map of types that is stable -/// even if types are resolved/refined to other types. This is not a general -/// purpose map, it is specific to the linker's use. namespace { -class LinkerTypeMap : public AbstractTypeUser { - typedef DenseMap<const Type*, PATypeHolder> TheMapTy; - TheMapTy TheMap; - - LinkerTypeMap(const LinkerTypeMap&); // DO NOT IMPLEMENT - void operator=(const LinkerTypeMap&); // DO NOT IMPLEMENT +class TypeMapTy : public ValueMapTypeRemapper { + /// MappedTypes - This is a mapping from a source type to a destination type + /// to use. + DenseMap<Type*, Type*> MappedTypes; + + /// SpeculativeTypes - When checking to see if two subgraphs are isomorphic, + /// we speculatively add types to MappedTypes, but keep track of them here in + /// case we need to roll back. + SmallVector<Type*, 16> SpeculativeTypes; + + /// DefinitionsToResolve - This is a list of non-opaque structs in the source + /// module that are mapped to an opaque struct in the destination module. + SmallVector<StructType*, 16> DefinitionsToResolve; public: - LinkerTypeMap() {} - ~LinkerTypeMap() { - for (DenseMap<const Type*, PATypeHolder>::iterator I = TheMap.begin(), - E = TheMap.end(); I != E; ++I) - I->first->removeAbstractTypeUser(this); - } - - /// lookup - Return the value for the specified type or null if it doesn't - /// exist. - const Type *lookup(const Type *Ty) const { - TheMapTy::const_iterator I = TheMap.find(Ty); - if (I != TheMap.end()) return I->second; - return 0; - } - - /// insert - This returns true if the pointer was new to the set, false if it - /// was already in the set. - bool insert(const Type *Src, const Type *Dst) { - if (!TheMap.insert(std::make_pair(Src, PATypeHolder(Dst))).second) - return false; // Already in map. - if (Src->isAbstract()) - Src->addAbstractTypeUser(this); - return true; - } - -protected: - /// refineAbstractType - The callback method invoked when an abstract type is - /// resolved to another type. An object must override this method to update - /// its internal state to reference NewType instead of OldType. - /// - virtual void refineAbstractType(const DerivedType *OldTy, - const Type *NewTy) { - TheMapTy::iterator I = TheMap.find(OldTy); - const Type *DstTy = I->second; - - TheMap.erase(I); - if (OldTy->isAbstract()) - OldTy->removeAbstractTypeUser(this); - - // Don't reinsert into the map if the key is concrete now. - if (NewTy->isAbstract()) - insert(NewTy, DstTy); + + /// addTypeMapping - Indicate that the specified type in the destination + /// module is conceptually equivalent to the specified type in the source + /// module. 
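+  /// For example, a source "%foo = type { i32, %foo* }" can be mapped onto an
+  /// isomorphic "%foo" already present in the destination, so that remapped
+  /// values reuse the destination's type directly.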
+ void addTypeMapping(Type *DstTy, Type *SrcTy); + + /// linkDefinedTypeBodies - Produce a body for an opaque type in the dest + /// module from a type definition in the source module. + void linkDefinedTypeBodies(); + + /// get - Return the mapped type to use for the specified input type from the + /// source module. + Type *get(Type *SrcTy); + + FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));} + +private: + Type *getImpl(Type *T); + /// remapType - Implement the ValueMapTypeRemapper interface. + Type *remapType(Type *SrcTy) { + return get(SrcTy); } + + bool areTypesIsomorphic(Type *DstTy, Type *SrcTy); +}; +} - /// The other case which AbstractTypeUsers must be aware of is when a type - /// makes the transition from being abstract (where it has clients on it's - /// AbstractTypeUsers list) to concrete (where it does not). This method - /// notifies ATU's when this occurs for a type. - virtual void typeBecameConcrete(const DerivedType *AbsTy) { - TheMap.erase(AbsTy); - AbsTy->removeAbstractTypeUser(this); +void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) { + Type *&Entry = MappedTypes[SrcTy]; + if (Entry) return; + + if (DstTy == SrcTy) { + Entry = DstTy; + return; } - - // for debugging... - virtual void dump() const { - dbgs() << "AbstractTypeSet!\n"; + + // Check to see if these types are recursively isomorphic and establish a + // mapping between them if so. + if (!areTypesIsomorphic(DstTy, SrcTy)) { + // Oops, they aren't isomorphic. Just discard this request by rolling out + // any speculative mappings we've established. + for (unsigned i = 0, e = SpeculativeTypes.size(); i != e; ++i) + MappedTypes.erase(SpeculativeTypes[i]); } -}; + SpeculativeTypes.clear(); } - -// RecursiveResolveTypes - This is just like ResolveTypes, except that it -// recurses down into derived types, merging the used types if the parent types -// are compatible. -static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy, - LinkerTypeMap &Pointers) { - if (DstTy == SrcTy) return false; // If already equal, noop - - // If we found our opaque type, resolve it now! - if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy()) - return ResolveTypes(DstTy, SrcTy); - - // Two types cannot be resolved together if they are of different primitive - // type. For example, we cannot resolve an int to a float. - if (DstTy->getTypeID() != SrcTy->getTypeID()) return true; - - // If neither type is abstract, then they really are just different types. - if (!DstTy->isAbstract() && !SrcTy->isAbstract()) - return true; - - // Otherwise, resolve the used type used by this derived type... - switch (DstTy->getTypeID()) { - default: +/// areTypesIsomorphic - Recursively walk this pair of types, returning true +/// if they are isomorphic, false if they are not. +bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { + // Two types with differing kinds are clearly not isomorphic. + if (DstTy->getTypeID() != SrcTy->getTypeID()) return false; + + // If we have an entry in the MappedTypes table, then we have our answer. + Type *&Entry = MappedTypes[SrcTy]; + if (Entry) + return Entry == DstTy; + + // Two identical types are clearly isomorphic. Remember this + // non-speculatively. 
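+  // (Types are uniqued within an LLVMContext, so a primitive or literal type
+  // such as 'i32' or '{i32, i8*}' coming from both modules is the same Type*
+  // and pointer equality is enough to detect it.)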
+ if (DstTy == SrcTy) { + Entry = DstTy; return true; - case Type::FunctionTyID: { - const FunctionType *DstFT = cast<FunctionType>(DstTy); - const FunctionType *SrcFT = cast<FunctionType>(SrcTy); - if (DstFT->isVarArg() != SrcFT->isVarArg() || - DstFT->getNumContainedTypes() != SrcFT->getNumContainedTypes()) - return true; - - // Use TypeHolder's so recursive resolution won't break us. - PATypeHolder ST(SrcFT), DT(DstFT); - for (unsigned i = 0, e = DstFT->getNumContainedTypes(); i != e; ++i) { - const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i); - if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers)) - return true; - } - return false; } - case Type::StructTyID: { - const StructType *DstST = cast<StructType>(DstTy); - const StructType *SrcST = cast<StructType>(SrcTy); - if (DstST->getNumContainedTypes() != SrcST->getNumContainedTypes()) + + // Okay, we have two types with identical kinds that we haven't seen before. + + // If this is an opaque struct type, special case it. + if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) { + // Mapping an opaque type to any struct, just keep the dest struct. + if (SSTy->isOpaque()) { + Entry = DstTy; + SpeculativeTypes.push_back(SrcTy); return true; + } - PATypeHolder ST(SrcST), DT(DstST); - for (unsigned i = 0, e = DstST->getNumContainedTypes(); i != e; ++i) { - const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i); - if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers)) - return true; + // Mapping a non-opaque source type to an opaque dest. Keep the dest, but + // fill it in later. This doesn't need to be speculative. + if (cast<StructType>(DstTy)->isOpaque()) { + Entry = DstTy; + DefinitionsToResolve.push_back(SSTy); + return true; } - return false; - } - case Type::ArrayTyID: { - const ArrayType *DAT = cast<ArrayType>(DstTy); - const ArrayType *SAT = cast<ArrayType>(SrcTy); - if (DAT->getNumElements() != SAT->getNumElements()) return true; - return RecursiveResolveTypesI(DAT->getElementType(), SAT->getElementType(), - Pointers); } - case Type::VectorTyID: { - const VectorType *DVT = cast<VectorType>(DstTy); - const VectorType *SVT = cast<VectorType>(SrcTy); - if (DVT->getNumElements() != SVT->getNumElements()) return true; - return RecursiveResolveTypesI(DVT->getElementType(), SVT->getElementType(), - Pointers); + + // If the number of subtypes disagree between the two types, then we fail. + if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes()) + return false; + + // Fail if any of the extra properties (e.g. array size) of the type disagree. + if (isa<IntegerType>(DstTy)) + return false; // bitwidth disagrees. 
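+  // (Two IntegerTypes of equal bitwidth are uniqued to the same Type*, so
+  // they would already have matched the pointer-equality check above.)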
+  if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
+    if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
+      return false;
+  } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
+    if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
+      return false;
+  } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
+    StructType *SSTy = cast<StructType>(SrcTy);
+    if (DSTy->isAnonymous() != SSTy->isAnonymous() ||
+        DSTy->isPacked() != SSTy->isPacked())
+      return false;
+  } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
+    if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+      return false;
+  } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+    if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+      return false;
+  }
-  case Type::PointerTyID: {
-    const PointerType *DstPT = cast<PointerType>(DstTy);
-    const PointerType *SrcPT = cast<PointerType>(SrcTy);
-    if (DstPT->getAddressSpace() != SrcPT->getAddressSpace())
-      return true;
+  // Otherwise, we speculate that these two types will line up and recursively
+  // check the subelements.
+  Entry = DstTy;
+  SpeculativeTypes.push_back(SrcTy);
+
+  for (unsigned i = 0, e = SrcTy->getNumContainedTypes(); i != e; ++i)
+    if (!areTypesIsomorphic(DstTy->getContainedType(i),
+                            SrcTy->getContainedType(i)))
+      return false;
+
+  // If everything seems to have lined up, then everything is great.
+  return true;
+}

-    // If this is a pointer type, check to see if we have already seen it.  If
-    // so, we are in a recursive branch.  Cut off the search now.  We cannot use
-    // an associative container for this search, because the type pointers (keys
-    // in the container) change whenever types get resolved.
-    if (SrcPT->isAbstract())
-      if (const Type *ExistingDestTy = Pointers.lookup(SrcPT))
-        return ExistingDestTy != DstPT;
-
-    if (DstPT->isAbstract())
-      if (const Type *ExistingSrcTy = Pointers.lookup(DstPT))
-        return ExistingSrcTy != SrcPT;
-    // Otherwise, add the current pointers to the vector to stop recursion on
-    // this pair.
-    if (DstPT->isAbstract())
-      Pointers.insert(DstPT, SrcPT);
-    if (SrcPT->isAbstract())
-      Pointers.insert(SrcPT, DstPT);
-
-    return RecursiveResolveTypesI(DstPT->getElementType(),
-                                  SrcPT->getElementType(), Pointers);
-  }
-  }
+/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+/// module from a type definition in the source module.
+void TypeMapTy::linkDefinedTypeBodies() {
+  SmallVector<Type*, 16> Elements;
+  SmallString<16> TmpName;
+
+  // Note that processing entries in this loop (calling 'get') can add new
+  // entries to the DefinitionsToResolve vector.
+  while (!DefinitionsToResolve.empty()) {
+    StructType *SrcSTy = DefinitionsToResolve.pop_back_val();
+    StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
+
+    // TypeMap is a many-to-one mapping; if there were multiple types that
+    // provide a body for DstSTy then previous iterations of this loop may have
+    // already handled it. Just ignore this case.
+    if (!DstSTy->isOpaque()) continue;
+    assert(!SrcSTy->isOpaque() && "Not resolving a definition?");
+
+    // Map the body of the source type over to a new body for the dest type.
+    Elements.resize(SrcSTy->getNumElements());
+    for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+      Elements[i] = getImpl(SrcSTy->getElementType(i));
+
+    DstSTy->setBody(Elements, SrcSTy->isPacked());
+
+    // If DstSTy has no name or has a longer name than STy, then viciously steal
+    // STy's name.
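+    // For example, when a source "%foo" provides the body for an opaque
+    // destination "%foo.1", the destination struct ends up named "%foo".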
+ if (!SrcSTy->hasName()) continue; + StringRef SrcName = SrcSTy->getName(); + + if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) { + TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end()); + SrcSTy->setName(""); + DstSTy->setName(TmpName.str()); + TmpName.clear(); + } } } -static bool RecursiveResolveTypes(const Type *DestTy, const Type *SrcTy) { - LinkerTypeMap PointerTypes; - return RecursiveResolveTypesI(DestTy, SrcTy, PointerTypes); -} +/// get - Return the mapped type to use for the specified input type from the +/// source module. +Type *TypeMapTy::get(Type *Ty) { + Type *Result = getImpl(Ty); + + // If this caused a reference to any struct type, resolve it before returning. + if (!DefinitionsToResolve.empty()) + linkDefinedTypeBodies(); + return Result; +} -// LinkTypes - Go through the symbol table of the Src module and see if any -// types are named in the src module that are not named in the Dst module. -// Make sure there are no type name conflicts. -static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) { - TypeSymbolTable *DestST = &Dest->getTypeSymbolTable(); - const TypeSymbolTable *SrcST = &Src->getTypeSymbolTable(); - - // Look for a type plane for Type's... - TypeSymbolTable::const_iterator TI = SrcST->begin(); - TypeSymbolTable::const_iterator TE = SrcST->end(); - if (TI == TE) return false; // No named types, do nothing. - - // Some types cannot be resolved immediately because they depend on other - // types being resolved to each other first. This contains a list of types we - // are waiting to recheck. - std::vector<std::string> DelayedTypesToResolve; - - for ( ; TI != TE; ++TI ) { - const std::string &Name = TI->first; - const Type *RHS = TI->second; - - // Check to see if this type name is already in the dest module. - Type *Entry = DestST->lookup(Name); - - // If the name is just in the source module, bring it over to the dest. - if (Entry == 0) { - if (!Name.empty()) - DestST->insert(Name, const_cast<Type*>(RHS)); - } else if (ResolveTypes(Entry, RHS)) { - // They look different, save the types 'till later to resolve. - DelayedTypesToResolve.push_back(Name); +/// getImpl - This is the recursive version of get(). +Type *TypeMapTy::getImpl(Type *Ty) { + // If we already have an entry for this type, return it. + Type **Entry = &MappedTypes[Ty]; + if (*Entry) return *Entry; + + // If this is not a named struct type, then just map all of the elements and + // then rebuild the type from inside out. + if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isAnonymous()) { + // If there are no element types to map, then the type is itself. This is + // true for the anonymous {} struct, things like 'float', integers, etc. + if (Ty->getNumContainedTypes() == 0) + return *Entry = Ty; + + // Remap all of the elements, keeping track of whether any of them change. + bool AnyChange = false; + SmallVector<Type*, 4> ElementTypes; + ElementTypes.resize(Ty->getNumContainedTypes()); + for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i) { + ElementTypes[i] = getImpl(Ty->getContainedType(i)); + AnyChange |= ElementTypes[i] != Ty->getContainedType(i); + } + + // If we found our type while recursively processing stuff, just use it. + Entry = &MappedTypes[Ty]; + if (*Entry) return *Entry; + + // If all of the element types mapped directly over, then the type is usable + // as-is. + if (!AnyChange) + return *Entry = Ty; + + // Otherwise, rebuild a modified type. 
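+    // E.g. a source [4 x %A.0] whose element type was mapped onto the
+    // destination's %A is rebuilt here as [4 x %A].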
+ switch (Ty->getTypeID()) { + default: assert(0 && "unknown derived type to remap"); + case Type::ArrayTyID: + return *Entry = ArrayType::get(ElementTypes[0], + cast<ArrayType>(Ty)->getNumElements()); + case Type::VectorTyID: + return *Entry = VectorType::get(ElementTypes[0], + cast<VectorType>(Ty)->getNumElements()); + case Type::PointerTyID: + return *Entry = PointerType::get(ElementTypes[0], + cast<PointerType>(Ty)->getAddressSpace()); + case Type::FunctionTyID: + return *Entry = FunctionType::get(ElementTypes[0], + ArrayRef<Type*>(ElementTypes).slice(1), + cast<FunctionType>(Ty)->isVarArg()); + case Type::StructTyID: + // Note that this is only reached for anonymous structs. + return *Entry = StructType::get(Ty->getContext(), ElementTypes, + cast<StructType>(Ty)->isPacked()); } } - // Iteratively resolve types while we can... - while (!DelayedTypesToResolve.empty()) { - // Loop over all of the types, attempting to resolve them if possible... - unsigned OldSize = DelayedTypesToResolve.size(); - - // Try direct resolution by name... - for (unsigned i = 0; i != DelayedTypesToResolve.size(); ++i) { - const std::string &Name = DelayedTypesToResolve[i]; - Type *T1 = SrcST->lookup(Name); - Type *T2 = DestST->lookup(Name); - if (!ResolveTypes(T2, T1)) { - // We are making progress! - DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i); - --i; - } - } + // Otherwise, this is an unmapped named struct. If the struct can be directly + // mapped over, just use it as-is. This happens in a case when the linked-in + // module has something like: + // %T = type {%T*, i32} + // @GV = global %T* null + // where T does not exist at all in the destination module. + // + // The other case we watch for is when the type is not in the destination + // module, but that it has to be rebuilt because it refers to something that + // is already mapped. For example, if the destination module has: + // %A = type { i32 } + // and the source module has something like + // %A' = type { i32 } + // %B = type { %A'* } + // @GV = global %B* null + // then we want to create a new type: "%B = type { %A*}" and have it take the + // pristine "%B" name from the source module. + // + // To determine which case this is, we have to recursively walk the type graph + // speculating that we'll be able to reuse it unmodified. Only if this is + // safe would we map the entire thing over. Because this is an optimization, + // and is not required for the prettiness of the linked module, we just skip + // it and always rebuild a type here. + StructType *STy = cast<StructType>(Ty); + + // If the type is opaque, we can just use it directly. + if (STy->isOpaque()) + return *Entry = STy; + + // Otherwise we create a new type and resolve its body later. This will be + // resolved by the top level of get(). + DefinitionsToResolve.push_back(STy); + return *Entry = StructType::createNamed(STy->getContext(), ""); +} - // Did we not eliminate any types? - if (DelayedTypesToResolve.size() == OldSize) { - // Attempt to resolve subelements of types. This allows us to merge these - // two types: { int* } and { opaque* } - for (unsigned i = 0, e = DelayedTypesToResolve.size(); i != e; ++i) { - const std::string &Name = DelayedTypesToResolve[i]; - if (!RecursiveResolveTypes(SrcST->lookup(Name), DestST->lookup(Name))) { - // We are making progress! - DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i); - - // Go back to the main loop, perhaps we can resolve directly by name - // now... 
-          break;
-        }
-      }
-      // If we STILL cannot resolve the types, then there is something wrong.
-      if (DelayedTypesToResolve.size() == OldSize) {
-        // Remove the symbol name from the destination.
-        DelayedTypesToResolve.pop_back();
-      }
-    }
-  }
+//===----------------------------------------------------------------------===//
+// ModuleLinker implementation.
+//===----------------------------------------------------------------------===//

-  return false;
+namespace {
+  /// ModuleLinker - This is an implementation class for the LinkModules
+  /// function, which is the entrypoint for this file.
+  class ModuleLinker {
+    Module *DstM, *SrcM;
+
+    TypeMapTy TypeMap;
+
+    /// ValueMap - Mapping of values from what they used to be in Src, to what
+    /// they are now in DstM.  ValueToValueMapTy is a ValueMap, which involves
+    /// some overhead due to the use of Value handles which the Linker doesn't
+    /// actually need, but this allows us to reuse the ValueMapper code.
+    ValueToValueMapTy ValueMap;
+
+    struct AppendingVarInfo {
+      GlobalVariable *NewGV;  // New aggregate global in dest module.
+      Constant *DstInit;      // Old initializer from dest module.
+      Constant *SrcInit;      // Old initializer from src module.
+    };
+
+    std::vector<AppendingVarInfo> AppendingVars;
+
+  public:
+    std::string ErrorMsg;
+
+    ModuleLinker(Module *dstM, Module *srcM) : DstM(dstM), SrcM(srcM) { }
+
+    bool run();
+
+  private:
+    /// emitError - Helper method for setting a message and returning an error
+    /// code.
+    bool emitError(const Twine &Message) {
+      ErrorMsg = Message.str();
+      return true;
+    }
+
+    /// getLinkageResult - This analyzes the two global values and determines
+    /// what the result will look like in the destination module.
+    bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+                          GlobalValue::LinkageTypes &LT, bool &LinkFromSrc);
+
+    /// getLinkedToGlobal - Given a global in the source module, return the
+    /// global in the destination module that is being linked to, if any.
+    GlobalValue *getLinkedToGlobal(GlobalValue *SrcGV) {
+      // If the source has no name it can't link.  If it has local linkage,
+      // there is no name match-up going on.
+      if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+        return 0;
+
+      // Otherwise see if we have a match in the destination module's symtab.
+      GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+      if (DGV == 0) return 0;
+
+      // If we found a global with the same name in the dest module, but it has
+      // internal linkage, we are really not doing any linkage here.
+      if (DGV->hasLocalLinkage())
+        return 0;
+
+      // Otherwise, we do in fact link to the destination global.
+      return DGV;
+    }
+
+    void computeTypeMapping();
+
+    bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
+    bool linkGlobalProto(GlobalVariable *SrcGV);
+    bool linkFunctionProto(Function *SrcF);
+    bool linkAliasProto(GlobalAlias *SrcA);
+
+    void linkAppendingVarInit(const AppendingVarInfo &AVI);
+    void linkGlobalInits();
+    void linkFunctionBody(Function *Dst, Function *Src);
+    void linkAliasBodies();
+    void linkNamedMDNodes();
+  };
+}
+
+
+/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
/// in the symbol table.  This is good for all clients except for us.  Go
/// through the trouble to force this back. 
-static void ForceRenaming(GlobalValue *GV, const std::string &Name) {
-  assert(GV->getName() != Name && "Can't force rename to self");
-  ValueSymbolTable &ST = GV->getParent()->getValueSymbolTable();
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+  // If the global doesn't force its name or if it already has the right name,
+  // there is nothing for us to do.
+  if (GV->hasLocalLinkage() || GV->getName() == Name)
+    return;
+
+  Module *M = GV->getParent();

   // If there is a conflict, rename the conflict.
-  if (GlobalValue *ConflictGV = cast_or_null<GlobalValue>(ST.lookup(Name))) {
-    assert(ConflictGV->hasLocalLinkage() &&
-           "Not conflicting with a static global, should link instead!");
+  if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
     GV->takeName(ConflictGV);
     ConflictGV->setName(Name);    // This will cause ConflictGV to get renamed
-    assert(ConflictGV->getName() != Name && "ForceRenaming didn't work");
+    assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
   } else {
     GV->setName(Name);              // Force the name back
   }
@@ -352,30 +419,33 @@ static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
   unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
   DestGV->copyAttributesFrom(SrcGV);
   DestGV->setAlignment(Alignment);
+
+  forceRenaming(DestGV, SrcGV->getName());
 }

-/// GetLinkageResult - This analyzes the two global values and determines what
+/// getLinkageResult - This analyzes the two global values and determines what
 /// the result will look like in the destination module. In particular, it
 /// computes the resultant linkage type, computes whether the global in the
 /// source should be copied over to the destination (replacing the existing
 /// one), and computes whether this linkage is an error or not. It also performs
 /// visibility checks: we cannot link together two symbols with different
 /// visibilities.
-static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
-                             GlobalValue::LinkageTypes &LT, bool &LinkFromSrc,
-                             std::string *Err) {
-  assert((!Dest || !Src->hasLocalLinkage()) &&
+bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+                                    GlobalValue::LinkageTypes &LT,
+                                    bool &LinkFromSrc) {
+  assert(Dest && "Must have two globals being queried");
+  assert(!Src->hasLocalLinkage() &&
          "If Src has internal linkage, Dest shouldn't be set!");
-  if (!Dest) {
-    // Linking something to nothing.
-    LinkFromSrc = true;
-    LT = Src->getLinkage();
-  } else if (Src->isDeclaration()) {
+
+  bool SrcIsDeclaration = Src->isDeclaration();
+  bool DestIsDeclaration = Dest->isDeclaration();
+
+  if (SrcIsDeclaration) {
     // If Src is external or if both Src & Dest are external.. Just link the
     // external globals, we aren't adding anything.
     if (Src->hasDLLImportLinkage()) {
       // If one of GVs has DLLImport linkage, result should be dllimport'ed. 
- if (Dest->isDeclaration()) { + if (DestIsDeclaration) { LinkFromSrc = true; LT = Src->getLinkage(); } @@ -387,16 +457,10 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src, LinkFromSrc = false; LT = Dest->getLinkage(); } - } else if (Dest->isDeclaration() && !Dest->hasDLLImportLinkage()) { + } else if (DestIsDeclaration && !Dest->hasDLLImportLinkage()) { // If Dest is external but Src is not: LinkFromSrc = true; LT = Src->getLinkage(); - } else if (Src->hasAppendingLinkage() || Dest->hasAppendingLinkage()) { - if (Src->getLinkage() != Dest->getLinkage()) - return Error(Err, "Linking globals named '" + Src->getName() + - "': can only link appending global with another appending global!"); - LinkFromSrc = true; // Special cased. - LT = Src->getLinkage(); } else if (Src->isWeakForLinker()) { // At this point we know that Dest has LinkOnce, External*, Weak, Common, // or DLL* linkage. @@ -420,883 +484,485 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src, LT = GlobalValue::ExternalLinkage; } } else { - assert((Dest->hasExternalLinkage() || - Dest->hasDLLImportLinkage() || - Dest->hasDLLExportLinkage() || - Dest->hasExternalWeakLinkage()) && - (Src->hasExternalLinkage() || - Src->hasDLLImportLinkage() || - Src->hasDLLExportLinkage() || - Src->hasExternalWeakLinkage()) && + assert((Dest->hasExternalLinkage() || Dest->hasDLLImportLinkage() || + Dest->hasDLLExportLinkage() || Dest->hasExternalWeakLinkage()) && + (Src->hasExternalLinkage() || Src->hasDLLImportLinkage() || + Src->hasDLLExportLinkage() || Src->hasExternalWeakLinkage()) && "Unexpected linkage type!"); - return Error(Err, "Linking globals named '" + Src->getName() + + return emitError("Linking globals named '" + Src->getName() + "': symbol multiply defined!"); } // Check visibility - if (Dest && Src->getVisibility() != Dest->getVisibility() && - !Src->isDeclaration() && !Dest->isDeclaration() && + if (Src->getVisibility() != Dest->getVisibility() && + !SrcIsDeclaration && !DestIsDeclaration && !Src->hasAvailableExternallyLinkage() && !Dest->hasAvailableExternallyLinkage()) - return Error(Err, "Linking globals named '" + Src->getName() + + return emitError("Linking globals named '" + Src->getName() + "': symbols have different visibilities!"); return false; } -// Insert all of the named mdnoes in Src into the Dest module. -static void LinkNamedMDNodes(Module *Dest, Module *Src, - ValueToValueMapTy &ValueMap) { - for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(), - E = Src->named_metadata_end(); I != E; ++I) { - const NamedMDNode *SrcNMD = I; - NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName()); - // Add Src elements into Dest node. - for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i) - DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i), - ValueMap))); +/// computeTypeMapping - Loop over all of the linked values to compute type +/// mappings. For example, if we link "extern Foo *x" and "Foo *x = NULL", then +/// we have two struct types 'Foo' but one got renamed when the module was +/// loaded into the same LLVMContext. +void ModuleLinker::computeTypeMapping() { + // Incorporate globals. 
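+  // E.g. if both modules declare "%struct.Foo* @x" but the source's copy of
+  // %struct.Foo was renamed %struct.Foo.0 when it was loaded into the shared
+  // LLVMContext, this records the mapping %struct.Foo.0 -> %struct.Foo.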
+  for (Module::global_iterator I = SrcM->global_begin(),
+       E = SrcM->global_end(); I != E; ++I) {
+    GlobalValue *DGV = getLinkedToGlobal(I);
+    if (DGV == 0) continue;
+
+    if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) {
+      TypeMap.addTypeMapping(DGV->getType(), I->getType());
+      continue;
+    }
+
+    // Unify the element type of appending arrays.
+    ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+    ArrayType *SAT = cast<ArrayType>(I->getType()->getElementType());
+    TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+  }
+
+  // Incorporate functions.
+  for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) {
+    if (GlobalValue *DGV = getLinkedToGlobal(I))
+      TypeMap.addTypeMapping(DGV->getType(), I->getType());
+  }
+
+  // Don't bother incorporating aliases; they aren't generally typed well.
+
+  // Now that we have discovered all of the type equivalences, get a body for
+  // any 'opaque' types in the dest module that are now resolved.
+  TypeMap.linkDefinedTypeBodies();
+}

-// LinkGlobals - Loop through the global variables in the src module and merge
-// them into the dest module.
-static bool LinkGlobals(Module *Dest, const Module *Src,
-                        ValueToValueMapTy &ValueMap,
-                        std::multimap<std::string, GlobalVariable *> &AppendingVars,
-                        std::string *Err) {
-  ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
-
-  // Loop over all of the globals in the src module, mapping them over as we go
-  for (Module::const_global_iterator I = Src->global_begin(),
-       E = Src->global_end(); I != E; ++I) {
-    const GlobalVariable *SGV = I;
-    GlobalValue *DGV = 0;
-
-    // Check to see if may have to link the global with the global, alias or
-    // function.
-    if (SGV->hasName() && !SGV->hasLocalLinkage())
-      DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SGV->getName()));
-
-    // If we found a global with the same name in the dest module, but it has
-    // internal linkage, we are really not doing any linkage here.
-    if (DGV && DGV->hasLocalLinkage())
-      DGV = 0;
-
-    // If types don't agree due to opaque types, try to resolve them.
-    if (DGV && DGV->getType() != SGV->getType())
-      RecursiveResolveTypes(SGV->getType(), DGV->getType());
-
-    assert((SGV->hasInitializer() || SGV->hasExternalWeakLinkage() ||
-            SGV->hasExternalLinkage() || SGV->hasDLLImportLinkage()) &&
-           "Global must either be external or have an initializer!");
+/// linkAppendingVarProto - If there were any appending global variables, link
+/// them together now. Return true on error.
+bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+                                         GlobalVariable *SrcGV) {
+
+  if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
+    return emitError("Linking globals named '" + SrcGV->getName() +
+           "': can only link appending global with another appending global!");
+
+  ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+  ArrayType *SrcTy =
+    cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
+  Type *EltTy = DstTy->getElementType();
+
+  // Check to see that the two arrays agree on type. 
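+  // (The classic appending-linkage case is @llvm.global_ctors: each module
+  // contributes an array fragment and the linker concatenates them.)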
+ if (EltTy != SrcTy->getElementType()) + return emitError("Appending variables with different element types!"); + if (DstGV->isConstant() != SrcGV->isConstant()) + return emitError("Appending variables linked with different const'ness!"); + + if (DstGV->getAlignment() != SrcGV->getAlignment()) + return emitError( + "Appending variables with different alignment need to be linked!"); + + if (DstGV->getVisibility() != SrcGV->getVisibility()) + return emitError( + "Appending variables with different visibility need to be linked!"); + + if (DstGV->getSection() != SrcGV->getSection()) + return emitError( + "Appending variables with different section name need to be linked!"); + + uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements(); + ArrayType *NewType = ArrayType::get(EltTy, NewSize); + + // Create the new global variable. + GlobalVariable *NG = + new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(), + DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV, + DstGV->isThreadLocal(), + DstGV->getType()->getAddressSpace()); + + // Propagate alignment, visibility and section info. + CopyGVAttributes(NG, DstGV); + + AppendingVarInfo AVI; + AVI.NewGV = NG; + AVI.DstInit = DstGV->getInitializer(); + AVI.SrcInit = SrcGV->getInitializer(); + AppendingVars.push_back(AVI); + + // Replace any uses of the two global variables with uses of the new + // global. + ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType())); + + DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); + DstGV->eraseFromParent(); + + // Zap the initializer in the source variable so we don't try to link it. + SrcGV->setInitializer(0); + SrcGV->setLinkage(GlobalValue::ExternalLinkage); + return false; +} +/// linkGlobalProto - Loop through the global variables in the src module and +/// merge them into the dest module. +bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { + GlobalValue *DGV = getLinkedToGlobal(SGV); + + if (DGV) { + // Concatenation of appending linkage variables is magic and handled later. + if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage()) + return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV); + + // Determine whether linkage of these two globals follows the source + // module's definition or the destination module's definition. GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; bool LinkFromSrc = false; - if (GetLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc, Err)) + if (getLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc)) return true; - if (DGV == 0) { - // No linking to be performed, simply create an identical version of the - // symbol over in the dest module... the initializer will be filled in - // later by LinkGlobalInits. - GlobalVariable *NewDGV = - new GlobalVariable(*Dest, SGV->getType()->getElementType(), - SGV->isConstant(), SGV->getLinkage(), /*init*/0, - SGV->getName(), 0, false, - SGV->getType()->getAddressSpace()); - // Propagate alignment, visibility and section info. - CopyGVAttributes(NewDGV, SGV); - NewDGV->setUnnamedAddr(SGV->hasUnnamedAddr()); - - // If the LLVM runtime renamed the global, but it is an externally visible - // symbol, DGV must be an existing global with internal linkage. Rename - // it. - if (!NewDGV->hasLocalLinkage() && NewDGV->getName() != SGV->getName()) - ForceRenaming(NewDGV, SGV->getName()); - - // Make sure to remember this mapping. - ValueMap[SGV] = NewDGV; - - // Keep track that this is an appending variable. 
- if (SGV->hasAppendingLinkage()) - AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV)); - continue; - } - - bool HasUnnamedAddr = SGV->hasUnnamedAddr() && DGV->hasUnnamedAddr(); - - // If the visibilities of the symbols disagree and the destination is a - // prototype, take the visibility of its input. - if (DGV->isDeclaration()) - DGV->setVisibility(SGV->getVisibility()); - - if (DGV->hasAppendingLinkage()) { - // No linking is performed yet. Just insert a new copy of the global, and - // keep track of the fact that it is an appending variable in the - // AppendingVars map. The name is cleared out so that no linkage is - // performed. - GlobalVariable *NewDGV = - new GlobalVariable(*Dest, SGV->getType()->getElementType(), - SGV->isConstant(), SGV->getLinkage(), /*init*/0, - "", 0, false, - SGV->getType()->getAddressSpace()); - - // Set alignment allowing CopyGVAttributes merge it with alignment of SGV. - NewDGV->setAlignment(DGV->getAlignment()); - // Propagate alignment, section and visibility info. - CopyGVAttributes(NewDGV, SGV); - - // Make sure to remember this mapping... - ValueMap[SGV] = NewDGV; - - // Keep track that this is an appending variable... - AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV)); - continue; - } - - if (LinkFromSrc) { - if (isa<GlobalAlias>(DGV)) - return Error(Err, "Global-Alias Collision on '" + SGV->getName() + - "': symbol multiple defined"); - - // If the types don't match, and if we are to link from the source, nuke - // DGV and create a new one of the appropriate type. Note that the thing - // we are replacing may be a function (if a prototype, weak, etc) or a - // global variable. - GlobalVariable *NewDGV = - new GlobalVariable(*Dest, SGV->getType()->getElementType(), - SGV->isConstant(), NewLinkage, /*init*/0, - DGV->getName(), 0, false, - SGV->getType()->getAddressSpace()); - - // Set the unnamed_addr. - NewDGV->setUnnamedAddr(HasUnnamedAddr); - - // Propagate alignment, section, and visibility info. - CopyGVAttributes(NewDGV, SGV); - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, - DGV->getType())); - - // DGV will conflict with NewDGV because they both had the same - // name. We must erase this now so ForceRenaming doesn't assert - // because DGV might not have internal linkage. - if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV)) - Var->eraseFromParent(); - else - cast<Function>(DGV)->eraseFromParent(); - - // If the symbol table renamed the global, but it is an externally visible - // symbol, DGV must be an existing global with internal linkage. Rename. - if (NewDGV->getName() != SGV->getName() && !NewDGV->hasLocalLinkage()) - ForceRenaming(NewDGV, SGV->getName()); - - // Inherit const as appropriate. - NewDGV->setConstant(SGV->isConstant()); - + // If we're not linking from the source, then keep the definition that we + // have. + if (!LinkFromSrc) { + // Special case for const propagation. + if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) + if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant()) + DGVar->setConstant(true); + + // Set calculated linkage. + DGV->setLinkage(NewLinkage); + // Make sure to remember this mapping. - ValueMap[SGV] = NewDGV; - continue; + ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType())); + + // Destroy the source global's initializer (and convert it to a prototype) + // so that we don't attempt to copy it over when processing global + // initializers. 
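+      // (Any remaining uses of SGV in the source module are remapped to DGV
+      // through ValueMap before the source module is discarded.)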
+ SGV->setInitializer(0); + SGV->setLinkage(GlobalValue::ExternalLinkage); + return false; } - - // Not "link from source", keep the one in the DestModule and remap the - // input onto it. - - // Special case for const propagation. - if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) - if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant()) - DGVar->setConstant(true); - - // SGV is global, but DGV is alias. - if (isa<GlobalAlias>(DGV)) { - // The only valid mappings are: - // - SGV is external declaration, which is effectively a no-op. - // - SGV is weak, when we just need to throw SGV out. - if (!SGV->isDeclaration() && !SGV->isWeakForLinker()) - return Error(Err, "Global-Alias Collision on '" + SGV->getName() + - "': symbol multiple defined"); - } - - // Set calculated linkage and unnamed_addr - DGV->setLinkage(NewLinkage); - DGV->setUnnamedAddr(HasUnnamedAddr); - - // Make sure to remember this mapping... - ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType()); } - return false; -} - -static GlobalValue::LinkageTypes -CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) { - GlobalValue::LinkageTypes SL = SGV->getLinkage(); - GlobalValue::LinkageTypes DL = DGV->getLinkage(); - if (SL == GlobalValue::ExternalLinkage || DL == GlobalValue::ExternalLinkage) - return GlobalValue::ExternalLinkage; - else if (SL == GlobalValue::WeakAnyLinkage || - DL == GlobalValue::WeakAnyLinkage) - return GlobalValue::WeakAnyLinkage; - else if (SL == GlobalValue::WeakODRLinkage || - DL == GlobalValue::WeakODRLinkage) - return GlobalValue::WeakODRLinkage; - else if (SL == GlobalValue::InternalLinkage && - DL == GlobalValue::InternalLinkage) - return GlobalValue::InternalLinkage; - else if (SL == GlobalValue::LinkerPrivateLinkage && - DL == GlobalValue::LinkerPrivateLinkage) - return GlobalValue::LinkerPrivateLinkage; - else if (SL == GlobalValue::LinkerPrivateWeakLinkage && - DL == GlobalValue::LinkerPrivateWeakLinkage) - return GlobalValue::LinkerPrivateWeakLinkage; - else if (SL == GlobalValue::LinkerPrivateWeakDefAutoLinkage && - DL == GlobalValue::LinkerPrivateWeakDefAutoLinkage) - return GlobalValue::LinkerPrivateWeakDefAutoLinkage; - else { - assert (SL == GlobalValue::PrivateLinkage && - DL == GlobalValue::PrivateLinkage && "Unexpected linkage type"); - return GlobalValue::PrivateLinkage; + + // No linking to be performed or linking from the source: simply create an + // identical version of the symbol over in the dest module... the + // initializer will be filled in later by LinkGlobalInits. + GlobalVariable *NewDGV = + new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()), + SGV->isConstant(), SGV->getLinkage(), /*init*/0, + SGV->getName(), /*insertbefore*/0, + SGV->isThreadLocal(), + SGV->getType()->getAddressSpace()); + // Propagate alignment, visibility and section info. + CopyGVAttributes(NewDGV, SGV); + + if (DGV) { + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType())); + DGV->eraseFromParent(); } -} - -// LinkAlias - Loop through the alias in the src module and link them into the -// dest module. We're assuming, that all functions/global variables were already -// linked in. 
-static bool LinkAlias(Module *Dest, const Module *Src, - ValueToValueMapTy &ValueMap, - std::string *Err) { - // Loop over all alias in the src module - for (Module::const_alias_iterator I = Src->alias_begin(), - E = Src->alias_end(); I != E; ++I) { - const GlobalAlias *SGA = I; - const GlobalValue *SAliasee = SGA->getAliasedGlobal(); - GlobalAlias *NewGA = NULL; - - // Globals were already linked, thus we can just query ValueMap for variant - // of SAliasee in Dest. - ValueToValueMapTy::const_iterator VMI = ValueMap.find(SAliasee); - assert(VMI != ValueMap.end() && "Aliasee not linked"); - GlobalValue* DAliasee = cast<GlobalValue>(VMI->second); - GlobalValue* DGV = NULL; - - // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken - // by this, but aliases to GEPs are broken to a lot of other things, so - // it's less important. - Constant *DAliaseeConst = DAliasee; - if (SGA->getType() != DAliasee->getType()) - DAliaseeConst = ConstantExpr::getBitCast(DAliasee, SGA->getType()); - - // Try to find something 'similar' to SGA in destination module. - if (!DGV && !SGA->hasLocalLinkage()) { - DGV = Dest->getNamedAlias(SGA->getName()); - - // If types don't agree due to opaque types, try to resolve them. - if (DGV && DGV->getType() != SGA->getType()) - RecursiveResolveTypes(SGA->getType(), DGV->getType()); - } - - if (!DGV && !SGA->hasLocalLinkage()) { - DGV = Dest->getGlobalVariable(SGA->getName()); - - // If types don't agree due to opaque types, try to resolve them. - if (DGV && DGV->getType() != SGA->getType()) - RecursiveResolveTypes(SGA->getType(), DGV->getType()); - } - - if (!DGV && !SGA->hasLocalLinkage()) { - DGV = Dest->getFunction(SGA->getName()); - - // If types don't agree due to opaque types, try to resolve them. - if (DGV && DGV->getType() != SGA->getType()) - RecursiveResolveTypes(SGA->getType(), DGV->getType()); - } - - // No linking to be performed on internal stuff. - if (DGV && DGV->hasLocalLinkage()) - DGV = NULL; - - if (GlobalAlias *DGA = dyn_cast_or_null<GlobalAlias>(DGV)) { - // Types are known to be the same, check whether aliasees equal. As - // globals are already linked we just need query ValueMap to find the - // mapping. - if (DAliasee == DGA->getAliasedGlobal()) { - // This is just two copies of the same alias. Propagate linkage, if - // necessary. - DGA->setLinkage(CalculateAliasLinkage(SGA, DGA)); - - NewGA = DGA; - // Proceed to 'common' steps - } else - return Error(Err, "Alias Collision on '" + SGA->getName()+ - "': aliases have different aliasees"); - } else if (GlobalVariable *DGVar = dyn_cast_or_null<GlobalVariable>(DGV)) { - // The only allowed way is to link alias with external declaration or weak - // symbol.. - if (DGVar->isDeclaration() || DGVar->isWeakForLinker()) { - // But only if aliasee is global too... - if (!isa<GlobalVariable>(DAliasee)) - return Error(Err, "Global-Alias Collision on '" + SGA->getName() + - "': aliasee is not global variable"); - - NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), - SGA->getName(), DAliaseeConst, Dest); - CopyGVAttributes(NewGA, SGA); - - // Any uses of DGV need to change to NewGA, with cast, if needed. - if (SGA->getType() != DGVar->getType()) - DGVar->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA, - DGVar->getType())); - else - DGVar->replaceAllUsesWith(NewGA); - - // DGVar will conflict with NewGA because they both had the same - // name. We must erase this now so ForceRenaming doesn't assert - // because DGV might not have internal linkage. 
- DGVar->eraseFromParent(); - - // Proceed to 'common' steps - } else - return Error(Err, "Global-Alias Collision on '" + SGA->getName() + - "': symbol multiple defined"); - } else if (Function *DF = dyn_cast_or_null<Function>(DGV)) { - // The only allowed way is to link alias with external declaration or weak - // symbol... - if (DF->isDeclaration() || DF->isWeakForLinker()) { - // But only if aliasee is function too... - if (!isa<Function>(DAliasee)) - return Error(Err, "Function-Alias Collision on '" + SGA->getName() + - "': aliasee is not function"); - - NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), - SGA->getName(), DAliaseeConst, Dest); - CopyGVAttributes(NewGA, SGA); - - // Any uses of DF need to change to NewGA, with cast, if needed. - if (SGA->getType() != DF->getType()) - DF->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA, - DF->getType())); - else - DF->replaceAllUsesWith(NewGA); - - // DF will conflict with NewGA because they both had the same - // name. We must erase this now so ForceRenaming doesn't assert - // because DF might not have internal linkage. - DF->eraseFromParent(); - - // Proceed to 'common' steps - } else - return Error(Err, "Function-Alias Collision on '" + SGA->getName() + - "': symbol multiple defined"); - } else { - // No linking to be performed, simply create an identical version of the - // alias over in the dest module... - NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), - SGA->getName(), DAliaseeConst, Dest); - CopyGVAttributes(NewGA, SGA); - - // Proceed to 'common' steps - } - - assert(NewGA && "No alias was created in destination module!"); - - // If the symbol table renamed the alias, but it is an externally visible - // symbol, DGA must be an global value with internal linkage. Rename it. - if (NewGA->getName() != SGA->getName() && - !NewGA->hasLocalLinkage()) - ForceRenaming(NewGA, SGA->getName()); - - // Remember this mapping so uses in the source module get remapped - // later by MapValue. - ValueMap[SGA] = NewGA; - } - + + // Make sure to remember this mapping. + ValueMap[SGV] = NewDGV; return false; } +/// linkFunctionProto - Link the function in the source module into the +/// destination module if needed, setting up mapping information. +bool ModuleLinker::linkFunctionProto(Function *SF) { + GlobalValue *DGV = getLinkedToGlobal(SF); -// LinkGlobalInits - Update the initializers in the Dest module now that all -// globals that may be referenced are in Dest. -static bool LinkGlobalInits(Module *Dest, const Module *Src, - ValueToValueMapTy &ValueMap, - std::string *Err) { - // Loop over all of the globals in the src module, mapping them over as we go - for (Module::const_global_iterator I = Src->global_begin(), - E = Src->global_end(); I != E; ++I) { - const GlobalVariable *SGV = I; - - if (SGV->hasInitializer()) { // Only process initialized GV's - // Figure out what the initializer looks like in the dest module. - Constant *SInit = - cast<Constant>(MapValue(SGV->getInitializer(), ValueMap)); - // Grab destination global variable or alias. - GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts()); - - // If dest if global variable, check that initializers match. 
-      if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) {
-        if (DGVar->hasInitializer()) {
-          if (SGV->hasExternalLinkage()) {
-            if (DGVar->getInitializer() != SInit)
-              return Error(Err, "Global Variable Collision on '" +
-                           SGV->getName() +
-                           "': global variables have different initializers");
-          } else if (DGVar->isWeakForLinker()) {
-            // Nothing is required, mapped values will take the new global
-            // automatically.
-          } else if (SGV->isWeakForLinker()) {
-            // Nothing is required, mapped values will take the new global
-            // automatically.
-          } else if (DGVar->hasAppendingLinkage()) {
-            llvm_unreachable("Appending linkage unimplemented!");
-          } else {
-            llvm_unreachable("Unknown linkage!");
-          }
-        } else {
-          // Copy the initializer over now...
-          DGVar->setInitializer(SInit);
-        }
-      } else {
-        // Destination is alias, the only valid situation is when source is
-        // weak. Also, note, that we already checked linkage in LinkGlobals(),
-        // thus we assert here.
-        // FIXME: Should we weaken this assumption, 'dereference' alias and
-        // check for initializer of aliasee?
-        assert(SGV->isWeakForLinker());
-      }
+  if (DGV) {
+    GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+    bool LinkFromSrc = false;
+    if (getLinkageResult(DGV, SF, NewLinkage, LinkFromSrc))
+      return true;
+
+    if (!LinkFromSrc) {
+      // Set calculated linkage
+      DGV->setLinkage(NewLinkage);
+
+      // Make sure to remember this mapping.
+      ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
+
+      // Remove the body from the source module so we don't attempt to remap it.
+      SF->deleteBody();
+      return false;
    }
  }
+
+  // If there is no linkage to be performed or we are linking from the source,
+  // bring SF over.
+  Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
+                                     SF->getLinkage(), SF->getName(), DstM);
+  CopyGVAttributes(NewDF, SF);
+
+  if (DGV) {
+    // Any uses of DF need to change to NewDF, with cast.
+    DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+    DGV->eraseFromParent();
+  }
+
+  ValueMap[SF] = NewDF;
   return false;
 }

-// LinkFunctionProtos - Link the functions together between the two modules,
-// without doing function bodies... this just adds external function prototypes
-// to the Dest function...
-//
-static bool LinkFunctionProtos(Module *Dest, const Module *Src,
-                               ValueToValueMapTy &ValueMap,
-                               std::string *Err) {
-  ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
-
-  // Loop over all of the functions in the src module, mapping them over
-  for (Module::const_iterator I = Src->begin(), E = Src->end(); I != E; ++I) {
-    const Function *SF = I;   // SrcFunction
-    GlobalValue *DGV = 0;
-
-    // Check to see if may have to link the function with the global, alias or
-    // function.
-    if (SF->hasName() && !SF->hasLocalLinkage())
-      DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SF->getName()));
-
-    // If we found a global with the same name in the dest module, but it has
-    // internal linkage, we are really not doing any linkage here.
-    if (DGV && DGV->hasLocalLinkage())
-      DGV = 0;
-
-    // If types don't agree due to opaque types, try to resolve them.
-    if (DGV && DGV->getType() != SF->getType())
-      RecursiveResolveTypes(SF->getType(), DGV->getType());
-
+/// linkAliasProto - Set up prototypes for any aliases that come over from the
+/// source module. 
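+/// Only the prototype is created here; the aliasee is connected later by
+/// linkAliasBodies(), once every global it may reference has a prototype.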
+bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) { + GlobalValue *DGV = getLinkedToGlobal(SGA); + + if (DGV) { GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; bool LinkFromSrc = false; - if (GetLinkageResult(DGV, SF, NewLinkage, LinkFromSrc, Err)) + if (getLinkageResult(DGV, SGA, NewLinkage, LinkFromSrc)) return true; - - // If there is no linkage to be performed, just bring over SF without - // modifying it. - if (DGV == 0) { - // Function does not already exist, simply insert an function signature - // identical to SF into the dest module. - Function *NewDF = Function::Create(SF->getFunctionType(), - SF->getLinkage(), - SF->getName(), Dest); - CopyGVAttributes(NewDF, SF); - - // If the LLVM runtime renamed the function, but it is an externally - // visible symbol, DF must be an existing function with internal linkage. - // Rename it. - if (!NewDF->hasLocalLinkage() && NewDF->getName() != SF->getName()) - ForceRenaming(NewDF, SF->getName()); - - // ... and remember this mapping... - ValueMap[SF] = NewDF; - continue; - } - - // If the visibilities of the symbols disagree and the destination is a - // prototype, take the visibility of its input. - if (DGV->isDeclaration()) - DGV->setVisibility(SF->getVisibility()); - - if (LinkFromSrc) { - if (isa<GlobalAlias>(DGV)) - return Error(Err, "Function-Alias Collision on '" + SF->getName() + - "': symbol multiple defined"); - - // We have a definition of the same name but different type in the - // source module. Copy the prototype to the destination and replace - // uses of the destination's prototype with the new prototype. - Function *NewDF = Function::Create(SF->getFunctionType(), NewLinkage, - SF->getName(), Dest); - CopyGVAttributes(NewDF, SF); - - // Any uses of DF need to change to NewDF, with cast - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, - DGV->getType())); - - // DF will conflict with NewDF because they both had the same. We must - // erase this now so ForceRenaming doesn't assert because DF might - // not have internal linkage. - if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV)) - Var->eraseFromParent(); - else - cast<Function>(DGV)->eraseFromParent(); - - // If the symbol table renamed the function, but it is an externally - // visible symbol, DF must be an existing function with internal - // linkage. Rename it. - if (NewDF->getName() != SF->getName() && !NewDF->hasLocalLinkage()) - ForceRenaming(NewDF, SF->getName()); - - // Remember this mapping so uses in the source module get remapped - // later by MapValue. - ValueMap[SF] = NewDF; - continue; + + if (!LinkFromSrc) { + // Set calculated linkage. + DGV->setLinkage(NewLinkage); + + // Make sure to remember this mapping. + ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType())); + + // Remove the body from the source module so we don't attempt to remap it. + SGA->setAliasee(0); + return false; } + } + + // If there is no linkage to be performed or we're linking from the source, + // bring over SGA. + GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()), + SGA->getLinkage(), SGA->getName(), + /*aliasee*/0, DstM); + CopyGVAttributes(NewDA, SGA); + + if (DGV) { + // Any uses of DGV need to change to NewDA, with cast. + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType())); + DGV->eraseFromParent(); + } + + ValueMap[SGA] = NewDA; + return false; +} - // Not "link from source", keep the one in the DestModule and remap the - // input onto it. 
- - if (isa<GlobalAlias>(DGV)) { - // The only valid mappings are: - // - SF is external declaration, which is effectively a no-op. - // - SF is weak, when we just need to throw SF out. - if (!SF->isDeclaration() && !SF->isWeakForLinker()) - return Error(Err, "Function-Alias Collision on '" + SF->getName() + - "': symbol multiple defined"); - } +void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) { + // Merge the initializer. + SmallVector<Constant*, 16> Elements; + if (ConstantArray *I = dyn_cast<ConstantArray>(AVI.DstInit)) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + Elements.push_back(I->getOperand(i)); + } else { + assert(isa<ConstantAggregateZero>(AVI.DstInit)); + ArrayType *DstAT = cast<ArrayType>(AVI.DstInit->getType()); + Type *EltTy = DstAT->getElementType(); + Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy)); + } + + Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap); + if (const ConstantArray *I = dyn_cast<ConstantArray>(SrcInit)) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + Elements.push_back(I->getOperand(i)); + } else { + assert(isa<ConstantAggregateZero>(SrcInit)); + ArrayType *SrcAT = cast<ArrayType>(SrcInit->getType()); + Type *EltTy = SrcAT->getElementType(); + Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy)); + } + ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType()); + AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements)); +} - // Set calculated linkage - DGV->setLinkage(NewLinkage); - // Make sure to remember this mapping. - ValueMap[SF] = ConstantExpr::getBitCast(DGV, SF->getType()); +// linkGlobalInits - Update the initializers in the Dest module now that all +// globals that may be referenced are in Dest. +void ModuleLinker::linkGlobalInits() { + // Loop over all of the globals in the src module, mapping them over as we go + for (Module::const_global_iterator I = SrcM->global_begin(), + E = SrcM->global_end(); I != E; ++I) { + if (!I->hasInitializer()) continue; // Only process initialized GV's. + + // Grab destination global variable. + GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]); + // Figure out what the initializer looks like in the dest module. + DGV->setInitializer(MapValue(I->getInitializer(), ValueMap, + RF_None, &TypeMap)); } - return false; } -// LinkFunctionBody - Copy the source function over into the dest function and +// linkFunctionBody - Copy the source function over into the dest function and // fix up references to values. At this point we know that Dest is an external // function, and that Src is not. -static bool LinkFunctionBody(Function *Dest, Function *Src, - ValueToValueMapTy &ValueMap, - std::string *Err) { - assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration()); +void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { + assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration()); // Go through and convert function arguments over, remembering the mapping. - Function::arg_iterator DI = Dest->arg_begin(); + Function::arg_iterator DI = Dst->arg_begin(); for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); I != E; ++I, ++DI) { - DI->setName(I->getName()); // Copy the name information over... + DI->setName(I->getName()); // Copy the name over. - // Add a mapping to our local map + // Add a mapping to our mapping. ValueMap[I] = DI; } // Splice the body of the source function into the dest function. 
- Dest->getBasicBlockList().splice(Dest->end(), Src->getBasicBlockList()); + Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList()); // At this point, all of the instructions and values of the function are now // copied over. The only problem is that they are still referencing values in // the Source function as operands. Loop through all of the operands of the // functions and patch them up to point to the local versions. - for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB) + for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries); + RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap); // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); I != E; ++I) ValueMap.erase(I); - - return false; } -// LinkFunctionBodies - Link in the function bodies that are defined in the -// source module into the DestModule. This consists basically of copying the -// function over and fixing up references to values. -static bool LinkFunctionBodies(Module *Dest, Module *Src, - ValueToValueMapTy &ValueMap, - std::string *Err) { - - // Loop over all of the functions in the src module, mapping them over as we - // go - for (Module::iterator SF = Src->begin(), E = Src->end(); SF != E; ++SF) { - if (!SF->isDeclaration()) { // No body if function is external - Function *DF = dyn_cast<Function>(ValueMap[SF]); // Destination function - - // DF not external SF external? - if (DF && DF->isDeclaration()) - // Only provide the function body if there isn't one already. - if (LinkFunctionBody(DF, SF, ValueMap, Err)) - return true; +void ModuleLinker::linkAliasBodies() { + for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end(); + I != E; ++I) + if (Constant *Aliasee = I->getAliasee()) { + GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]); + DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap)); } - } - return false; } -// LinkAppendingVars - If there were any appending global variables, link them -// together now. Return true on error. -static bool LinkAppendingVars(Module *M, - std::multimap<std::string, GlobalVariable *> &AppendingVars, - std::string *ErrorMsg) { - if (AppendingVars.empty()) return false; // Nothing to do. - - // Loop over the multimap of appending vars, processing any variables with the - // same name, forming a new appending global variable with both of the - // initializers merged together, then rewrite references to the old variables - // and delete them. - std::vector<Constant*> Inits; - while (AppendingVars.size() > 1) { - // Get the first two elements in the map... - std::multimap<std::string, - GlobalVariable*>::iterator Second = AppendingVars.begin(), First=Second++; - - // If the first two elements are for different names, there is no pair... - // Otherwise there is a pair, so link them together... - if (First->first == Second->first) { - GlobalVariable *G1 = First->second, *G2 = Second->second; - const ArrayType *T1 = cast<ArrayType>(G1->getType()->getElementType()); - const ArrayType *T2 = cast<ArrayType>(G2->getType()->getElementType()); - - // Check to see that they two arrays agree on type... 
- if (T1->getElementType() != T2->getElementType()) - return Error(ErrorMsg, - "Appending variables with different element types need to be linked!"); - if (G1->isConstant() != G2->isConstant()) - return Error(ErrorMsg, - "Appending variables linked with different const'ness!"); - - if (G1->getAlignment() != G2->getAlignment()) - return Error(ErrorMsg, - "Appending variables with different alignment need to be linked!"); - - if (G1->getVisibility() != G2->getVisibility()) - return Error(ErrorMsg, - "Appending variables with different visibility need to be linked!"); - - if (G1->getSection() != G2->getSection()) - return Error(ErrorMsg, - "Appending variables with different section name need to be linked!"); - - unsigned NewSize = T1->getNumElements() + T2->getNumElements(); - ArrayType *NewType = ArrayType::get(T1->getElementType(), - NewSize); - - G1->setName(""); // Clear G1's name in case of a conflict! - - // Create the new global variable... - GlobalVariable *NG = - new GlobalVariable(*M, NewType, G1->isConstant(), G1->getLinkage(), - /*init*/0, First->first, 0, G1->isThreadLocal(), - G1->getType()->getAddressSpace()); - - // Propagate alignment, visibility and section info. - CopyGVAttributes(NG, G1); - - // Merge the initializer... - Inits.reserve(NewSize); - if (ConstantArray *I = dyn_cast<ConstantArray>(G1->getInitializer())) { - for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i) - Inits.push_back(I->getOperand(i)); - } else { - assert(isa<ConstantAggregateZero>(G1->getInitializer())); - Constant *CV = Constant::getNullValue(T1->getElementType()); - for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i) - Inits.push_back(CV); - } - if (ConstantArray *I = dyn_cast<ConstantArray>(G2->getInitializer())) { - for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i) - Inits.push_back(I->getOperand(i)); - } else { - assert(isa<ConstantAggregateZero>(G2->getInitializer())); - Constant *CV = Constant::getNullValue(T2->getElementType()); - for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i) - Inits.push_back(CV); - } - NG->setInitializer(ConstantArray::get(NewType, Inits)); - Inits.clear(); - - // Replace any uses of the two global variables with uses of the new - // global... - - // FIXME: This should rewrite simple/straight-forward uses such as - // getelementptr instructions to not use the Cast! - G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG, - G1->getType())); - G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG, - G2->getType())); - - // Remove the two globals from the module now... - M->getGlobalList().erase(G1); - M->getGlobalList().erase(G2); - - // Put the new global into the AppendingVars map so that we can handle - // linking of more than two vars... - Second->second = NG; - } - AppendingVars.erase(First); +/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest +/// module. +void ModuleLinker::linkNamedMDNodes() { + for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(), + E = SrcM->named_metadata_end(); I != E; ++I) { + NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName()); + // Add Src elements into Dest node. 
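The loop in the next few lines copies each operand of the source node into the destination node, remapping it through the ValueMap and TypeMap via MapValue. Stripped of the remapping, the merge reduces to the sketch below, assuming the 2011-era Module/NamedMDNode API (mergeNamedMD is a hypothetical helper, and both modules must share one LLVMContext):

#include "llvm/Metadata.h"
#include "llvm/Module.h"

using namespace llvm;

// Append every operand of a source module's named metadata node onto the
// same-named node in Dst, creating that node on first use. The real linker
// additionally passes each operand through MapValue before adding it.
static void mergeNamedMD(Module &Dst, const NamedMDNode &SrcNMD) {
  NamedMDNode *DstNMD = Dst.getOrInsertNamedMetadata(SrcNMD.getName());
  for (unsigned i = 0, e = SrcNMD.getNumOperands(); i != e; ++i)
    DstNMD->addOperand(SrcNMD.getOperand(i));
}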
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + DestNMD->addOperand(MapValue(I->getOperand(i), ValueMap, + RF_None, &TypeMap)); } - - return false; } + +bool ModuleLinker::run() { + assert(DstM && "Null Destination module"); + assert(SrcM && "Null Source Module"); -static bool ResolveAliases(Module *Dest) { - for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end(); - I != E; ++I) - // We can't sue resolveGlobalAlias here because we need to preserve - // bitcasts and GEPs. - if (const Constant *C = I->getAliasee()) { - while (dyn_cast<GlobalAlias>(C)) - C = cast<GlobalAlias>(C)->getAliasee(); - const GlobalValue *GV = dyn_cast<GlobalValue>(C); - if (C != I && !(GV && GV->isDeclaration())) - I->replaceAllUsesWith(const_cast<Constant*>(C)); - } - - return false; -} - -// LinkModules - This function links two modules together, with the resulting -// left module modified to be the composite of the two input modules. If an -// error occurs, true is returned and ErrorMsg (if not null) is set to indicate -// the problem. Upon failure, the Dest module could be in a modified state, and -// shouldn't be relied on to be consistent. -bool -Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) { - assert(Dest != 0 && "Invalid Destination module"); - assert(Src != 0 && "Invalid Source Module"); - - if (Dest->getDataLayout().empty()) { - if (!Src->getDataLayout().empty()) { - Dest->setDataLayout(Src->getDataLayout()); - } else { - std::string DataLayout; - - if (Dest->getEndianness() == Module::AnyEndianness) { - if (Src->getEndianness() == Module::BigEndian) - DataLayout.append("E"); - else if (Src->getEndianness() == Module::LittleEndian) - DataLayout.append("e"); - } - - if (Dest->getPointerSize() == Module::AnyPointerSize) { - if (Src->getPointerSize() == Module::Pointer64) - DataLayout.append(DataLayout.length() == 0 ? "p:64:64" : "-p:64:64"); - else if (Src->getPointerSize() == Module::Pointer32) - DataLayout.append(DataLayout.length() == 0 ? "p:32:32" : "-p:32:32"); - } - Dest->setDataLayout(DataLayout); - } - } + // Inherit the target data from the source module if the destination module + // doesn't have one already. + if (DstM->getDataLayout().empty() && !SrcM->getDataLayout().empty()) + DstM->setDataLayout(SrcM->getDataLayout()); // Copy the target triple from the source to dest if the dest's is empty. 
- if (Dest->getTargetTriple().empty() && !Src->getTargetTriple().empty()) - Dest->setTargetTriple(Src->getTargetTriple()); + if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty()) + DstM->setTargetTriple(SrcM->getTargetTriple()); - if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() && - Src->getDataLayout() != Dest->getDataLayout()) + if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() && + SrcM->getDataLayout() != DstM->getDataLayout()) errs() << "WARNING: Linking two modules of different data layouts!\n"; - if (!Src->getTargetTriple().empty() && - Dest->getTargetTriple() != Src->getTargetTriple()) { + if (!SrcM->getTargetTriple().empty() && + DstM->getTargetTriple() != SrcM->getTargetTriple()) { errs() << "WARNING: Linking two modules of different target triples: "; - if (!Src->getModuleIdentifier().empty()) - errs() << Src->getModuleIdentifier() << ": "; - errs() << "'" << Src->getTargetTriple() << "' and '" - << Dest->getTargetTriple() << "'\n"; + if (!SrcM->getModuleIdentifier().empty()) + errs() << SrcM->getModuleIdentifier() << ": "; + errs() << "'" << SrcM->getTargetTriple() << "' and '" + << DstM->getTargetTriple() << "'\n"; } // Append the module inline asm string. - if (!Src->getModuleInlineAsm().empty()) { - if (Dest->getModuleInlineAsm().empty()) - Dest->setModuleInlineAsm(Src->getModuleInlineAsm()); + if (!SrcM->getModuleInlineAsm().empty()) { + if (DstM->getModuleInlineAsm().empty()) + DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm()); else - Dest->setModuleInlineAsm(Dest->getModuleInlineAsm()+"\n"+ - Src->getModuleInlineAsm()); + DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+ + SrcM->getModuleInlineAsm()); } // Update the destination module's dependent libraries list with the libraries // from the source module. There's no opportunity for duplicates here as the // Module ensures that duplicate insertions are discarded. - for (Module::lib_iterator SI = Src->lib_begin(), SE = Src->lib_end(); + for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end(); SI != SE; ++SI) - Dest->addLibrary(*SI); + DstM->addLibrary(*SI); + + // If the source library's module id is in the dependent library list of the + // destination library, remove it since that module is now linked in. + StringRef ModuleId = SrcM->getModuleIdentifier(); + if (!ModuleId.empty()) + DstM->removeLibrary(sys::path::stem(ModuleId)); - // LinkTypes - Go through the symbol table of the Src module and see if any - // types are named in the src module that are not named in the Dst module. - // Make sure there are no type name conflicts. - if (LinkTypes(Dest, Src, ErrorMsg)) - return true; + + // Loop over all of the linked values to compute type mappings. + computeTypeMapping(); - // ValueMap - Mapping of values from what they used to be in Src, to what they - // are now in Dest. ValueToValueMapTy is a ValueMap, which involves some - // overhead due to the use of Value handles which the Linker doesn't actually - // need, but this allows us to reuse the ValueMapper code. - ValueToValueMapTy ValueMap; - - // AppendingVars - Keep track of global variables in the destination module - // with appending linkage. After the module is linked together, they are - // appended and the module is rewritten. - std::multimap<std::string, GlobalVariable *> AppendingVars; - for (Module::global_iterator I = Dest->global_begin(), E = Dest->global_end(); - I != E; ++I) { - // Add all of the appending globals already in the Dest module to - // AppendingVars. 
-    if (I->hasAppendingLinkage())
-      AppendingVars.insert(std::make_pair(I->getName(), I));
-  }
+  // Remap all of the named mdnodes in Src into the DstM module. We do this
+  // after linking GlobalValues so that MDNodes that reference GlobalValues
+  // are properly remapped.
+  linkNamedMDNodes();

-  // Insert all of the globals in src into the Dest module... without linking
+  // Insert all of the globals in src into the DstM module... without linking
   // initializers (which could refer to functions not yet mapped over).
-  if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
-    return true;
+  for (Module::global_iterator I = SrcM->global_begin(),
+       E = SrcM->global_end(); I != E; ++I)
+    if (linkGlobalProto(I))
+      return true;

   // Link the functions together between the two modules, without doing function
-  // bodies... this just adds external function prototypes to the Dest
+  // bodies... this just adds external function prototypes to the DstM
   // function...  We do this so that when we begin processing function bodies,
   // all of the global values that may be referenced are available in our
   // ValueMap.
-  if (LinkFunctionProtos(Dest, Src, ValueMap, ErrorMsg))
-    return true;
-
-  // If there were any alias, link them now. We really need to do this now,
-  // because all of the aliases that may be referenced need to be available in
-  // ValueMap
-  if (LinkAlias(Dest, Src, ValueMap, ErrorMsg)) return true;
-
-  // Update the initializers in the Dest module now that all globals that may
-  // be referenced are in Dest.
-  if (LinkGlobalInits(Dest, Src, ValueMap, ErrorMsg)) return true;
+  for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
+    if (linkFunctionProto(I))
+      return true;

-  // Link in the function bodies that are defined in the source module into the
-  // DestModule. This consists basically of copying the function over and
-  // fixing up references to values.
-  if (LinkFunctionBodies(Dest, Src, ValueMap, ErrorMsg)) return true;
+  // If there were any aliases, link them now.
+  for (Module::alias_iterator I = SrcM->alias_begin(),
+       E = SrcM->alias_end(); I != E; ++I)
+    if (linkAliasProto(I))
+      return true;

-  // If there were any appending global variables, link them together now.
-  if (LinkAppendingVars(Dest, AppendingVars, ErrorMsg)) return true;
+  for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
+    linkAppendingVarInit(AppendingVars[i]);
+
+  // Update the initializers in the DstM module now that all globals that may
+  // be referenced are in DstM.
+  linkGlobalInits();
+
+  // Link in the function bodies that are defined in the source module into
+  // DstM.
+  for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
+    if (SF->isDeclaration()) continue;      // No body if function is external.
+
+    linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+  }

-  // Resolve all uses of aliases with aliasees
-  if (ResolveAliases(Dest)) return true;
+  // Resolve all uses of aliases with aliasees.
+  linkAliasBodies();

-  // Remap all of the named mdnoes in Src into the Dest module. We do this
-  // after linking GlobalValues so that MDNodes that reference GlobalValues
-  // are properly remapped.
-  LinkNamedMDNodes(Dest, Src, ValueMap);
+  // Now that all of the types from the source are used, resolve any structs
+  // copied over to the dest that didn't exist there.
+  TypeMap.linkDefinedTypeBodies();
+
+  return false;
+}

-  // If the source library's module id is in the dependent library list of the
-  // destination library, remove it since that module is now linked in.
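With run() handling the whole pipeline, the LinkModules entrypoint in the next hunk shrinks to a wrapper that forwards ModuleLinker's error string. A hedged usage sketch from the client side, assuming the caller owns both modules and they share an LLVMContext (linkInto is a hypothetical helper):

#include <string>
#include "llvm/Linker.h"
#include "llvm/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Returns true on failure. Note that on failure Dst may already be partially
// modified, as the LinkModules comment below warns.
static bool linkInto(Module *Dst, Module *Src) {
  std::string ErrorMsg;
  if (Linker::LinkModules(Dst, Src, &ErrorMsg)) {
    errs() << "link failed: " << ErrorMsg << "\n";
    return true;
  }
  return false;
}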
- const std::string &modId = Src->getModuleIdentifier(); - if (!modId.empty()) - Dest->removeLibrary(sys::path::stem(modId)); +//===----------------------------------------------------------------------===// +// LinkModules entrypoint. +//===----------------------------------------------------------------------===// +// LinkModules - This function links two modules together, with the resulting +// left module modified to be the composite of the two input modules. If an +// error occurs, true is returned and ErrorMsg (if not null) is set to indicate +// the problem. Upon failure, the Dest module could be in a modified state, and +// shouldn't be relied on to be consistent. +bool Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) { + ModuleLinker TheLinker(Dest, Src); + if (TheLinker.run()) { + if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg; + return true; + } + return false; } - -// vim: sw=2 diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index a77ecd3bd8ad..22afa7e91cbe 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -28,12 +28,14 @@ add_llvm_library(LLVMMC MCSectionELF.cpp MCSectionMachO.cpp MCStreamer.cpp + MCSubtargetInfo.cpp MCSymbol.cpp MCValue.cpp MCWin64EH.cpp MachObjectWriter.cpp WinCOFFStreamer.cpp WinCOFFObjectWriter.cpp + SubtargetFeature.cpp TargetAsmBackend.cpp ) diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 73b259eaa0fe..502b60b0edf4 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -23,6 +23,9 @@ using namespace llvm; MCAsmInfo::MCAsmInfo() { + PointerSize = 4; + IsLittleEndian = true; + StackGrowsUp = false; HasSubsectionsViaSymbols = false; HasMachoZeroFillDirective = false; HasMachoTBSSDirective = false; @@ -78,6 +81,7 @@ MCAsmInfo::MCAsmInfo() { DwarfRequiresRelocationForSectionOffset = true; DwarfSectionOffsetDirective = 0; DwarfUsesLabelOffsetForRanges = true; + DwarfRegNumForCFI = false; HasMicrosoftFastStdCallMangling = false; AsmTransCBE = 0; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index e8b09fcaced8..d5d08e8f69fb 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -33,8 +34,10 @@ using namespace llvm; namespace { class MCAsmStreamer : public MCStreamer { +protected: formatted_raw_ostream &OS; const MCAsmInfo &MAI; +private: OwningPtr<MCInstPrinter> InstPrinter; OwningPtr<MCCodeEmitter> Emitter; OwningPtr<TargetAsmBackend> AsmBackend; @@ -134,7 +137,8 @@ public: virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label); + const MCSymbol *Label, + unsigned PointerSize); virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, const MCSymbol *Label); @@ -361,9 +365,9 @@ void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label) { - EmitDwarfSetLineAddr(LineDelta, Label, - getContext().getTargetAsmInfo().getPointerSize()); + const MCSymbol *Label, + unsigned PointerSize) { + EmitDwarfSetLineAddr(LineDelta, Label, PointerSize); } void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, @@ -600,7 +604,7 @@ void 
MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, int64_t IntValue; if (!Value->EvaluateAsAbsolute(IntValue)) report_fatal_error("Don't know how to emit this value."); - if (getContext().getTargetAsmInfo().isLittleEndian()) { + if (getContext().getAsmInfo().isLittleEndian()) { EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); } else { @@ -822,9 +826,9 @@ void MCAsmStreamer::EmitCFIEndProc() { } void MCAsmStreamer::EmitRegisterName(int64_t Register) { - if (InstPrinter) { - const TargetAsmInfo &asmInfo = getContext().getTargetAsmInfo(); - unsigned LLVMRegister = asmInfo.getLLVMRegNum(Register, true); + if (InstPrinter && !MAI.useDwarfRegNumForCFI()) { + const TargetAsmInfo &TAI = getContext().getTargetAsmInfo(); + unsigned LLVMRegister = TAI.getLLVMRegNum(Register, true); InstPrinter->printRegName(OS, LLVMRegister); } else { OS << Register; @@ -1085,7 +1089,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { } } - // FIXME: Node the fixup comments for Thumb2 are completely bogus since the + // FIXME: Note the fixup comments for Thumb2 are completely bogus since the // high order halfword of a 32-bit Thumb2 instruction is emitted first. OS << "encoding: ["; for (unsigned i = 0, e = Code.size(); i != e; ++i) { @@ -1120,7 +1124,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { unsigned Bit = (Code[i] >> j) & 1; unsigned FixupBit; - if (getContext().getTargetAsmInfo().isLittleEndian()) + if (getContext().getAsmInfo().isLittleEndian()) FixupBit = i * 8 + j; else FixupBit = i * 8 + (7-j); @@ -1241,13 +1245,12 @@ void MCAsmStreamer::Finish() { if (!UseCFI) EmitFrames(false); } - MCStreamer *llvm::createAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, - bool useCFI, - MCInstPrinter *IP, MCCodeEmitter *CE, - TargetAsmBackend *TAB, bool ShowInst) { + bool useCFI, MCInstPrinter *IP, + MCCodeEmitter *CE, TargetAsmBackend *TAB, + bool ShowInst) { return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI, IP, CE, TAB, ShowInst); } diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 6e636f07f1d1..5480b4b12b2c 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -40,6 +40,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, llvm::InitializeAllTargetInfos(); // FIXME: We shouldn't need to initialize the Target(Machine)s. llvm::InitializeAllTargets(); + llvm::InitializeAllMCAsmInfos(); llvm::InitializeAllAsmPrinters(); llvm::InitializeAllAsmParsers(); llvm::InitializeAllDisassemblers(); @@ -50,16 +51,18 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, assert(TheTarget && "Unable to create target!"); // Get the assembler info needed to setup the MCContext. - const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName); assert(MAI && "Unable to create target asm info!"); // Package up features to be passed to target/subtarget std::string FeaturesStr; + std::string CPU; // FIXME: We shouldn't need to do this (and link in codegen). // When we split this out, we should do it in a way that makes // it straightforward to switch subtargets on the fly. 
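The EmitValueImpl hunk above changes only where the byte order comes from (MCAsmInfo instead of TargetAsmInfo); the fallback itself still splits a non-absolute 64-bit value into two 4-byte emissions, low half first on little-endian targets. A standalone illustration of that split, where emit32 is a hypothetical stand-in for EmitIntValue:

#include <cstdint>
#include <cstdio>

static void emit32(uint32_t V) { std::printf("emit %08x\n", V); }

// Mirror of the 64-bit split above: little-endian targets emit the low
// 32 bits first, big-endian targets the high 32 bits first.
static void emit64(uint64_t V, bool IsLittleEndian) {
  if (IsLittleEndian) {
    emit32(uint32_t(V >> 0));
    emit32(uint32_t(V >> 32));
  } else {
    emit32(uint32_t(V >> 32));
    emit32(uint32_t(V >> 0));
  }
}

int main() {
  emit64(0x0123456789abcdefULL, true);  // 89abcdef then 01234567
  emit64(0x0123456789abcdefULL, false); // 01234567 then 89abcdef
  return 0;
}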
- TargetMachine *TM = TheTarget->createTargetMachine(TripleName, FeaturesStr); + TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU, + FeaturesStr); assert(TM && "Unable to create target machine!"); // Get the target assembler info needed to setup the context. @@ -77,7 +80,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); - MCInstPrinter *IP = TheTarget->createMCInstPrinter(*TM, AsmPrinterVariant, + MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, *MAI); assert(IP && "Unable to create instruction printer!"); diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 91c5284892a5..bdd99afe1ae4 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -106,6 +107,7 @@ void EDDisassembler::initialize() { InitializeAllTargetInfos(); InitializeAllTargets(); + InitializeAllMCAsmInfos(); InitializeAllAsmPrinters(); InitializeAllAsmParsers(); InitializeAllDisassemblers(); @@ -167,11 +169,11 @@ EDDisassembler::EDDisassembler(CPUKey &key) : if (!Tgt) return; + std::string CPU; std::string featureString; - - TargetMachine.reset(Tgt->createTargetMachine(tripleString, + TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU, featureString)); - + const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); if (!registerInfo) @@ -179,11 +181,11 @@ EDDisassembler::EDDisassembler(CPUKey &key) : initMaps(*registerInfo); - AsmInfo.reset(Tgt->createAsmInfo(tripleString)); + AsmInfo.reset(Tgt->createMCAsmInfo(tripleString)); if (!AsmInfo) return; - + Disassembler.reset(Tgt->createMCDisassembler()); if (!Disassembler) @@ -193,8 +195,7 @@ EDDisassembler::EDDisassembler(CPUKey &key) : InstString.reset(new std::string); InstStream.reset(new raw_string_ostream(*InstString)); - InstPrinter.reset(Tgt->createMCInstPrinter(*TargetMachine, LLVMSyntaxVariant, - *AsmInfo)); + InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); if (!InstPrinter) return; @@ -372,8 +373,11 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr, context, *streamer, *AsmInfo)); - OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser, - *TargetMachine)); + + StringRef triple = tripleFromArch(Key.Arch); + OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", "")); + OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI, + *genericParser)); AsmToken OpcodeToken = genericParser->Lex(); AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h index 2fcc09d4bef0..11d69c151cf9 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.h +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -41,6 +41,7 @@ class MCInstPrinter; class MCInst; class MCParsedAsmOperand; class MCStreamer; +class MCSubtargetInfo; template <typename T> class SmallVectorImpl; class SourceMgr; class Target; diff --git a/lib/MC/MCDisassembler/EDInfo.h 
b/lib/MC/MCDisassembler/EDInfo.h index ad5728263133..e43ad1635246 100644 --- a/lib/MC/MCDisassembler/EDInfo.h +++ b/lib/MC/MCDisassembler/EDInfo.h @@ -25,8 +25,11 @@ enum OperandTypes { kOperandTypeARMBranchTarget, kOperandTypeARMSoReg, kOperandTypeARMSoImm, + kOperandTypeARMRotImm, kOperandTypeARMSoImm2Part, kOperandTypeARMPredicate, + kOperandTypeAddrModeImm12, + kOperandTypeLdStSOReg, kOperandTypeARMAddrMode2, kOperandTypeARMAddrMode2Offset, kOperandTypeARMAddrMode3, @@ -38,13 +41,20 @@ enum OperandTypes { kOperandTypeARMAddrMode7, kOperandTypeARMAddrModePC, kOperandTypeARMRegisterList, + kOperandTypeARMDPRRegisterList, + kOperandTypeARMSPRRegisterList, kOperandTypeARMTBAddrMode, kOperandTypeThumbITMask, - kOperandTypeThumbAddrModeS1, - kOperandTypeThumbAddrModeS2, - kOperandTypeThumbAddrModeS4, + kOperandTypeThumbAddrModeRegS1, + kOperandTypeThumbAddrModeRegS2, + kOperandTypeThumbAddrModeRegS4, + kOperandTypeThumbAddrModeImmS1, + kOperandTypeThumbAddrModeImmS2, + kOperandTypeThumbAddrModeImmS4, kOperandTypeThumbAddrModeRR, kOperandTypeThumbAddrModeSP, + kOperandTypeThumbAddrModePC, + kOperandTypeThumb2AddrModeReg, kOperandTypeThumb2SoReg, kOperandTypeThumb2SoImm, kOperandTypeThumb2AddrModeImm8, @@ -52,8 +62,7 @@ enum OperandTypes { kOperandTypeThumb2AddrModeImm12, kOperandTypeThumb2AddrModeSoReg, kOperandTypeThumb2AddrModeImm8s4, - kOperandTypeThumb2AddrModeImm8s4Offset, - kOperandTypeThumb2AddrModeReg + kOperandTypeThumb2AddrModeImm8s4Offset }; enum OperandFlags { diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp index 492bb08f336a..6a4e56ff72c4 100644 --- a/lib/MC/MCDisassembler/EDOperand.cpp +++ b/lib/MC/MCDisassembler/EDOperand.cpp @@ -61,11 +61,14 @@ EDOperand::EDOperand(const EDDisassembler &disassembler, switch (operandType) { default: case kOperandTypeARMRegisterList: + case kOperandTypeARMDPRRegisterList: + case kOperandTypeARMSPRRegisterList: break; case kOperandTypeImmediate: case kOperandTypeRegister: case kOperandTypeARMBranchTarget: case kOperandTypeARMSoImm: + case kOperandTypeARMRotImm: case kOperandTypeThumb2SoImm: case kOperandTypeARMSoImm2Part: case kOperandTypeARMPredicate: @@ -78,6 +81,7 @@ EDOperand::EDOperand(const EDDisassembler &disassembler, numMCOperands = 1; break; case kOperandTypeThumb2SoReg: + case kOperandTypeAddrModeImm12: case kOperandTypeARMAddrMode2Offset: case kOperandTypeARMAddrMode3Offset: case kOperandTypeARMAddrMode4: @@ -86,17 +90,22 @@ EDOperand::EDOperand(const EDDisassembler &disassembler, case kOperandTypeThumb2AddrModeImm8: case kOperandTypeThumb2AddrModeImm12: case kOperandTypeThumb2AddrModeImm8s4: + case kOperandTypeThumbAddrModeImmS1: + case kOperandTypeThumbAddrModeImmS2: + case kOperandTypeThumbAddrModeImmS4: case kOperandTypeThumbAddrModeRR: case kOperandTypeThumbAddrModeSP: + case kOperandTypeThumbAddrModePC: numMCOperands = 2; break; case kOperandTypeARMSoReg: + case kOperandTypeLdStSOReg: case kOperandTypeARMAddrMode2: case kOperandTypeARMAddrMode3: case kOperandTypeThumb2AddrModeSoReg: - case kOperandTypeThumbAddrModeS1: - case kOperandTypeThumbAddrModeS2: - case kOperandTypeThumbAddrModeS4: + case kOperandTypeThumbAddrModeRegS1: + case kOperandTypeThumbAddrModeRegS2: + case kOperandTypeThumbAddrModeRegS4: case kOperandTypeARMAddrMode6Offset: numMCOperands = 3; break; @@ -270,9 +279,9 @@ int EDOperand::isMemory() { case kOperandTypeARMAddrMode7: case kOperandTypeARMAddrModePC: case kOperandTypeARMBranchTarget: - case kOperandTypeThumbAddrModeS1: - case kOperandTypeThumbAddrModeS2: - case 
kOperandTypeThumbAddrModeS4: + case kOperandTypeThumbAddrModeRegS1: + case kOperandTypeThumbAddrModeRegS2: + case kOperandTypeThumbAddrModeRegS4: case kOperandTypeThumbAddrModeRR: case kOperandTypeThumbAddrModeSP: case kOperandTypeThumb2SoImm: diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 13cb81ab441b..ad86db13d510 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -7,22 +7,21 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/FoldingSet.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmBackend.h" #include "llvm/Target/TargetAsmInfo.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" using namespace llvm; // Given a special op, return the address skip amount (in units of @@ -30,28 +29,27 @@ using namespace llvm; #define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE) // The maximum address skip amount that can be encoded with a special op. -#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255) +#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255) // First special line opcode - leave room for the standard opcodes. // Note: If you want to change this, you'll have to update the // "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit(). -#define DWARF2_LINE_OPCODE_BASE 13 +#define DWARF2_LINE_OPCODE_BASE 13 // Minimum line offset in a special line info. opcode. This value // was chosen to give a reasonable range of values. -#define DWARF2_LINE_BASE -5 +#define DWARF2_LINE_BASE -5 // Range of line offsets in a special line info. opcode. -# define DWARF2_LINE_RANGE 14 +#define DWARF2_LINE_RANGE 14 // Define the architecture-dependent minimum instruction length (in bytes). // This value should be rather too small than too big. -# define DWARF2_LINE_MIN_INSN_LENGTH 1 +#define DWARF2_LINE_MIN_INSN_LENGTH 1 // Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting, // this routine is a nop and will be optimized away. -static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) -{ +static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) { if (DWARF2_LINE_MIN_INSN_LENGTH == 1) return AddrDelta; if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) { @@ -174,7 +172,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // At this point we want to emit/create the sequence to encode the delta in // line numbers and the increment of the address from the previous Label // and the current Label. - MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label); + const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo(); + MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label, + asmInfo.getPointerSize()); LastLine = it->getLine(); LastLabel = Label; @@ -198,7 +198,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // Switch back the the dwarf line section. 
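The DWARF2_LINE_* constants above plug into the standard DWARF special-opcode formula, where a single opcode byte encodes both a line delta and an address advance; MAX_SPECIAL_ADDR_DELTA is just the largest advance that still fits. A worked example using the values defined above (specialOpcode is an illustrative helper, not part of the patch):

#include <cstdio>

// Values of DWARF2_LINE_OPCODE_BASE / _LINE_BASE / _LINE_RANGE from above.
enum { OpcodeBase = 13, LineBase = -5, LineRange = 14 };

// opcode = (LineDelta - LineBase) + LineRange * AddrDelta + OpcodeBase.
// Returns -1 when the pair cannot be encoded as a single special opcode.
static int specialOpcode(int LineDelta, unsigned AddrDelta) {
  int Adjusted = LineDelta - LineBase;
  if (Adjusted < 0 || Adjusted >= LineRange)
    return -1;
  unsigned Op = OpcodeBase + Adjusted + AddrDelta * LineRange;
  return Op <= 255 ? int(Op) : -1;
}

int main() {
  std::printf("%d\n", specialOpcode(1, 4));            // 13 + 6 + 56 = 75
  std::printf("%d\n", (255 - OpcodeBase) / LineRange); // SPECIAL_ADDR(255) = 17
  return 0;
}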
MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection()); - MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd); + const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo(); + MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd, + asmInfo.getPointerSize()); } // @@ -291,7 +293,7 @@ void MCDwarfFileTable::Emit(MCStreamer *MCOS) { const std::vector<const MCSection *> &MCLineSectionOrder = MCOS->getContext().getMCLineSectionOrder(); for (std::vector<const MCSection*>::const_iterator it = - MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie; + MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie; ++it) { const MCSection *Sec = *it; const MCLineSection *Line = MCLineSections.lookup(Sec); @@ -354,10 +356,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, OS << char(dwarf::DW_LNS_const_add_pc); else { OS << char(dwarf::DW_LNS_advance_pc); - SmallString<32> Tmp; - raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeULEB128(AddrDelta, OSE); - OS << OSE.str(); + MCObjectWriter::EncodeULEB128(AddrDelta, OS); } OS << char(dwarf::DW_LNS_extended_op); OS << char(1); @@ -432,25 +431,24 @@ void MCDwarfFile::dump() const { static int getDataAlignmentFactor(MCStreamer &streamer) { MCContext &context = streamer.getContext(); - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + const MCAsmInfo &asmInfo = context.getAsmInfo(); int size = asmInfo.getPointerSize(); - if (asmInfo.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) + if (asmInfo.isStackGrowthDirectionUp()) return size; - else - return -size; + else + return -size; } static unsigned getSizeForEncoding(MCStreamer &streamer, unsigned symbolEncoding) { MCContext &context = streamer.getContext(); - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); unsigned format = symbolEncoding & 0x0f; switch (format) { default: assert(0 && "Unknown Encoding"); case dwarf::DW_EH_PE_absptr: case dwarf::DW_EH_PE_signed: - return asmInfo.getPointerSize(); + return context.getAsmInfo().getPointerSize(); case dwarf::DW_EH_PE_udata2: case dwarf::DW_EH_PE_sdata2: return 2; @@ -464,13 +462,14 @@ static unsigned getSizeForEncoding(MCStreamer &streamer, } static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, - unsigned symbolEncoding) { + unsigned symbolEncoding, const char *comment = 0) { MCContext &context = streamer.getContext(); const MCAsmInfo &asmInfo = context.getAsmInfo(); const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol, symbolEncoding, streamer); unsigned size = getSizeForEncoding(streamer, symbolEncoding); + if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment); streamer.EmitAbsValue(v, size); } @@ -486,11 +485,11 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, } static const MachineLocation TranslateMachineLocation( - const TargetAsmInfo &AsmInfo, + const TargetAsmInfo &TAI, const MachineLocation &Loc) { unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? MachineLocation::VirtualFP : - unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true)); + unsigned(TAI.getDwarfRegNum(Loc.getReg(), true)); const MachineLocation &NewLoc = Loc.isReg() ? 
    MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
   return NewLoc;
@@ -503,13 +502,18 @@ namespace {
   bool UsingCFI;
   bool IsEH;
   const MCSymbol *SectionStart;
-
  public:
   FrameEmitterImpl(bool usingCFI, bool isEH, const MCSymbol *sectionStart)
     : CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
       SectionStart(sectionStart) {
   }
+  /// EmitCompactUnwind - Emit the unwind information in a compact way. If
+  /// we're successful, return 'true'. Otherwise, return 'false' and it will
+  /// emit the normal CIE and FDE.
+  bool EmitCompactUnwind(MCStreamer &streamer,
+                         const MCDwarfFrameInfo &frame);
+
   const MCSymbol &EmitCIE(MCStreamer &streamer,
                           const MCSymbol *personality,
                           unsigned personalityEncoding,
@@ -524,11 +528,46 @@ namespace {
   void EmitCFIInstruction(MCStreamer &Streamer,
                           const MCCFIInstruction &Instr);
 };
+
+} // end anonymous namespace
+
+static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
+                             StringRef Prefix) {
+  if (Streamer.isVerboseAsm()) {
+    const char *EncStr = 0;
+    switch (Encoding) {
+    default: EncStr = "<unknown encoding>"; break;
+    case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
+    case dwarf::DW_EH_PE_omit:   EncStr = "omit"; break;
+    case dwarf::DW_EH_PE_pcrel:  EncStr = "pcrel"; break;
+    case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
+    case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
+    case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
+    case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: EncStr = "pcrel udata4"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: EncStr = "pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: EncStr = "pcrel udata8"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: EncStr = "pcrel sdata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
+      EncStr = "indirect pcrel udata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
+      EncStr = "indirect pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
+      EncStr = "indirect pcrel udata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
+      EncStr = "indirect pcrel sdata8"; break;
+    }
+
+    Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
+  }
+
+  Streamer.EmitIntValue(Encoding, 1);
 }

 void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
                                           const MCCFIInstruction &Instr) {
   int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+  bool VerboseAsm = Streamer.isVerboseAsm();

   switch (Instr.getOperation()) {
   case MCCFIInstruction::Move:
@@ -540,9 +579,13 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
       // If advancing cfa.
if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { if (Src.getReg() == MachineLocation::VirtualFP) { + if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset"); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1); } else { + if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa"); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + + Twine(Src.getReg())); Streamer.EmitULEB128IntValue(Src.getReg()); } @@ -551,47 +594,62 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, else CFAOffset = -Src.getOffset(); + if (VerboseAsm) Streamer.AddComment(Twine("Offset " + Twine(CFAOffset))); Streamer.EmitULEB128IntValue(CFAOffset); return; } if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { assert(Dst.isReg() && "Machine move not supported yet."); + if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register"); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg())); Streamer.EmitULEB128IntValue(Dst.getReg()); return; } unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); if (IsRelative) Offset -= CFAOffset; Offset = Offset / dataAlignmentFactor; if (Offset < 0) { + if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf"); Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg)); Streamer.EmitULEB128IntValue(Reg); + if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset)); Streamer.EmitSLEB128IntValue(Offset); } else if (Reg < 64) { + if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") + + Twine(Reg) + ")"); Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset)); Streamer.EmitULEB128IntValue(Offset); } else { + if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended"); Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg)); Streamer.EmitULEB128IntValue(Reg); + if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset)); Streamer.EmitULEB128IntValue(Offset); } return; } case MCCFIInstruction::Remember: + if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state"); Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1); return; case MCCFIInstruction::Restore: + if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state"); Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1); return; case MCCFIInstruction::SameValue: { unsigned Reg = Instr.getDestination().getReg(); + if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value"); Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1); + if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg)); Streamer.EmitULEB128IntValue(Reg); return; } @@ -614,6 +672,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, if (BaseLabel && Label) { MCSymbol *ThisSym = Label; if (ThisSym != BaseLabel) { + if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4"); streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym); BaseLabel = ThisSym; } @@ -623,40 +682,128 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, } } +/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're +/// successful, return 'true'. Otherwise, return 'false' and it will emit the +/// normal CIE and FDE. 
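In the Reg < 64 branch above, DW_CFA_offset folds the register into the opcode byte itself and is followed by the factored offset as a ULEB128. A self-contained sketch of those bytes; the scenario (a register saved at CFA-16 with a data alignment factor of -8, giving factored offset 2) is illustrative only:

#include <cstdint>
#include <cstdio>
#include <vector>

// Minimal ULEB128 encoder, the wire format EmitULEB128IntValue produces.
static void encodeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (V);
}

int main() {
  const uint8_t DW_CFA_offset = 0x80; // primary opcode; low 6 bits hold the reg
  std::vector<uint8_t> Bytes;
  Bytes.push_back(DW_CFA_offset | 6); // register 6
  encodeULEB128(2, Bytes);            // factored offset: -16 / -8 == 2
  for (size_t i = 0; i != Bytes.size(); ++i)
    std::printf("%02x ", Bytes[i]);   // prints: 86 02
  std::printf("\n");
  return 0;
}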
+bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, + const MCDwarfFrameInfo &Frame) { +#if 1 + return false; +#else + MCContext &Context = Streamer.getContext(); + const TargetAsmInfo &TAI = Context.getTargetAsmInfo(); + bool VerboseAsm = Streamer.isVerboseAsm(); + + // range-start range-length compact-unwind-enc personality-func lsda + // _foo LfooEnd-_foo 0x00000023 0 0 + // _bar LbarEnd-_bar 0x00000025 __gxx_personality except_tab1 + // + // .section __LD,__compact_unwind,regular,debug + // + // # compact unwind for _foo + // .quad _foo + // .set L1,LfooEnd-_foo + // .long L1 + // .long 0x01010001 + // .quad 0 + // .quad 0 + // + // # compact unwind for _bar + // .quad _bar + // .set L2,LbarEnd-_bar + // .long L2 + // .long 0x01020011 + // .quad __gxx_personality + // .quad except_tab1 + + uint32_t Encoding = + TAI.getCompactUnwindEncoding(Frame.Instructions, + getDataAlignmentFactor(Streamer), IsEH); + if (!Encoding) return false; + + // The encoding needs to know we have an LSDA. + if (Frame.Lsda) + Encoding |= 0x40000000; + + Streamer.SwitchSection(TAI.getCompactUnwindSection()); + + // Range Start + unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI); + unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); + if (VerboseAsm) Streamer.AddComment("Range Start"); + Streamer.EmitSymbolValue(Frame.Function, Size); + + // Range Length + const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin, + *Frame.End, 0); + if (VerboseAsm) Streamer.AddComment("Range Length"); + Streamer.EmitAbsValue(Range, 4); + + // Compact Encoding + Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4); + if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") + + Twine(llvm::utohexstr(Encoding))); + Streamer.EmitIntValue(Encoding, Size); + + // Personality Function + Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr); + if (VerboseAsm) Streamer.AddComment("Personality Function"); + if (Frame.Personality) + Streamer.EmitSymbolValue(Frame.Personality, Size); + else + Streamer.EmitIntValue(0, Size); // No personality fn + + // LSDA + Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding); + if (VerboseAsm) Streamer.AddComment("LSDA"); + if (Frame.Lsda) + Streamer.EmitSymbolValue(Frame.Lsda, Size); + else + Streamer.EmitIntValue(0, Size); // No LSDA + + return true; +#endif +} + const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, const MCSymbol *personality, unsigned personalityEncoding, const MCSymbol *lsda, unsigned lsdaEncoding) { MCContext &context = streamer.getContext(); - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + const TargetAsmInfo &TAI = context.getTargetAsmInfo(); + bool verboseAsm = streamer.isVerboseAsm(); MCSymbol *sectionStart; - if (asmInfo.isFunctionEHFrameSymbolPrivate() || !IsEH) + if (TAI.isFunctionEHFrameSymbolPrivate() || !IsEH) sectionStart = context.CreateTempSymbol(); else sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum)); + streamer.EmitLabel(sectionStart); CIENum++; - MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol(); + MCSymbol *sectionEnd = context.CreateTempSymbol(); // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart, *sectionEnd, 4); - streamer.EmitLabel(sectionStart); + if (verboseAsm) streamer.AddComment("CIE Length"); streamer.EmitAbsValue(Length, 4); // CIE ID unsigned CIE_ID = IsEH ? 
0 : -1; + if (verboseAsm) streamer.AddComment("CIE ID Tag"); streamer.EmitIntValue(CIE_ID, 4); // Version + if (verboseAsm) streamer.AddComment("DW_CIE_VERSION"); streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1); // Augmentation String SmallString<8> Augmentation; if (IsEH) { + if (verboseAsm) streamer.AddComment("CIE Augmentation"); Augmentation += "z"; if (personality) Augmentation += "P"; @@ -668,13 +815,16 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, streamer.EmitIntValue(0, 1); // Code Alignment Factor + if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor"); streamer.EmitULEB128IntValue(1); // Data Alignment Factor + if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor"); streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer)); // Return Address Register - streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true)); + if (verboseAsm) streamer.AddComment("CIE Return Address Column"); + streamer.EmitULEB128IntValue(TAI.getDwarfRARegNum(true)); // Augmentation Data Length (optional) @@ -691,32 +841,38 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Encoding of the FDE pointers augmentationLength += 1; + if (verboseAsm) streamer.AddComment("Augmentation Size"); streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data (optional) if (personality) { // Personality Encoding - streamer.EmitIntValue(personalityEncoding, 1); + EmitEncodingByte(streamer, personalityEncoding, + "Personality Encoding"); // Personality + if (verboseAsm) streamer.AddComment("Personality"); EmitPersonality(streamer, *personality, personalityEncoding); } + if (lsda) - streamer.EmitIntValue(lsdaEncoding, 1); // LSDA Encoding + EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding"); + // Encoding of the FDE pointers - streamer.EmitIntValue(asmInfo.getFDEEncoding(UsingCFI), 1); + EmitEncodingByte(streamer, TAI.getFDEEncoding(UsingCFI), + "FDE Encoding"); } // Initial Instructions - const std::vector<MachineMove> Moves = asmInfo.getInitialFrameState(); + const std::vector<MachineMove> &Moves = TAI.getInitialFrameState(); std::vector<MCCFIInstruction> Instructions; for (int i = 0, n = Moves.size(); i != n; ++i) { MCSymbol *Label = Moves[i].getLabel(); const MachineLocation &Dst = - TranslateMachineLocation(asmInfo, Moves[i].getDestination()); + TranslateMachineLocation(TAI, Moves[i].getDestination()); const MachineLocation &Src = - TranslateMachineLocation(asmInfo, Moves[i].getSource()); + TranslateMachineLocation(TAI, Moves[i].getSource()); MCCFIInstruction Inst(Label, Dst, Src); Instructions.push_back(Inst); } @@ -724,7 +880,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, EmitCFIInstructions(streamer, Instructions, NULL); // Padding - streamer.EmitValueToAlignment(IsEH ? 4 : asmInfo.getPointerSize()); + streamer.EmitValueToAlignment(IsEH + ? 
4 : context.getAsmInfo().getPointerSize()); streamer.EmitLabel(sectionEnd); return *sectionStart; @@ -736,17 +893,19 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, MCContext &context = streamer.getContext(); MCSymbol *fdeStart = context.CreateTempSymbol(); MCSymbol *fdeEnd = context.CreateTempSymbol(); - const TargetAsmInfo &TAsmInfo = context.getTargetAsmInfo(); + const TargetAsmInfo &TAI = context.getTargetAsmInfo(); + bool verboseAsm = streamer.isVerboseAsm(); - if (!TAsmInfo.isFunctionEHFrameSymbolPrivate() && IsEH) { - MCSymbol *EHSym = context.GetOrCreateSymbol( - frame.Function->getName() + Twine(".eh")); + if (!TAI.isFunctionEHFrameSymbolPrivate() && IsEH) { + MCSymbol *EHSym = + context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh")); streamer.EmitEHSymAttributes(frame.Function, EHSym); streamer.EmitLabel(EHSym); } // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0); + if (verboseAsm) streamer.AddComment("FDE Length"); streamer.EmitAbsValue(Length, 4); streamer.EmitLabel(fdeStart); @@ -756,6 +915,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, if (IsEH) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart, 0); + if (verboseAsm) streamer.AddComment("FDE CIE Offset"); streamer.EmitAbsValue(offset, 4); } else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart, @@ -764,18 +924,20 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, } else { streamer.EmitSymbolValue(&cieStart, 4); } - unsigned fdeEncoding = TAsmInfo.getFDEEncoding(UsingCFI); + + unsigned fdeEncoding = TAI.getFDEEncoding(UsingCFI); unsigned size = getSizeForEncoding(streamer, fdeEncoding); // PC Begin unsigned PCBeginEncoding = IsEH ? fdeEncoding : (unsigned)dwarf::DW_EH_PE_absptr; unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding); - EmitSymbol(streamer, *frame.Begin, PCBeginEncoding); + EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location"); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, *frame.End, 0); + if (verboseAsm) streamer.AddComment("FDE address range"); streamer.EmitAbsValue(Range, size); if (IsEH) { @@ -785,11 +947,13 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, if (frame.Lsda) augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding); + if (verboseAsm) streamer.AddComment("Augmentation size"); streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data if (frame.Lsda) - EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding); + EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding, + "Language Specific Data Area"); } // Call Frame Instructions @@ -843,39 +1007,47 @@ namespace llvm { }; } -void MCDwarfFrameEmitter::Emit(MCStreamer &streamer, - bool usingCFI, - bool isEH) { - MCContext &context = streamer.getContext(); - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); - const MCSection §ion = isEH ? - *asmInfo.getEHFrameSection() : *asmInfo.getDwarfFrameSection(); - streamer.SwitchSection(§ion); - MCSymbol *SectionStart = context.CreateTempSymbol(); - streamer.EmitLabel(SectionStart); - - MCSymbol *fdeEnd = NULL; +void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, + bool UsingCFI, + bool IsEH) { + MCContext &Context = Streamer.getContext(); + const TargetAsmInfo &TAI = Context.getTargetAsmInfo(); + const MCSection &Section = IsEH ? 
*TAI.getEHFrameSection() : + *TAI.getDwarfFrameSection(); + Streamer.SwitchSection(&Section); + MCSymbol *SectionStart = Context.CreateTempSymbol(); + Streamer.EmitLabel(SectionStart); + + MCSymbol *FDEEnd = NULL; DenseMap<CIEKey, const MCSymbol*> CIEStarts; - FrameEmitterImpl Emitter(usingCFI, isEH, SectionStart); + FrameEmitterImpl Emitter(UsingCFI, IsEH, SectionStart); const MCSymbol *DummyDebugKey = NULL; - for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) { - const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); - CIEKey key(frame.Personality, frame.PersonalityEncoding, - frame.LsdaEncoding); - const MCSymbol *&cieStart = isEH ? CIEStarts[key] : DummyDebugKey; - if (!cieStart) - cieStart = &Emitter.EmitCIE(streamer, frame.Personality, - frame.PersonalityEncoding, frame.Lsda, - frame.LsdaEncoding); - fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame); + for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { + const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); + if (IsEH && TAI.getCompactUnwindSection() && + Emitter.EmitCompactUnwind(Streamer, Frame)) { + FDEEnd = NULL; + continue; + } + + CIEKey Key(Frame.Personality, Frame.PersonalityEncoding, + Frame.LsdaEncoding); + const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey; + if (!CIEStart) + CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality, + Frame.PersonalityEncoding, Frame.Lsda, + Frame.LsdaEncoding); + + FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame); + if (i != n - 1) - streamer.EmitLabel(fdeEnd); + Streamer.EmitLabel(FDEEnd); } - streamer.EmitValueToAlignment(asmInfo.getPointerSize()); - if (fdeEnd) - streamer.EmitLabel(fdeEnd); + Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize()); + if (FDEEnd) + Streamer.EmitLabel(FDEEnd); } void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer, diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index bbb2789ea81c..49340edbed5e 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -26,7 +26,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetAsmBackend.h" -#include "llvm/Target/TargetAsmInfo.h" using namespace llvm; diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h index db34d58ec600..855e7e9ca60f 100644 --- a/lib/MC/MCELFStreamer.h +++ b/lib/MC/MCELFStreamer.h @@ -138,137 +138,3 @@ private: } // end llvm namespace #endif -//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file assembles .s files and emits ELF .o object files. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MC_MCELFSTREAMER_H -#define LLVM_MC_MCELFSTREAMER_H - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSectionELF.h" - -namespace llvm { - -class MCELFStreamer : public MCObjectStreamer { -public: - MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, TAB, OS, Emitter) {} - - ~MCELFStreamer() {} - - /// @name MCStreamer Interface - /// @{ - - virtual void InitSections(); - virtual void ChangeSection(const MCSection *Section); - virtual void EmitLabel(MCSymbol *Symbol); - virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); - virtual void EmitThumbFunc(MCSymbol *Func); - virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); - virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); - virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment); - virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitCOFFSymbolStorageClass(int StorageClass) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitCOFFSymbolType(int Type) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EndCOFFSymbolDef() { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.setSize(Value); - } - - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); - - virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment = 0) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitBytes(StringRef Data, unsigned AddrSpace); - virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, - unsigned ValueSize = 1, - unsigned MaxBytesToEmit = 0); - virtual void EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit = 0); - - virtual void EmitFileDirective(StringRef Filename); - - virtual void Finish(); - -private: - virtual void EmitInstToFragment(const MCInst &Inst); - virtual void EmitInstToData(const MCInst &Inst); - - void fixSymbolsInTLSFixups(const MCExpr *expr); - - struct LocalCommon { - MCSymbolData *SD; - uint64_t Size; - unsigned ByteAlignment; - }; - std::vector<LocalCommon> LocalCommons; - - SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet; - /// @} - void SetSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind) { - SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); - } - - void SetSectionData() { - SetSection(".data", ELF::SHT_PROGBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, - SectionKind::getDataRel()); - EmitCodeAlignment(4, 0); - } - void SetSectionText() { - SetSection(".text", ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | - 
               ELF::SHF_ALLOC, SectionKind::getText());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionBss() {
-    SetSection(".bss", ELF::SHT_NOBITS,
-               ELF::SHF_WRITE |
-               ELF::SHF_ALLOC, SectionKind::getBSS());
-    EmitCodeAlignment(4, 0);
-  }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
index 46ea9b844a6a..309752ec5f02 100644
--- a/lib/MC/MCLoggingStreamer.cpp
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -85,9 +85,11 @@ public:
 
   virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                         const MCSymbol *LastLabel,
-                                        const MCSymbol *Label) {
+                                        const MCSymbol *Label,
+                                        unsigned PointerSize) {
     LogCall("EmitDwarfAdvanceLineAddr");
-    return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+    return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+                                           PointerSize);
   }
 
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 12aeb4f48fda..1b21249ca321 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index f38b82231207..9577af010205 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -44,7 +44,8 @@ namespace {
     virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
     virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                           const MCSymbol *LastLabel,
-                                          const MCSymbol *Label) {}
+                                          const MCSymbol *Label,
+                                          unsigned PointerSize) {}
 
     virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
 
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index e230c5330203..8635aac00302 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -18,7 +18,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
 using namespace llvm;
 
 MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
@@ -197,9 +196,9 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
 
 void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                                 const MCSymbol *LastLabel,
-                                                const MCSymbol *Label) {
+                                                const MCSymbol *Label,
+                                                unsigned PointerSize) {
   if (!LastLabel) {
-    int PointerSize = getContext().getTargetAsmInfo().getPointerSize();
     EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
     return;
   }
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 4f55cea7bc5e..0c181f39611e 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -28,6 +28,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
@@ -84,6 +85,7 @@ private:
   AsmLexer Lexer;
   MCContext &Ctx;
   MCStreamer &Out;
+  const MCAsmInfo &MAI;
   SourceMgr &SrcMgr;
   MCAsmParserExtension *GenericParser;
   MCAsmParserExtension *PlatformParser;
@@ -135,7 +137,7 @@ public:
   virtual MCContext &getContext() { return Ctx; }
   virtual MCStreamer &getStreamer() { return Out; }
 
-  virtual bool Warning(SMLoc L, const Twine &Meg);
+  virtual bool Warning(SMLoc L, const Twine &Msg);
   virtual bool Error(SMLoc L, const Twine &Msg);
 
   const AsmToken &Lex();
@@ -160,8 +162,9 @@ private:
   void HandleMacroExit();
   void PrintMacroInstantiations();
-  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
-    SrcMgr.PrintMessage(Loc, Msg, Type);
+  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
+                    bool ShowLine = true) const {
+    SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
   }
 
   /// EnterIncludeFile - Enter the specified file. This returns true on failure.
@@ -337,7 +340,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
 AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
                      MCStreamer &_Out, const MCAsmInfo &_MAI)
-  : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
+  : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
     GenericParser(new GenericAsmParser), PlatformParser(0),
     CurBuffer(0), MacrosEnabled(true) {
   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
@@ -466,6 +469,29 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
       TokError("unassigned file number: " + Twine(i) + " for .file directives");
   }
 
+  // Check to see that all assembler local symbols were actually defined.
+  // Targets that don't do subsections via symbols may not want this, so
+  // conservatively exclude them. Only do this if we're finalizing, though,
+  // as otherwise we won't necessarily have seen everything yet.
+  if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
+    const MCContext::SymbolTable &Symbols = getContext().getSymbols();
+    for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
+         e = Symbols.end();
+         i != e; ++i) {
+      MCSymbol *Sym = i->getValue();
+      // Variable symbols may not be marked as defined, so check those
+      // explicitly. If we know it's a variable, we have a definition for
+      // the purposes of this check.
+      if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
+        // FIXME: We would really like to refer back to where the symbol was
+        // first referenced for a source location. We need to add something
+        // to track that. Currently, we just point to the end of the file.
+        PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
+                     Sym->getName() + "' not defined", "error", false);
+    }
+  }
+
+
   // Finalize the output stream if there are no errors and if the client wants
   // us to.
   if (!HadError && !NoFinalize)
@@ -1121,7 +1147,7 @@ bool AsmParser::ParseStatement() {
   if (IDVal == ".weak_def_can_be_hidden")
     return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
 
-  if (IDVal == ".comm")
+  if (IDVal == ".comm" || IDVal == ".common")
     return ParseDirectiveComm(/*IsLocal=*/false);
   if (IDVal == ".lcomm")
     return ParseDirectiveComm(/*IsLocal=*/true);
@@ -1168,7 +1194,7 @@ bool AsmParser::ParseStatement() {
     for (unsigned i = 0; i != ParsedOperands.size(); ++i) {
       if (i != 0)
         OS << ", ";
-      ParsedOperands[i]->dump(OS);
+      ParsedOperands[i]->print(OS);
     }
     OS << "]";
 
@@ -1587,13 +1613,18 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
   for (;;) {
     const MCExpr *Value;
+    SMLoc ExprLoc = getLexer().getLoc();
     if (ParseExpression(Value))
       return true;
 
     // Special case constant expressions to match code generator.
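//---- Note on the assembler-local-symbol check added to Run() above ---------
// After parsing, every temporary (assembler-local) symbol that was created
// must either be a variable or have been defined by a label; anything else
// was referenced but never given a location. The same walk over a toy symbol
// record (hypothetical Symbol struct, not the MCSymbol API):
#include <cstdio>
#include <string>
#include <vector>

struct Symbol {
  std::string Name;
  bool IsTemporary;  // assembler-local, e.g. starts with the 'L' prefix
  bool IsVariable;   // created by assignment, counts as defined here
  bool IsDefined;    // an actual label was emitted for it
};

// Returns the number of "not defined" diagnostics issued.
unsigned checkLocalSymbols(const std::vector<Symbol> &Symbols) {
  unsigned Errors = 0;
  for (unsigned i = 0, e = Symbols.size(); i != e; ++i) {
    const Symbol &S = Symbols[i];
    if (S.IsTemporary && !S.IsVariable && !S.IsDefined) {
      std::fprintf(stderr, "error: assembler local symbol '%s' not defined\n",
                   S.Name.c_str());
      ++Errors;
    }
  }
  return Errors;
}
//----------------------------------------------------------------------------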
- if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) - getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); - else + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE); + } else getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE); if (getLexer().is(AsmToken::EndOfStatement)) diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index 64f635517b11..66ad384c7db2 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -401,14 +401,14 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) { bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { SMLoc startLoc = getLexer().getLoc(); if (getLexer().is(AsmToken::Percent)) { - const TargetAsmInfo &asmInfo = getContext().getTargetAsmInfo(); + const TargetAsmInfo &TAI = getContext().getTargetAsmInfo(); SMLoc endLoc; unsigned LLVMRegNo; if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc)) return true; // Check that this is a non-volatile register. - const unsigned *NVRegs = asmInfo.getCalleeSavedRegs(); + const unsigned *NVRegs = TAI.getCalleeSavedRegs(); unsigned i; for (i = 0; NVRegs[i] != 0; ++i) if (NVRegs[i] == LLVMRegNo) @@ -416,7 +416,7 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { if (NVRegs[i] == 0) return Error(startLoc, "expected non-volatile register"); - int SEHRegNo = asmInfo.getSEHRegNum(LLVMRegNo); + int SEHRegNo = TAI.getSEHRegNum(LLVMRegNo); if (SEHRegNo < 0) return Error(startLoc,"register can't be represented in SEH unwind info"); RegNo = SEHRegNo; diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index 70295efc613c..4030e41036aa 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -12,6 +12,8 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetAsmParser.h" using namespace llvm; @@ -41,4 +43,6 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) { return ParseExpression(Res, L); } - +void MCParsedAsmOperand::dump() const { + dbgs() << " " << *this; +} diff --git a/lib/MC/MCParser/TargetAsmParser.cpp b/lib/MC/MCParser/TargetAsmParser.cpp index 8d43c21f4bc9..512f6b044911 100644 --- a/lib/MC/MCParser/TargetAsmParser.cpp +++ b/lib/MC/MCParser/TargetAsmParser.cpp @@ -10,8 +10,8 @@ #include "llvm/Target/TargetAsmParser.h" using namespace llvm; -TargetAsmParser::TargetAsmParser(const Target &T) - : TheTarget(T), AvailableFeatures(0) +TargetAsmParser::TargetAsmParser() + : AvailableFeatures(0) { } diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index ae3ed0f3f61a..6e96b78e315b 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -15,7 +15,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include <cstdlib> @@ -81,7 +80,7 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size, assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) && "Invalid size"); 
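//---- Note on the literal-range checks above --------------------------------
// Both the new ParseDirectiveValue guard and the EmitIntValue assert accept a
// constant if it fits the directive's width either as an unsigned or as a
// signed two's-complement value, so ".byte 255" and ".byte -128" pass while
// ".byte 256" is rejected. A sketch of the predicate that the isUIntN/isIntN
// pair from MathExtras.h expresses (fitsInBytes is a hypothetical name):
#include <cassert>
#include <stdint.h>

bool fitsInBytes(uint64_t Value, unsigned Size) {
  assert(Size >= 1 && Size <= 8 && "invalid size");
  if (Size == 8)
    return true;                        // any 64-bit pattern is representable
  unsigned N = 8 * Size;
  uint64_t UMax = (1ULL << N) - 1;      // largest unsigned N-bit value
  int64_t SMax = (int64_t)(UMax >> 1);  // largest signed N-bit value
  if (Value <= UMax)
    return true;                        // fits as unsigned
  int64_t SValue = (int64_t)Value;
  return SValue >= -SMax - 1 && SValue <= SMax;  // fits as signed
}

// e.g. fitsInBytes(255, 1) and fitsInBytes((uint64_t)-128, 1) hold,
// while fitsInBytes(256, 1) does not.
//----------------------------------------------------------------------------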
char buf[8]; - const bool isLittleEndian = Context.getTargetAsmInfo().isLittleEndian(); + const bool isLittleEndian = Context.getAsmInfo().isLittleEndian(); for (unsigned i = 0; i != Size; ++i) { unsigned index = isLittleEndian ? i : (Size - i - 1); buf[i] = uint8_t(Value >> (index * 8)); diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp new file mode 100644 index 000000000000..86dc1083cee9 --- /dev/null +++ b/lib/MC/MCSubtargetInfo.cpp @@ -0,0 +1,96 @@ +//===-- MCSubtargetInfo.cpp - Subtarget Information -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; + +void +MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS, + const SubtargetFeatureKV *PF, + const SubtargetFeatureKV *PD, + const SubtargetInfoKV *PI, + const InstrStage *IS, + const unsigned *OC, + const unsigned *FP, + unsigned NF, unsigned NP) { + TargetTriple = TT; + ProcFeatures = PF; + ProcDesc = PD; + ProcItins = PI; + Stages = IS; + OperandCycles = OC; + ForwardingPathes = FP; + NumFeatures = NF; + NumProcs = NP; + + SubtargetFeatures Features(FS); + FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs, + ProcFeatures, NumFeatures); +} + + +/// ReInitMCSubtargetInfo - Change CPU (and optionally supplemented with +/// feature string) and recompute feature bits. +uint64_t MCSubtargetInfo::ReInitMCSubtargetInfo(StringRef CPU, StringRef FS) { + SubtargetFeatures Features(FS); + FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs, + ProcFeatures, NumFeatures); + return FeatureBits; +} + +/// ToggleFeature - Toggle a feature and returns the re-computed feature +/// bits. This version does not change the implied bits. +uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) { + FeatureBits ^= FB; + return FeatureBits; +} + +/// ToggleFeature - Toggle a feature and returns the re-computed feature +/// bits. This version will also change all implied bits. 
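//---- Note on the FeatureBits mask in MCSubtargetInfo above ------------------
// The selected subtarget features live in a single 64-bit mask: the
// InitMCSubtargetInfo/ReInitMCSubtargetInfo routines derive it from the CPU
// name plus a feature string, and the uint64_t ToggleFeature overload simply
// XORs raw bits in or out, deliberately leaving implied features alone (the
// StringRef overload below handles those). A toy version of that bookkeeping
// with made-up feature bits, not a real target's:
#include <stdint.h>

enum {
  FeatureA = 1 << 0,   // hypothetical features
  FeatureB = 1 << 1,
  FeatureC = 1 << 2
};

struct ToySubtargetInfo {
  uint64_t FeatureBits;

  // Flip the given bits and return the new mask, like
  // MCSubtargetInfo::ToggleFeature(uint64_t FB) above.
  uint64_t ToggleFeature(uint64_t FB) {
    FeatureBits ^= FB;
    return FeatureBits;
  }
  bool hasFeature(uint64_t FB) const { return (FeatureBits & FB) != 0; }
};

// Usage: toggling the same feature twice is a no-op.
//   ToySubtargetInfo STI = { FeatureA };
//   STI.ToggleFeature(FeatureB);   // mask is now FeatureA | FeatureB
//   STI.ToggleFeature(FeatureB);   // back to FeatureA
//----------------------------------------------------------------------------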
+uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) { + SubtargetFeatures Features; + FeatureBits = Features.ToggleFeature(FeatureBits, FS, + ProcFeatures, NumFeatures); + return FeatureBits; +} + + +InstrItineraryData +MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { + assert(ProcItins && "Instruction itineraries information not available!"); + +#ifndef NDEBUG + for (size_t i = 1; i < NumProcs; i++) { + assert(strcmp(ProcItins[i - 1].Key, ProcItins[i].Key) < 0 && + "Itineraries table is not sorted"); + } +#endif + + // Find entry + SubtargetInfoKV KV; + KV.Key = CPU.data(); + const SubtargetInfoKV *Found = + std::lower_bound(ProcItins, ProcItins+NumProcs, KV); + if (Found == ProcItins+NumProcs || StringRef(Found->Key) != CPU) { + errs() << "'" << CPU + << "' is not a recognized processor for this target" + << " (ignoring processor)\n"; + return InstrItineraryData(); + } + + return InstrItineraryData(Stages, OperandCycles, ForwardingPathes, + (InstrItinerary *)Found->Value); +} diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 9453f5c2a963..e698384a49f1 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -225,9 +225,9 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer, // Switch sections (the static function above is meant to be called from // here and from Emit(). MCContext &context = streamer.getContext(); - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + const TargetAsmInfo &TAI = context.getTargetAsmInfo(); const MCSection *xdataSect = - asmInfo.getWin64EHTableSection(GetSectionSuffix(info->Function)); + TAI.getWin64EHTableSection(GetSectionSuffix(info->Function)); streamer.SwitchSection(xdataSect); llvm::EmitUnwindInfo(streamer, info); @@ -236,11 +236,11 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer, void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) { MCContext &context = streamer.getContext(); // Emit the unwind info structs first. - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + const TargetAsmInfo &TAI = context.getTargetAsmInfo(); for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); const MCSection *xdataSect = - asmInfo.getWin64EHTableSection(GetSectionSuffix(info.Function)); + TAI.getWin64EHTableSection(GetSectionSuffix(info.Function)); streamer.SwitchSection(xdataSect); llvm::EmitUnwindInfo(streamer, &info); } @@ -248,7 +248,7 @@ void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) { for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); const MCSection *pdataSect = - asmInfo.getWin64EHFuncTableSection(GetSectionSuffix(info.Function)); + TAI.getWin64EHFuncTableSection(GetSectionSuffix(info.Function)); streamer.SwitchSection(pdataSect); EmitRuntimeFunction(streamer, &info); } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index f049b1c6e2a4..69efe231ad6e 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -23,34 +23,12 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetAsmBackend.h" -// FIXME: Gross. 
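//---- Note on the CPU lookup in getInstrItineraryForCPU above ----------------
// The itinerary table is emitted pre-sorted by CPU name, which is what the
// #ifndef NDEBUG strcmp loop asserts, so std::lower_bound can binary-search
// it in O(log n). The same pattern over a plain key/value table (KV and
// lookupCPU are stand-ins for SubtargetInfoKV and the real accessor):
#include <algorithm>
#include <cstdio>
#include <cstring>

struct KV {
  const char *Key;
  int Value;
  bool operator<(const KV &RHS) const {      // orders the table by key
    return std::strcmp(Key, RHS.Key) < 0;
  }
};

static const KV Table[] = {                  // must stay sorted by Key
  { "cortex-a8", 1 }, { "cortex-a9", 2 }, { "generic", 0 }
};
static const unsigned TableSize = sizeof(Table) / sizeof(Table[0]);

int lookupCPU(const char *CPU) {
  KV Probe = { CPU, 0 };
  const KV *Found = std::lower_bound(Table, Table + TableSize, Probe);
  if (Found == Table + TableSize || std::strcmp(Found->Key, CPU) != 0) {
    std::fprintf(stderr, "'%s' is not a recognized processor (ignoring)\n",
                 CPU);
    return 0;                                // fall back to a default entry
  }
  return Found->Value;
}
//----------------------------------------------------------------------------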
-#include "../Target/ARM/ARMFixupKinds.h" -#include "../Target/X86/X86FixupKinds.h" - #include <vector> using namespace llvm; using namespace llvm::object; -// FIXME: this has been copied from (or to) X86AsmBackend.cpp -static unsigned getFixupKindLog2Size(unsigned Kind) { - switch (Kind) { - default: - llvm_unreachable("invalid fixup kind!"); - case FK_PCRel_1: - case FK_Data_1: return 0; - case FK_PCRel_2: - case FK_Data_2: return 1; - case FK_PCRel_4: - // FIXME: Remove these!!! - case X86::reloc_riprel_4byte: - case X86::reloc_riprel_4byte_movq_load: - case X86::reloc_signed_4byte: - case FK_Data_4: return 2; - case FK_Data_8: return 3; - } -} - -static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { +bool MachObjectWriter:: +doesSymbolRequireExternRelocation(const MCSymbolData *SD) { // Undefined symbols are always extern. if (SD->Symbol->isUndefined()) return true; @@ -64,1557 +42,740 @@ static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { return false; } -namespace { - -class MachObjectWriter : public MCObjectWriter { - /// MachSymbolData - Helper struct for containing some precomputed information - /// on symbols. - struct MachSymbolData { - MCSymbolData *SymbolData; - uint64_t StringIndex; - uint8_t SectionIndex; +bool MachObjectWriter:: +MachSymbolData::operator<(const MachSymbolData &RHS) const { + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); +} - // Support lexicographic sorting. - bool operator<(const MachSymbolData &RHS) const { - return SymbolData->getSymbol().getName() < - RHS.SymbolData->getSymbol().getName(); - } - }; +bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { + const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( + (MCFixupKind) Kind); - /// The target specific Mach-O writer instance. - llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter; + return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; +} - /// @name Relocation Data - /// @{ +uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment, + const MCAsmLayout &Layout) const { + return getSectionAddress(Fragment->getParent()) + + Layout.getFragmentOffset(Fragment); +} - llvm::DenseMap<const MCSectionData*, - std::vector<macho::RelocationEntry> > Relocations; - llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; +uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD, + const MCAsmLayout &Layout) const { + const MCSymbol &S = SD->getSymbol(); + + // If this is a variable, then recursively evaluate now. + if (S.isVariable()) { + MCValue Target; + if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout)) + report_fatal_error("unable to evaluate offset for variable '" + + S.getName() + "'"); + + // Verify that any used symbols are defined. 
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymA()->getSymbol().getName() + "'"); + if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymB()->getSymbol().getName() + "'"); + + uint64_t Address = Target.getConstant(); + if (Target.getSymA()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymA()->getSymbol()), Layout); + if (Target.getSymB()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymB()->getSymbol()), Layout); + return Address; + } - /// @} - /// @name Symbol Table Data - /// @{ + return getSectionAddress(SD->getFragment()->getParent()) + + Layout.getSymbolOffset(SD); +} - SmallString<256> StringTable; - std::vector<MachSymbolData> LocalSymbolData; - std::vector<MachSymbolData> ExternalSymbolData; - std::vector<MachSymbolData> UndefinedSymbolData; +uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD, + const MCAsmLayout &Layout) const { + uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); + unsigned Next = SD->getLayoutOrder() + 1; + if (Next >= Layout.getSectionOrder().size()) + return 0; + + const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; + if (NextSD.getSection().isVirtualSection()) + return 0; + return OffsetToAlignment(EndAddr, NextSD.getAlignment()); +} - /// @} +void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, + unsigned LoadCommandsSize, + bool SubsectionsViaSymbols) { + uint32_t Flags = 0; -private: - /// @name Utility Methods - /// @{ + if (SubsectionsViaSymbols) + Flags |= macho::HF_SubsectionsViaSymbols; - bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { - const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( - (MCFixupKind) Kind); + // struct mach_header (28 bytes) or + // struct mach_header_64 (32 bytes) - return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; - } + uint64_t Start = OS.tell(); + (void) Start; - /// @} + Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32); - SectionAddrMap SectionAddress; - uint64_t getSectionAddress(const MCSectionData* SD) const { - return SectionAddress.lookup(SD); - } - uint64_t getSymbolAddress(const MCSymbolData* SD, - const MCAsmLayout &Layout) const { - const MCSymbol &S = SD->getSymbol(); - - // If this is a variable, then recursively evaluate now. - if (S.isVariable()) { - MCValue Target; - if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout)) - report_fatal_error("unable to evaluate offset for variable '" + - S.getName() + "'"); - - // Verify that any used symbols are defined. 
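//---- Note on getPaddingSize and OffsetToAlignment above ---------------------
// A section is padded so that the next section in layout order starts at its
// required alignment; the amount is plain modular arithmetic. A sketch of the
// helper under the assumption MC makes (power-of-two alignments):
#include <cassert>
#include <stdint.h>

// Bytes needed to advance Value to the next multiple of Align
// (0 if Value is already aligned). Align must be a power of two.
uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two only");
  return (Align - (Value & (Align - 1))) & (Align - 1);
}

// e.g. a section ending at 0x1003 followed by a 16-byte-aligned section
// needs offsetToAlignment(0x1003, 16) == 13 bytes of padding.
//----------------------------------------------------------------------------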
- if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) - report_fatal_error("unable to evaluate offset to undefined symbol '" + - Target.getSymA()->getSymbol().getName() + "'"); - if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) - report_fatal_error("unable to evaluate offset to undefined symbol '" + - Target.getSymB()->getSymbol().getName() + "'"); - - uint64_t Address = Target.getConstant(); - if (Target.getSymA()) - Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( - Target.getSymA()->getSymbol()), Layout); - if (Target.getSymB()) - Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( - Target.getSymB()->getSymbol()), Layout); - return Address; - } + Write32(TargetObjectWriter->getCPUType()); + Write32(TargetObjectWriter->getCPUSubtype()); - return getSectionAddress(SD->getFragment()->getParent()) + - Layout.getSymbolOffset(SD); - } - uint64_t getFragmentAddress(const MCFragment *Fragment, - const MCAsmLayout &Layout) const { - return getSectionAddress(Fragment->getParent()) + - Layout.getFragmentOffset(Fragment); - } + Write32(macho::HFT_Object); + Write32(NumLoadCommands); + Write32(LoadCommandsSize); + Write32(Flags); + if (is64Bit()) + Write32(0); // reserved - uint64_t getPaddingSize(const MCSectionData *SD, - const MCAsmLayout &Layout) const { - uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD); - unsigned Next = SD->getLayoutOrder() + 1; - if (Next >= Layout.getSectionOrder().size()) - return 0; - - const MCSectionData &NextSD = *Layout.getSectionOrder()[Next]; - if (NextSD.getSection().isVirtualSection()) - return 0; - return OffsetToAlignment(EndAddr, NextSD.getAlignment()); - } + assert(OS.tell() - Start == + (is64Bit() ? macho::Header64Size : macho::Header32Size)); +} -public: - MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS, - bool _IsLittleEndian) - : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) { +/// WriteSegmentLoadCommand - Write a segment load command. +/// +/// \arg NumSections - The number of sections in this segment. +/// \arg SectionDataSize - The total size of the sections. +void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, + uint64_t VMSize, + uint64_t SectionDataStartOffset, + uint64_t SectionDataSize) { + // struct segment_command (56 bytes) or + // struct segment_command_64 (72 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + unsigned SegmentLoadCommandSize = + is64Bit() ? macho::SegmentLoadCommand64Size: + macho::SegmentLoadCommand32Size; + Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment); + Write32(SegmentLoadCommandSize + + NumSections * (is64Bit() ? 
macho::Section64Size : + macho::Section32Size)); + + WriteBytes("", 16); + if (is64Bit()) { + Write64(0); // vmaddr + Write64(VMSize); // vmsize + Write64(SectionDataStartOffset); // file offset + Write64(SectionDataSize); // file size + } else { + Write32(0); // vmaddr + Write32(VMSize); // vmsize + Write32(SectionDataStartOffset); // file offset + Write32(SectionDataSize); // file size } + Write32(0x7); // maxprot + Write32(0x7); // initprot + Write32(NumSections); + Write32(0); // flags - /// @name Target Writer Proxy Accessors - /// @{ + assert(OS.tell() - Start == SegmentLoadCommandSize); +} - bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - bool isARM() const { - uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask; - return CPUType == mach::CTM_ARM; +void MachObjectWriter::WriteSection(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCSectionData &SD, + uint64_t FileOffset, + uint64_t RelocationsStart, + unsigned NumRelocations) { + uint64_t SectionSize = Layout.getSectionAddressSize(&SD); + + // The offset is unused for virtual sections. + if (SD.getSection().isVirtualSection()) { + assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); + FileOffset = 0; } - /// @} - - void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, - bool SubsectionsViaSymbols) { - uint32_t Flags = 0; - - if (SubsectionsViaSymbols) - Flags |= macho::HF_SubsectionsViaSymbols; - - // struct mach_header (28 bytes) or - // struct mach_header_64 (32 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32); - - Write32(TargetObjectWriter->getCPUType()); - Write32(TargetObjectWriter->getCPUSubtype()); - - Write32(macho::HFT_Object); - Write32(NumLoadCommands); - Write32(LoadCommandsSize); - Write32(Flags); - if (is64Bit()) - Write32(0); // reserved - - assert(OS.tell() - Start == - (is64Bit() ? macho::Header64Size : macho::Header32Size)); + // struct section (68 bytes) or + // struct section_64 (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); + WriteBytes(Section.getSectionName(), 16); + WriteBytes(Section.getSegmentName(), 16); + if (is64Bit()) { + Write64(getSectionAddress(&SD)); // address + Write64(SectionSize); // size + } else { + Write32(getSectionAddress(&SD)); // address + Write32(SectionSize); // size } + Write32(FileOffset); + + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; + + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); + Write32(Log2_32(SD.getAlignment())); + Write32(NumRelocations ? RelocationsStart : 0); + Write32(NumRelocations); + Write32(Flags); + Write32(IndirectSymBase.lookup(&SD)); // reserved1 + Write32(Section.getStubSize()); // reserved2 + if (is64Bit()) + Write32(0); // reserved3 + + assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size : + macho::Section32Size)); +} - /// WriteSegmentLoadCommand - Write a segment load command. - /// - /// \arg NumSections - The number of sections in this segment. - /// \arg SectionDataSize - The total size of the sections. 
- void WriteSegmentLoadCommand(unsigned NumSections, - uint64_t VMSize, - uint64_t SectionDataStartOffset, - uint64_t SectionDataSize) { - // struct segment_command (56 bytes) or - // struct segment_command_64 (72 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - unsigned SegmentLoadCommandSize = - is64Bit() ? macho::SegmentLoadCommand64Size: - macho::SegmentLoadCommand32Size; - Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment); - Write32(SegmentLoadCommandSize + - NumSections * (is64Bit() ? macho::Section64Size : - macho::Section32Size)); - - WriteBytes("", 16); - if (is64Bit()) { - Write64(0); // vmaddr - Write64(VMSize); // vmsize - Write64(SectionDataStartOffset); // file offset - Write64(SectionDataSize); // file size - } else { - Write32(0); // vmaddr - Write32(VMSize); // vmsize - Write32(SectionDataStartOffset); // file offset - Write32(SectionDataSize); // file size - } - Write32(0x7); // maxprot - Write32(0x7); // initprot - Write32(NumSections); - Write32(0); // flags +void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, + uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) - assert(OS.tell() - Start == SegmentLoadCommandSize); - } + uint64_t Start = OS.tell(); + (void) Start; - void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCSectionData &SD, uint64_t FileOffset, - uint64_t RelocationsStart, unsigned NumRelocations) { - uint64_t SectionSize = Layout.getSectionAddressSize(&SD); + Write32(macho::LCT_Symtab); + Write32(macho::SymtabLoadCommandSize); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); - // The offset is unused for virtual sections. - if (SD.getSection().isVirtualSection()) { - assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); - FileOffset = 0; - } + assert(OS.tell() - Start == macho::SymtabLoadCommandSize); +} - // struct section (68 bytes) or - // struct section_64 (80 bytes) +void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(macho::LCT_Dysymtab); + Write32(macho::DysymtabLoadCommandSize); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == macho::DysymtabLoadCommandSize); +} - uint64_t Start = OS.tell(); - (void) Start; +void MachObjectWriter::WriteNlist(MachSymbolData &MSD, + const MCAsmLayout &Layout) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol &Symbol = Data.getSymbol(); + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint32_t Address = 0; - const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection()); - WriteBytes(Section.getSectionName(), 16); - WriteBytes(Section.getSegmentName(), 16); - if 
(is64Bit()) { - Write64(getSectionAddress(&SD)); // address - Write64(SectionSize); // size + // Set the N_TYPE bits. See <mach-o/nlist.h>. + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = macho::STT_Undefined; + else if (Symbol.isAbsolute()) + Type = macho::STT_Absolute; + else + Type = macho::STT_Section; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= macho::STF_PrivateExtern; + + // Set external bit. + if (Data.isExternal() || Symbol.isUndefined()) + Type |= macho::STF_External; + + // Compute the symbol address. + if (Symbol.isDefined()) { + if (Symbol.isAbsolute()) { + Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); } else { - Write32(getSectionAddress(&SD)); // address - Write32(SectionSize); // size + Address = getSymbolAddress(&Data, Layout); + } + } else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. + if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + report_fatal_error("invalid 'common' alignment '" + + Twine(Align) + "'"); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); } - Write32(FileOffset); - - unsigned Flags = Section.getTypeAndAttributes(); - if (SD.hasInstructions()) - Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; - - assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); - Write32(Log2_32(SD.getAlignment())); - Write32(NumRelocations ? RelocationsStart : 0); - Write32(NumRelocations); - Write32(Flags); - Write32(IndirectSymBase.lookup(&SD)); // reserved1 - Write32(Section.getStubSize()); // reserved2 - if (is64Bit()) - Write32(0); // reserved3 - - assert(OS.tell() - Start == (is64Bit() ? 
macho::Section64Size : - macho::Section32Size)); } - void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, - uint32_t StringTableOffset, - uint32_t StringTableSize) { - // struct symtab_command (24 bytes) + // struct nlist (12 bytes) - uint64_t Start = OS.tell(); - (void) Start; + Write32(MSD.StringIndex); + Write8(Type); + Write8(MSD.SectionIndex); - Write32(macho::LCT_Symtab); - Write32(macho::SymtabLoadCommandSize); - Write32(SymbolOffset); - Write32(NumSymbols); - Write32(StringTableOffset); - Write32(StringTableSize); - - assert(OS.tell() - Start == macho::SymtabLoadCommandSize); - } - - void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, - uint32_t NumLocalSymbols, - uint32_t FirstExternalSymbol, - uint32_t NumExternalSymbols, - uint32_t FirstUndefinedSymbol, - uint32_t NumUndefinedSymbols, - uint32_t IndirectSymbolOffset, - uint32_t NumIndirectSymbols) { - // struct dysymtab_command (80 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(macho::LCT_Dysymtab); - Write32(macho::DysymtabLoadCommandSize); - Write32(FirstLocalSymbol); - Write32(NumLocalSymbols); - Write32(FirstExternalSymbol); - Write32(NumExternalSymbols); - Write32(FirstUndefinedSymbol); - Write32(NumUndefinedSymbols); - Write32(0); // tocoff - Write32(0); // ntoc - Write32(0); // modtaboff - Write32(0); // nmodtab - Write32(0); // extrefsymoff - Write32(0); // nextrefsyms - Write32(IndirectSymbolOffset); - Write32(NumIndirectSymbols); - Write32(0); // extreloff - Write32(0); // nextrel - Write32(0); // locreloff - Write32(0); // nlocrel - - assert(OS.tell() - Start == macho::DysymtabLoadCommandSize); - } - - void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { - MCSymbolData &Data = *MSD.SymbolData; - const MCSymbol &Symbol = Data.getSymbol(); - uint8_t Type = 0; - uint16_t Flags = Data.getFlags(); - uint32_t Address = 0; + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + if (is64Bit()) + Write64(Address); + else + Write32(Address); +} - // Set the N_TYPE bits. See <mach-o/nlist.h>. - // - // FIXME: Are the prebound or indirect fields possible here? - if (Symbol.isUndefined()) - Type = macho::STT_Undefined; - else if (Symbol.isAbsolute()) - Type = macho::STT_Absolute; - else - Type = macho::STT_Section; - - // FIXME: Set STAB bits. - - if (Data.isPrivateExtern()) - Type |= macho::STF_PrivateExtern; - - // Set external bit. - if (Data.isExternal() || Symbol.isUndefined()) - Type |= macho::STF_External; - - // Compute the symbol address. - if (Symbol.isDefined()) { - if (Symbol.isAbsolute()) { - Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue(); - } else { - Address = getSymbolAddress(&Data, Layout); - } - } else if (Data.isCommon()) { - // Common symbols are encoded with the size in the address - // field, and their alignment in the flags. - Address = Data.getCommonSize(); - - // Common alignment is packed into the 'desc' bits. - if (unsigned Align = Data.getCommonAlignment()) { - unsigned Log2Size = Log2_32(Align); - assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); - if (Log2Size > 15) - report_fatal_error("invalid 'common' alignment '" + - Twine(Align) + "'"); - // FIXME: Keep this mask with the SymbolFlags enumeration. 
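//---- Note on the 'common' alignment encoding in WriteNlist above ------------
// A common symbol carries its size in the nlist address field and its
// alignment, as a log2 value, in bits 8..11 of the 16-bit 'desc' flags; the
// 0xF0FF mask clears exactly that nibble before the new value is OR'd in.
// A sketch of the pack/unpack pair (hypothetical helper names):
#include <cassert>
#include <stdint.h>

uint16_t packCommonAlignment(uint16_t Flags, unsigned Align) {
  unsigned Log2Size = 0;
  while ((1u << Log2Size) < Align)
    ++Log2Size;
  assert(Align != 0 && (1u << Log2Size) == Align && "not a power of 2");
  assert(Log2Size <= 15 && "alignment too large to encode");
  return (uint16_t)((Flags & 0xF0FF) | (Log2Size << 8));
}

unsigned unpackCommonAlignment(uint16_t Flags) {
  return 1u << ((Flags >> 8) & 0xF);
}

// e.g. packCommonAlignment(0, 16) yields 0x0400, and
// unpackCommonAlignment(0x0400) gives back 16.
//----------------------------------------------------------------------------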
- Flags = (Flags & 0xF0FF) | (Log2Size << 8); - } - } +void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, + Target, FixedValue); +} - // struct nlist (12 bytes) +void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { + // This is the point where 'as' creates actual symbols for indirect symbols + // (in the following two passes). It would be easier for us to do this sooner + // when we see the attribute, but that makes getting the order in the symbol + // table much more complicated than it is worth. + // + // FIXME: Revisit this when the dust settles. - Write32(MSD.StringIndex); - Write8(Type); - Write8(MSD.SectionIndex); + // Bind non lazy symbol pointers first. + unsigned IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast<MCSectionMachO>(it->SectionData->getSection()); - // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' - // value. - Write16(Flags); - if (is64Bit()) - Write64(Address); - else - Write32(Address); - } + if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) + continue; - // FIXME: We really need to improve the relocation validation. Basically, we - // want to implement a separate computation which evaluates the relocation - // entry as the linker would, and verifies that the resultant fixup value is - // exactly what the encoder wanted. This will catch several classes of - // problems: - // - // - Relocation entry bugs, the two algorithms are unlikely to have the same - // exact bug. - // - // - Relaxation issues, where we forget to relax something. - // - // - Input errors, where something cannot be correctly encoded. 'as' allows - // these through in many cases. + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; - static bool isFixupKindRIPRel(unsigned Kind) { - return Kind == X86::reloc_riprel_4byte || - Kind == X86::reloc_riprel_4byte_movq_load; + Asm.getOrCreateSymbolData(*it->Symbol); } - void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - - // See <reloc.h>. - uint32_t FixupOffset = - Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); - uint32_t FixupAddress = - getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); - int64_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - Value = Target.getConstant(); - - if (IsPCRel) { - // Compensate for the relocation offset, Darwin x86_64 relocations only - // have the addend and appear to have attempted to define it to be the - // actual expression addend without the PCrel bias. However, instructions - // with data following the relocation are not accommodated for (see comment - // below regarding SIGNED{1,2,4}), so it isn't exactly that either. 
- Value += 1LL << Log2Size; - } - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - Type = macho::RIT_X86_64_Unsigned; - Index = 0; - - // FIXME: I believe this is broken, I don't think the linker can - // understand it. I think it would require a local relocation, but I'm not - // sure if that would work either. The official way to get an absolute - // PCrel relocation is to use an absolute symbol (which we don't support - // yet). - if (IsPCRel) { - IsExtern = 1; - Type = macho::RIT_X86_64_Branch; - } - } else if (Target.getSymB()) { // A - B + constant - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); - - const MCSymbol *B = &Target.getSymB()->getSymbol(); - MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); - - // Neither symbol can be modified. - if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol"); - - // We don't support PCrel relocations of differences. Darwin 'as' doesn't - // implement most of these correctly. - if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference"); - - // The support for the situation where one or both of the symbols would - // require a local relocation is handled just like if the symbols were - // external. This is certainly used in the case of debug sections where - // the section has only temporary symbols and thus the symbols don't have - // base symbols. This is encoded using the section ordinal and - // non-extern relocation entries. - - // Darwin 'as' doesn't emit correct relocations for this (it ends up with - // a single SIGNED relocation); reject it for now. Except the case where - // both symbols don't have a base, equal but both NULL. - if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base"); - - Value += getSymbolAddress(&A_SD, Layout) - - (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout)); - Value -= getSymbolAddress(&B_SD, Layout) - - (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout)); - - if (A_Base) { - Index = A_Base->getIndex(); - IsExtern = 1; - } - else { - Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - } - Type = macho::RIT_X86_64_Unsigned; - - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); - - if (B_Base) { - Index = B_Base->getIndex(); - IsExtern = 1; - } - else { - Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - } - Type = macho::RIT_X86_64_Subtractor; - } else { - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); - - // Relocations inside debug sections always use local relocations when - // possible. This seems to be done because the debugger doesn't fully - // understand x86_64 relocation entries, and expects to find values that - // have already been fixed up. 
- if (Symbol->isInSection()) { - const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( - Fragment->getParent()->getSection()); - if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) - Base = 0; - } + // Then lazy symbol pointers and symbol stubs. + IndirectIndex = 0; + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { + const MCSectionMachO &Section = + cast<MCSectionMachO>(it->SectionData->getSection()); - // x86_64 almost always uses external relocations, except when there is no - // symbol to use as a base address (a local symbol with no preceding - // non-local symbol). - if (Base) { - Index = Base->getIndex(); - IsExtern = 1; - - // Add the local offset, if needed. - if (Base != &SD) - Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection() && !Symbol->isVariable()) { - // The index is the section ordinal (1-based). - Index = SD.getFragment()->getParent()->getOrdinal() + 1; - IsExtern = 0; - Value += getSymbolAddress(&SD, Layout); - - if (IsPCRel) - Value -= FixupAddress + (1 << Log2Size); - } else if (Symbol->isVariable()) { - const MCExpr *Value = Symbol->getVariableValue(); - int64_t Res; - bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress); - if (isAbs) { - FixedValue = Res; - return; - } else { - report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'"); - } - } else { - report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'"); - } + if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) + continue; - MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); - if (IsPCRel) { - if (IsRIPRel) { - if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { - // x86_64 distinguishes movq foo@GOTPCREL so that the linker can - // rewrite the movq to an leaq at link time if the symbol ends up in - // the same linkage unit. - if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) - Type = macho::RIT_X86_64_GOTLoad; - else - Type = macho::RIT_X86_64_GOT; - } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - Type = macho::RIT_X86_64_TLV; - } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation"); - } else { - Type = macho::RIT_X86_64_Signed; - - // The Darwin x86_64 relocation format has a problem where it cannot - // encode an address (L<foo> + <constant>) which is outside the atom - // containing L<foo>. Generally, this shouldn't occur but it does - // happen when we have a RIPrel instruction with data following the - // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel - // adjustment Darwin x86_64 uses, the offset is still negative and - // the linker has no way to recognize this. - // - // To work around this, Darwin uses several special relocation types - // to indicate the offsets. However, the specification or - // implementation of these seems to also be incomplete; they should - // adjust the addend as well based on the actual encoded instruction - // (the additional bias), but instead appear to just look at the - // final offset. 
- switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = macho::RIT_X86_64_Signed1; break; - case 2: Type = macho::RIT_X86_64_Signed2; break; - case 4: Type = macho::RIT_X86_64_Signed4; break; - } - } - } else { - if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in branch " - "relocation"); - - Type = macho::RIT_X86_64_Branch; - } - } else { - if (Modifier == MCSymbolRefExpr::VK_GOT) { - Type = macho::RIT_X86_64_GOT; - } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { - // GOTPCREL is allowed as a modifier on non-PCrel instructions, in - // which case all we do is set the PCrel bit in the relocation entry; - // this is used with exception handling, for example. The source is - // required to include any necessary offset directly. - Type = macho::RIT_X86_64_GOT; - IsPCRel = 1; - } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel"); - } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation"); - else - Type = macho::RIT_X86_64_Unsigned; - } - } + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; - // x86_64 always writes custom values into the fixups. - FixedValue = Value; - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); + // Set the symbol type to undefined lazy, but only on construction. + // + // FIXME: Do not hardcode. + bool Created; + MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); + if (Created) + Entry.setFlags(Entry.getFlags() | 0x0001); } +} - void RecordScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - unsigned Log2Size, - uint64_t &FixedValue) { - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; - - // See <reloc.h>. - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = getSymbolAddress(A_SD, Layout); - uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); - FixedValue += SecAddr; - uint32_t Value2 = 0; - - if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); - - if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. - // - // Note that there is no longer any semantic difference between these two - // relocation types from the linkers point of view, this is done solely - // for pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : - (unsigned)macho::RIT_Generic_LocalDifference; - Value2 = getSymbolAddress(B_SD, Layout); - FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); - } - - // Relocations are written out in reverse order, so the PAIR comes first. 
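//---- Note on the relocation_info packing used throughout this file ----------
// Word1 of each 8-byte relocation entry multiplexes five fields: a 24-bit
// symbol (or section) index, the pcrel flag, a 2-bit log2 length, the extern
// flag, and a 4-bit type, exactly as the repeated
// (Index << 0) | (IsPCRel << 24) | ... expressions above build it.
// Pack/unpack sketch (hypothetical helper names):
#include <cassert>
#include <stdint.h>

uint32_t packRelocWord1(unsigned Index, bool IsPCRel, unsigned Log2Size,
                        bool IsExtern, unsigned Type) {
  assert(Index < (1u << 24) && Log2Size < 4 && Type < 16 && "field overflow");
  return (uint32_t)Index | ((uint32_t)IsPCRel << 24) |
         ((uint32_t)Log2Size << 25) | ((uint32_t)IsExtern << 27) |
         ((uint32_t)Type << 28);
}

unsigned relocIndex(uint32_t W)  { return W & 0xFFFFFF; }     // bits 0..23
bool     relocPCRel(uint32_t W)  { return (W >> 24) & 1; }    // bit 24
unsigned relocLen(uint32_t W)    { return (W >> 25) & 3; }    // bits 25..26
bool     relocExtern(uint32_t W) { return (W >> 27) & 1; }    // bit 27
unsigned relocType(uint32_t W)   { return (W >> 28) & 0xF; }  // bits 28..31
//----------------------------------------------------------------------------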
- if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; - Relocations[Fragment->getParent()].push_back(MRE); +/// ComputeSymbolTable - Compute the symbol table data +/// +/// \param StringTable [out] - The string table data. +/// \param StringIndexMap [out] - Map from symbol names to offsets in the +/// string table. +void MachObjectWriter:: +ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, + std::vector<MachSymbolData> &LocalSymbolData, + std::vector<MachSymbolData> &ExternalSymbolData, + std::vector<MachSymbolData> &UndefinedSymbolData) { + // Build section lookup table. + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); + + // Index 0 is always the empty string. + StringMap<uint64_t> StringIndexMap; + StringTable += '\x00'; + + // Build the symbol arrays and the string table, but only for non-local + // symbols. + // + // The particular order that we collect the symbols and create the string + // table, then sort the symbols is chosen to match 'as'. Even though it + // doesn't matter for correctness, this is important for letting us diff .o + // files. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + continue; + + if (!it->isExternal() && !Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; - Relocations[Fragment->getParent()].push_back(MRE); - } - - void RecordARMScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - unsigned Log2Size, - uint64_t &FixedValue) { - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; - - // See <reloc.h>. - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = getSymbolAddress(A_SD, Layout); - uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); - FixedValue += SecAddr; - uint32_t Value2 = 0; - - if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); - - if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. 
- Type = macho::RIT_Difference; - Value2 = getSymbolAddress(B_SD, Layout); - FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); - } + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; - // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; - Relocations[Fragment->getParent()].push_back(MRE); + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); } - - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; - Relocations[Fragment->getParent()].push_back(MRE); } - void RecordARMMovwMovtRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_ARM_Half; - - // See <reloc.h>. - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = getSymbolAddress(A_SD, Layout); - uint32_t Value2 = 0; - uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); - FixedValue += SecAddr; - - if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); - - if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. - Type = macho::RIT_ARM_HalfDifference; - Value2 = getSymbolAddress(B_SD, Layout); - FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); - } - - // Relocations are written out in reverse order, so the PAIR comes first. - // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field: - // - // For these two r_type relocations they always have a pair following them - // and the r_length bits are used differently. The encoding of the - // r_length is as follows: - // low bit of r_length: - // 0 - :lower16: for movw instructions - // 1 - :upper16: for movt instructions - // high bit of r_length: - // 0 - arm instructions - // 1 - thumb instructions - // the other half of the relocated expression is in the following pair - // relocation entry in the the low 16 bits of r_address field. 
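//---- Note on the ARM half relocations handled above -------------------------
// ARM_RELOC_HALF(_SECTDIFF) entries describe instructions that each carry
// only half of a 32-bit value: movw takes :lower16: and movt :upper16:, with
// the other half tucked into the paired entry's r_address field as the
// comment above explains. The split itself is just masking and shifting:
#include <stdint.h>

uint16_t lower16(uint32_t Value) { return (uint16_t)(Value & 0xFFFF); }
uint16_t upper16(uint32_t Value) { return (uint16_t)(Value >> 16); }

// Materializing 0x12345678 in a register:
//   movw r0, #0x5678    <- lower16(0x12345678)
//   movt r0, #0x1234    <- upper16(0x12345678)
//----------------------------------------------------------------------------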
- unsigned ThumbBit = 0; - unsigned MovtBit = 0; - switch ((unsigned)Fixup.getKind()) { - default: break; - case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: - MovtBit = 1; - break; - case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: - MovtBit = 1; - // Fallthrough - case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: - ThumbBit = 1; - break; - } + // Now add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + continue; - if (Type == macho::RIT_ARM_HalfDifference) { - uint32_t OtherHalf = MovtBit - ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); + if (it->isExternal() || Symbol.isUndefined()) + continue; - macho::RelocationEntry MRE; - MRE.Word0 = ((OtherHalf << 0) | - (macho::RIT_Pair << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; - Relocations[Fragment->getParent()].push_back(MRE); + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; - Relocations[Fragment->getParent()].push_back(MRE); - } + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; - void RecordTLVPRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && - !is64Bit() && - "Should only be called with a 32-bit TLVP relocation!"); - - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned IsPCRel = 0; - - // Get the symbol data. - MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); - unsigned Index = SD_A->getIndex(); - - // We're only going to have a second symbol in pic mode and it'll be a - // subtraction from the picbase. For 32-bit pic the addend is the difference - // between the picbase and the next address. For 32-bit static the addend - // is zero. - if (Target.getSymB()) { - // If this is a subtraction then we're pcrel. 
-      uint32_t FixupAddress =
-        getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
-      MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
-      IsPCRel = 1;
-      FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
-                    Target.getConstant());
-      FixedValue += 1ULL << Log2Size;
+    if (Symbol.isAbsolute()) {
+      MSD.SectionIndex = 0;
+      LocalSymbolData.push_back(MSD);
     } else {
-      FixedValue = 0;
+      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+      assert(MSD.SectionIndex && "Invalid section index!");
+      LocalSymbolData.push_back(MSD);
     }
-
-    // struct relocation_info (8 bytes)
-    macho::RelocationEntry MRE;
-    MRE.Word0 = Value;
-    MRE.Word1 = ((Index << 0) |
-                 (IsPCRel << 24) |
-                 (Log2Size << 25) |
-                 (1 << 27) | // Extern
-                 (macho::RIT_Generic_TLV << 28)); // Type
-    Relocations[Fragment->getParent()].push_back(MRE);
   }

-  static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
-                                       unsigned &Log2Size) {
-    RelocType = unsigned(macho::RIT_Vanilla);
-    Log2Size = ~0U;
+  // External and undefined symbols are required to be in lexicographic order.
+  std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+  std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+  // Set the symbol indices.
+  Index = 0;
+  for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+    LocalSymbolData[i].SymbolData->setIndex(Index++);
+  for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+    ExternalSymbolData[i].SymbolData->setIndex(Index++);
+  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+    UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+  // The string table is padded to a multiple of 4.
+  while (StringTable.size() % 4)
+    StringTable += '\x00';
+}

-    switch (Kind) {
-    default:
-      return false;
+void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
+                                               const MCAsmLayout &Layout) {
+  uint64_t StartAddress = 0;
+  const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+  for (int i = 0, n = Order.size(); i != n ; ++i) {
+    const MCSectionData *SD = Order[i];
+    StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+    SectionAddress[SD] = StartAddress;
+    StartAddress += Layout.getSectionAddressSize(SD);
+
+    // Explicitly pad the section to match the alignment requirements of the
+    // following one. This is for 'gas' compatibility; it shouldn't
+    // strictly be necessary.
+    StartAddress += getPaddingSize(SD, Layout);
+  }
+}

-    case FK_Data_1:
-      Log2Size = llvm::Log2_32(1);
-      return true;
-    case FK_Data_2:
-      Log2Size = llvm::Log2_32(2);
-      return true;
-    case FK_Data_4:
-      Log2Size = llvm::Log2_32(4);
-      return true;
-    case FK_Data_8:
-      Log2Size = llvm::Log2_32(8);
-      return true;
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+                                                const MCAsmLayout &Layout) {
+  computeSectionAddresses(Asm, Layout);

-    // Handle 24-bit branch kinds.
-    case ARM::fixup_arm_ldst_pcrel_12:
-    case ARM::fixup_arm_pcrel_10:
-    case ARM::fixup_arm_adr_pcrel_12:
-    case ARM::fixup_arm_condbranch:
-    case ARM::fixup_arm_uncondbranch:
-      RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
-      // Report as 'long', even though that is not quite accurate.
-      Log2Size = llvm::Log2_32(4);
-      return true;
+  // Create symbol data for any indirect symbols.
+  BindIndirectSymbols(Asm);

-    // Handle Thumb branches.
- case ARM::fixup_arm_thumb_br: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); - Log2Size = llvm::Log2_32(2); - return true; - - case ARM::fixup_arm_thumb_bl: - case ARM::fixup_arm_thumb_blx: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); - Log2Size = llvm::Log2_32(4); - return true; + // Compute symbol table information and bind symbol indices. + ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); +} - case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: - case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_HalfDifference); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); - return true; +bool MachObjectWriter:: +IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const { + if (InSet) + return true; - case ARM::fixup_arm_movw_lo16: - case ARM::fixup_arm_movw_lo16_pcrel: - case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); + // The effective address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) == 0. + const MCSymbolData *A_Base = 0, *B_Base = 0; + + const MCSymbol &SA = DataA.getSymbol().AliasedSymbol(); + const MCSection &SecA = SA.getSection(); + const MCSection &SecB = FB.getParent()->getSection(); + + if (IsPCRel) { + // The simple (Darwin, except on x86_64) way of dealing with this was to + // assume that any reference to a temporary symbol *must* be a temporary + // symbol in the same atom, unless the sections differ. Therefore, any PCrel + // relocation to a temporary symbol (in the same section) is fully + // resolved. This also works in conjunction with absolutized .set, which + // requires the compiler to use .set to absolutize the differences between + // symbols which the compiler knows to be assembly time constants, so we + // don't need to worry about considering symbol differences fully resolved. + + if (!Asm.getBackend().hasReliableSymbolDifference()) { + if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB) + return false; return true; } + } else { + if (!TargetObjectWriter->useAggressiveSymbolFolding()) + return false; } - void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) { - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size; - unsigned RelocType = macho::RIT_Vanilla; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { - report_fatal_error("unknown ARM fixup kind!"); - return; - } - // If this is a difference or a defined symbol plus an offset, then we need - // a scattered relocation entry. Differences always require scattered - // relocations. - if (Target.getSymB()) { - if (RelocType == macho::RIT_ARM_Half || - RelocType == macho::RIT_ARM_HalfDifference) - return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup, - Target, FixedValue); - return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); - } + const MCFragment &FA = *Asm.getSymbolData(SA).getFragment(); - // Get the symbol data, if any. 
- MCSymbolData *SD = 0; - if (Target.getSymA()) - SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + A_Base = FA.getAtom(); + if (!A_Base) + return false; - // FIXME: For other platforms, we need to use scattered relocations for - // internal relocations with offsets. If this is an internal relocation - // with an offset, it also needs a scattered relocation entry. - // - // Is this right for ARM? - uint32_t Offset = Target.getConstant(); - if (IsPCRel && RelocType == macho::RIT_Vanilla) - Offset += 1 << Log2Size; - if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) - return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target, - Log2Size, FixedValue); - - // See <reloc.h>. - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // FIXME! - report_fatal_error("FIXME: relocations to absolute targets " - "not yet implemented"); - } else { - // Resolve constant variables. - if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; - } - } + B_Base = FB.getAtom(); + if (!B_Base) + return false; - // Check whether we need an external or internal relocation. - if (doesSymbolRequireExternRelocation(SD)) { - IsExtern = 1; - Index = SD->getIndex(); - // For external relocations, make sure to offset the fixup value to - // compensate for the addend of the symbol address, if it was - // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) - FixedValue -= Layout.getSymbolOffset(SD); - } else { - // The index is the section ordinal (1-based). - const MCSectionData &SymSD = Asm.getSectionData( - SD->getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - FixedValue += getSectionAddress(&SymSD); - } - if (IsPCRel) - FixedValue -= getSectionAddress(Fragment->getParent()); + // If the atoms are the same, they are guaranteed to have the same address. + if (A_Base == B_Base) + return true; - // The type is determined by the fixup kind. - Type = RelocType; - } + // Otherwise, we can't prove this is fully resolved. + return false; +} - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); +void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { + unsigned NumSections = Asm.size(); + + // The section data starts after the header, the segment load command (and + // section headers) and the symbol table. + unsigned NumLoadCommands = 1; + uint64_t LoadCommandsSize = is64Bit() ? + macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size : + macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size; + + // Add the symbol table load command sizes, if used. + unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + + UndefinedSymbolData.size(); + if (NumSymbols) { + NumLoadCommands += 2; + LoadCommandsSize += (macho::SymtabLoadCommandSize + + macho::DysymtabLoadCommandSize); } - void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) { - // FIXME: These needs to be factored into the target Mach-O writer. 
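Both the ARM recorder above and the generic recorder that follows finish by packing the same struct relocation_info second word. A minimal sketch of that packing, mirroring the shift positions of the nearby Word1 expressions (a hypothetical free function, not part of the patch):

#include <cstdint>

// Second word of a plain (non-scattered) Mach-O relocation entry:
// symbol-table index (extern) or 1-based section ordinal (internal),
// pcrel flag, log2 of the fixup size, extern flag, and type.
uint32_t packRelocInfoWord1(uint32_t Index, bool IsPCRel, unsigned Log2Size,
                            bool IsExtern, unsigned Type) {
  return (Index << 0) |
         (uint32_t(IsPCRel) << 24) |
         (uint32_t(Log2Size) << 25) |
         (uint32_t(IsExtern) << 27) |
         (uint32_t(Type) << 28);
}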
- if (isARM()) { - RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - if (is64Bit()) { - RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - - unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); - - // If this is a 32-bit TLVP reloc it's handled a bit differently. - if (Target.getSymA() && - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { - RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); - return; - } - - // If this is a difference or a defined symbol plus an offset, then we need - // a scattered relocation entry. - // Differences always require scattered relocations. - if (Target.getSymB()) - return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); - - // Get the symbol data, if any. - MCSymbolData *SD = 0; - if (Target.getSymA()) - SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); - - // If this is an internal relocation with an offset, it also needs a - // scattered relocation entry. - uint32_t Offset = Target.getConstant(); - if (IsPCRel) - Offset += 1 << Log2Size; - if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) - return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); - - // See <reloc.h>. - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - // - // FIXME: Currently, these are never generated (see code below). I cannot - // find a case where they are actually emitted. - Type = macho::RIT_Vanilla; - } else { - // Resolve constant variables. - if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; - } - } - - // Check whether we need an external or internal relocation. - if (doesSymbolRequireExternRelocation(SD)) { - IsExtern = 1; - Index = SD->getIndex(); - // For external relocations, make sure to offset the fixup value to - // compensate for the addend of the symbol address, if it was - // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) - FixedValue -= Layout.getSymbolOffset(SD); - } else { - // The index is the section ordinal (1-based). - const MCSectionData &SymSD = Asm.getSectionData( - SD->getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - FixedValue += getSectionAddress(&SymSD); - } - if (IsPCRel) - FixedValue -= getSectionAddress(Fragment->getParent()); - - Type = macho::RIT_Vanilla; - } - - // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocations[Fragment->getParent()].push_back(MRE); + // Compute the total size of the section data, as well as its file size and vm + // size. + uint64_t SectionDataStart = (is64Bit() ? 
macho::Header64Size :
+                               macho::Header32Size) + LoadCommandsSize;
+  uint64_t SectionDataSize = 0;
+  uint64_t SectionDataFileSize = 0;
+  uint64_t VMSize = 0;
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionData &SD = *it;
+    uint64_t Address = getSectionAddress(&SD);
+    uint64_t Size = Layout.getSectionAddressSize(&SD);
+    uint64_t FileSize = Layout.getSectionFileSize(&SD);
+    FileSize += getPaddingSize(&SD, Layout);
+
+    VMSize = std::max(VMSize, Address + Size);
+
+    if (SD.getSection().isVirtualSection())
+      continue;
+
+    SectionDataSize = std::max(SectionDataSize, Address + Size);
+    SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
   }

-  void BindIndirectSymbols(MCAssembler &Asm) {
-    // This is the point where 'as' creates actual symbols for indirect symbols
-    // (in the following two passes). It would be easier for us to do this
-    // sooner when we see the attribute, but that makes getting the order in the
-    // symbol table much more complicated than it is worth.
-    //
-    // FIXME: Revisit this when the dust settles.
+  // The section data is padded to 4 bytes.
+  //
+  // FIXME: Is this machine dependent?
+  unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+  SectionDataFileSize += SectionDataPadding;
+
+  // Write the prolog, starting with the header and load command...
+  WriteHeader(NumLoadCommands, LoadCommandsSize,
+              Asm.getSubsectionsViaSymbols());
+  WriteSegmentLoadCommand(NumSections, VMSize,
+                          SectionDataStart, SectionDataSize);
+
+  // ... and then the section headers.
+  uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+    unsigned NumRelocs = Relocs.size();
+    uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+    WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+    RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+  }

-    // Bind non lazy symbol pointers first.
-    unsigned IndirectIndex = 0;
-    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
-      const MCSectionMachO &Section =
-        cast<MCSectionMachO>(it->SectionData->getSection());
+  // Write the symbol table load command, if used.
+  if (NumSymbols) {
+    unsigned FirstLocalSymbol = 0;
+    unsigned NumLocalSymbols = LocalSymbolData.size();
+    unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+    unsigned NumExternalSymbols = ExternalSymbolData.size();
+    unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+    unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+    unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+    unsigned NumSymTabSymbols =
+      NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+    uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+    uint64_t IndirectSymbolOffset = 0;
+
+    // If used, the indirect symbols are written after the section data.
+    if (NumIndirectSymbols)
+      IndirectSymbolOffset = RelocTableEnd;
+
+    // The symbol table is written after the indirect symbol data.
+    uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+    // The string table is written after the symbol table.
+    uint64_t StringTableOffset =
+      SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
macho::Nlist64Size : + macho::Nlist32Size); + WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + StringTableOffset, StringTable.size()); + + WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + FirstExternalSymbol, NumExternalSymbols, + FirstUndefinedSymbol, NumUndefinedSymbols, + IndirectSymbolOffset, NumIndirectSymbols); + } - if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) - continue; + // Write the actual section data. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + Asm.WriteSectionData(it, Layout); - // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; + uint64_t Pad = getPaddingSize(it, Layout); + for (unsigned int i = 0; i < Pad; ++i) + Write8(0); + } - Asm.getOrCreateSymbolData(*it->Symbol); - } + // Write the extra padding. + WriteZeros(SectionDataPadding); - // Then lazy symbol pointers and symbol stubs. - IndirectIndex = 0; - for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { - const MCSectionMachO &Section = - cast<MCSectionMachO>(it->SectionData->getSection()); - - if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && - Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) - continue; - - // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; - - // Set the symbol type to undefined lazy, but only on construction. - // - // FIXME: Do not hardcode. - bool Created; - MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); - if (Created) - Entry.setFlags(Entry.getFlags() | 0x0001); + // Write the relocation entries. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Write the section relocation entries, in reverse order to match 'as' + // (approximately, the exact algorithm is more complicated than this). + std::vector<macho::RelocationEntry> &Relocs = Relocations[it]; + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + Write32(Relocs[e - i - 1].Word0); + Write32(Relocs[e - i - 1].Word1); } } - /// ComputeSymbolTable - Compute the symbol table data - /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. - void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, - std::vector<MachSymbolData> &LocalSymbolData, - std::vector<MachSymbolData> &ExternalSymbolData, - std::vector<MachSymbolData> &UndefinedSymbolData) { - // Build section lookup table. - DenseMap<const MCSection*, uint8_t> SectionIndexMap; - unsigned Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) - SectionIndexMap[&it->getSection()] = Index; - assert(Index <= 256 && "Too many sections!"); - - // Index 0 is always the empty string. - StringMap<uint64_t> StringIndexMap; - StringTable += '\x00'; - - // Build the symbol arrays and the string table, but only for non-local - // symbols. - // - // The particular order that we collect the symbols and create the string - // table, then sort the symbols is chosen to match 'as'. Even though it - // doesn't matter for correctness, this is important for letting us diff .o - // files. 
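The ordering contract described by the comment being removed here is what the new ComputeSymbolTable method earlier in this patch implements: local symbols keep assembler order, external and undefined symbols are sorted, and final indices are handed out over the concatenation. A standalone sketch of that scheme, assuming a simplified Sym record in place of MachSymbolData:

#include <algorithm>
#include <string>
#include <vector>

struct Sym { std::string Name; unsigned Index; };

static bool byName(const Sym &A, const Sym &B) { return A.Name < B.Name; }

// Locals first in source order, then externals and undefineds in
// lexicographic order; indices are assigned over the concatenation.
void assignIndices(std::vector<Sym> &Locals, std::vector<Sym> &Externals,
                   std::vector<Sym> &Undefineds) {
  std::sort(Externals.begin(), Externals.end(), byName);
  std::sort(Undefineds.begin(), Undefineds.end(), byName);
  unsigned Index = 0;
  for (size_t i = 0; i != Locals.size(); ++i)
    Locals[i].Index = Index++;
  for (size_t i = 0; i != Externals.size(); ++i)
    Externals[i].Index = Index++;
  for (size_t i = 0; i != Undefineds.size(); ++i)
    Undefineds[i].Index = Index++;
}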
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it->getSymbol())) - continue; - - if (!it->isExternal() && !Symbol.isUndefined()) - continue; - - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; + // Write the symbol table data, if used. + if (NumSymbols) { + // Write the indirect symbol entries. + for (MCAssembler::const_indirect_symbol_iterator + it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // Indirect symbols in the non lazy symbol pointer section have some + // special handling. + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { + // If this symbol is defined and internal, mark it as such. + if (it->Symbol->isDefined() && + !Asm.getSymbolData(*it->Symbol).isExternal()) { + uint32_t Flags = macho::ISF_Local; + if (it->Symbol->isAbsolute()) + Flags |= macho::ISF_Absolute; + Write32(Flags); + continue; + } } - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isUndefined()) { - MSD.SectionIndex = 0; - UndefinedSymbolData.push_back(MSD); - } else if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - ExternalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - ExternalSymbolData.push_back(MSD); - } + Write32(Asm.getSymbolData(*it->Symbol).getIndex()); } - // Now add the data for local symbols. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it->getSymbol())) - continue; - - if (it->isExternal() || Symbol.isUndefined()) - continue; - - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; - } - - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - LocalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - LocalSymbolData.push_back(MSD); - } - } + // FIXME: Check that offsets match computed ones. - // External and undefined symbols are required to be in lexicographic order. - std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); - std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); - - // Set the symbol indices. - Index = 0; + // Write the symbol table entries. for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - LocalSymbolData[i].SymbolData->setIndex(Index++); + WriteNlist(LocalSymbolData[i], Layout); for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - ExternalSymbolData[i].SymbolData->setIndex(Index++); + WriteNlist(ExternalSymbolData[i], Layout); for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - UndefinedSymbolData[i].SymbolData->setIndex(Index++); - - // The string table is padded to a multiple of 4. 
- while (StringTable.size() % 4) - StringTable += '\x00'; - } - - void computeSectionAddresses(const MCAssembler &Asm, - const MCAsmLayout &Layout) { - uint64_t StartAddress = 0; - const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder(); - for (int i = 0, n = Order.size(); i != n ; ++i) { - const MCSectionData *SD = Order[i]; - StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); - SectionAddress[SD] = StartAddress; - StartAddress += Layout.getSectionAddressSize(SD); - // Explicitly pad the section to match the alignment requirements of the - // following one. This is for 'gas' compatibility, it shouldn't - /// strictly be necessary. - StartAddress += getPaddingSize(SD, Layout); - } - } - - void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { - computeSectionAddresses(Asm, Layout); - - // Create symbol data for any indirect symbols. - BindIndirectSymbols(Asm); - - // Compute symbol table information and bind symbol indices. - ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, - UndefinedSymbolData); - } - - virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, - const MCSymbolData &DataA, - const MCFragment &FB, - bool InSet, - bool IsPCRel) const { - if (InSet) - return true; + WriteNlist(UndefinedSymbolData[i], Layout); - // The effective address is - // addr(atom(A)) + offset(A) - // - addr(atom(B)) - offset(B) - // and the offsets are not relocatable, so the fixup is fully resolved when - // addr(atom(A)) - addr(atom(B)) == 0. - const MCSymbolData *A_Base = 0, *B_Base = 0; - - const MCSymbol &SA = DataA.getSymbol().AliasedSymbol(); - const MCSection &SecA = SA.getSection(); - const MCSection &SecB = FB.getParent()->getSection(); - - if (IsPCRel) { - // The simple (Darwin, except on x86_64) way of dealing with this was to - // assume that any reference to a temporary symbol *must* be a temporary - // symbol in the same atom, unless the sections differ. Therefore, any - // PCrel relocation to a temporary symbol (in the same section) is fully - // resolved. This also works in conjunction with absolutized .set, which - // requires the compiler to use .set to absolutize the differences between - // symbols which the compiler knows to be assembly time constants, so we - // don't need to worry about considering symbol differences fully - // resolved. - - if (!Asm.getBackend().hasReliableSymbolDifference()) { - if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB) - return false; - return true; - } - } else { - if (!TargetObjectWriter->useAggressiveSymbolFolding()) - return false; - } - - const MCFragment &FA = *Asm.getSymbolData(SA).getFragment(); - - A_Base = FA.getAtom(); - if (!A_Base) - return false; - - B_Base = FB.getAtom(); - if (!B_Base) - return false; - - // If the atoms are the same, they are guaranteed to have the same address. - if (A_Base == B_Base) - return true; - - // Otherwise, we can't prove this is fully resolved. - return false; + // Write the string table. + OS << StringTable.str(); } - - void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { - unsigned NumSections = Asm.size(); - - // The section data starts after the header, the segment load command (and - // section headers) and the symbol table. - unsigned NumLoadCommands = 1; - uint64_t LoadCommandsSize = is64Bit() ? 
- macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size : - macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size; - - // Add the symbol table load command sizes, if used. - unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + - UndefinedSymbolData.size(); - if (NumSymbols) { - NumLoadCommands += 2; - LoadCommandsSize += (macho::SymtabLoadCommandSize + - macho::DysymtabLoadCommandSize); - } - - // Compute the total size of the section data, as well as its file size and - // vm size. - uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size : - macho::Header32Size) + LoadCommandsSize; - uint64_t SectionDataSize = 0; - uint64_t SectionDataFileSize = 0; - uint64_t VMSize = 0; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionData &SD = *it; - uint64_t Address = getSectionAddress(&SD); - uint64_t Size = Layout.getSectionAddressSize(&SD); - uint64_t FileSize = Layout.getSectionFileSize(&SD); - FileSize += getPaddingSize(&SD, Layout); - - VMSize = std::max(VMSize, Address + Size); - - if (SD.getSection().isVirtualSection()) - continue; - - SectionDataSize = std::max(SectionDataSize, Address + Size); - SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); - } - - // The section data is padded to 4 bytes. - // - // FIXME: Is this machine dependent? - unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); - SectionDataFileSize += SectionDataPadding; - - // Write the prolog, starting with the header and load command... - WriteHeader(NumLoadCommands, LoadCommandsSize, - Asm.getSubsectionsViaSymbols()); - WriteSegmentLoadCommand(NumSections, VMSize, - SectionDataStart, SectionDataSize); - - // ... and then the section headers. - uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - std::vector<macho::RelocationEntry> &Relocs = Relocations[it]; - unsigned NumRelocs = Relocs.size(); - uint64_t SectionStart = SectionDataStart + getSectionAddress(it); - WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); - RelocTableEnd += NumRelocs * macho::RelocationInfoSize; - } - - // Write the symbol table load command, if used. - if (NumSymbols) { - unsigned FirstLocalSymbol = 0; - unsigned NumLocalSymbols = LocalSymbolData.size(); - unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; - unsigned NumExternalSymbols = ExternalSymbolData.size(); - unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; - unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); - unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); - unsigned NumSymTabSymbols = - NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; - uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; - uint64_t IndirectSymbolOffset = 0; - - // If used, the indirect symbols are written after the section data. - if (NumIndirectSymbols) - IndirectSymbolOffset = RelocTableEnd; - - // The symbol table is written after the indirect symbol data. - uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; - - // The string table is written after symbol table. - uint64_t StringTableOffset = - SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? 
macho::Nlist64Size : - macho::Nlist32Size); - WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, - StringTableOffset, StringTable.size()); - - WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, - FirstExternalSymbol, NumExternalSymbols, - FirstUndefinedSymbol, NumUndefinedSymbols, - IndirectSymbolOffset, NumIndirectSymbols); - } - - // Write the actual section data. - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - Asm.WriteSectionData(it, Layout); - - uint64_t Pad = getPaddingSize(it, Layout); - for (unsigned int i = 0; i < Pad; ++i) - Write8(0); - } - - // Write the extra padding. - WriteZeros(SectionDataPadding); - - // Write the relocation entries. - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - // Write the section relocation entries, in reverse order to match 'as' - // (approximately, the exact algorithm is more complicated than this). - std::vector<macho::RelocationEntry> &Relocs = Relocations[it]; - for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - Write32(Relocs[e - i - 1].Word0); - Write32(Relocs[e - i - 1].Word1); - } - } - - // Write the symbol table data, if used. - if (NumSymbols) { - // Write the indirect symbol entries. - for (MCAssembler::const_indirect_symbol_iterator - it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // Indirect symbols in the non lazy symbol pointer section have some - // special handling. - const MCSectionMachO &Section = - static_cast<const MCSectionMachO&>(it->SectionData->getSection()); - if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { - // If this symbol is defined and internal, mark it as such. - if (it->Symbol->isDefined() && - !Asm.getSymbolData(*it->Symbol).isExternal()) { - uint32_t Flags = macho::ISF_Local; - if (it->Symbol->isAbsolute()) - Flags |= macho::ISF_Absolute; - Write32(Flags); - continue; - } - } - - Write32(Asm.getSymbolData(*it->Symbol).getIndex()); - } - - // FIXME: Check that offsets match computed ones. - - // Write the symbol table entries. - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - WriteNlist(LocalSymbolData[i], Layout); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - WriteNlist(ExternalSymbolData[i], Layout); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - WriteNlist(UndefinedSymbolData[i], Layout); - - // Write the string table. - OS << StringTable.str(); - } - } -}; - } MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, diff --git a/lib/Target/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index e0a9de82983f..348cd4c9ab1b 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/SubtargetFeature.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" @@ -27,7 +27,7 @@ using namespace llvm; /// hasFlag - Determine if a feature has a flag; '+' or '-' /// -static inline bool hasFlag(const std::string &Feature) { +static inline bool hasFlag(const StringRef Feature) { assert(!Feature.empty() && "Empty string"); // Get first character char Ch = Feature[0]; @@ -37,13 +37,13 @@ static inline bool hasFlag(const std::string &Feature) { /// StripFlag - Return string stripped of flag. 
/// -static inline std::string StripFlag(const std::string &Feature) { +static inline std::string StripFlag(const StringRef Feature) { return hasFlag(Feature) ? Feature.substr(1) : Feature; } /// isEnabled - Return true if enable flag; '+'. /// -static inline bool isEnabled(const std::string &Feature) { +static inline bool isEnabled(const StringRef Feature) { assert(!Feature.empty() && "Empty string"); // Get first character char Ch = Feature[0]; @@ -53,16 +53,22 @@ static inline bool isEnabled(const std::string &Feature) { /// PrependFlag - Return a string with a prepended flag; '+' or '-'. /// -static inline std::string PrependFlag(const std::string &Feature, - bool IsEnabled) { +static inline std::string PrependFlag(const StringRef Feature, + bool IsEnabled) { assert(!Feature.empty() && "Empty string"); - if (hasFlag(Feature)) return Feature; - return std::string(IsEnabled ? "+" : "-") + Feature; + if (hasFlag(Feature)) + return Feature; + std::string Prefix = IsEnabled ? "+" : "-"; + Prefix += Feature; + return Prefix; } /// Split - Splits a string of comma separated items in to a vector of strings. /// -static void Split(std::vector<std::string> &V, const std::string &S) { +static void Split(std::vector<std::string> &V, const StringRef S) { + if (S.empty()) + return; + // Start at beginning of string. size_t Pos = 0; while (true) { @@ -88,7 +94,7 @@ static std::string Join(const std::vector<std::string> &V) { std::string Result; // If the vector is not empty if (!V.empty()) { - // Start with the CPU feature + // Start with the first feature Result = V[0]; // For each successive feature for (size_t i = 1; i < V.size(); i++) { @@ -103,7 +109,7 @@ static std::string Join(const std::vector<std::string> &V) { } /// Adding features. -void SubtargetFeatures::AddFeature(const std::string &String, +void SubtargetFeatures::AddFeature(const StringRef String, bool IsEnabled) { // Don't add empty features if (!String.empty()) { @@ -113,16 +119,16 @@ void SubtargetFeatures::AddFeature(const std::string &String, } /// Find KV in array using binary search. -template<typename T> const T *Find(const std::string &S, const T *A, size_t L) { +template<typename T> const T *Find(const StringRef S, const T *A, size_t L) { // Make the lower bound element we're looking for T KV; - KV.Key = S.c_str(); + KV.Key = S.data(); // Determine the end of the array const T *Hi = A + L; // Binary search the array const T *F = std::lower_bound(A, Hi, KV); // If not found then return NULL - if (F == Hi || std::string(F->Key) != S) return NULL; + if (F == Hi || StringRef(F->Key) != S) return NULL; // Return the found array item return F; } @@ -170,7 +176,7 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize, // SubtargetFeatures Implementation //===----------------------------------------------------------------------===// -SubtargetFeatures::SubtargetFeatures(const std::string &Initial) { +SubtargetFeatures::SubtargetFeatures(const StringRef Initial) { // Break up string into separate features Split(Features, Initial); } @@ -179,33 +185,6 @@ SubtargetFeatures::SubtargetFeatures(const std::string &Initial) { std::string SubtargetFeatures::getString() const { return Join(Features); } -void SubtargetFeatures::setString(const std::string &Initial) { - // Throw out old features - Features.clear(); - // Break up string into separate features - Split(Features, LowercaseString(Initial)); -} - - -/// setCPU - Set the CPU string. Replaces previous setting. Setting to "" -/// clears CPU. 
-void SubtargetFeatures::setCPU(const std::string &String) {
-  Features[0] = LowercaseString(String);
-}
-
-
-/// setCPUIfNone - Setting CPU string only if no string is set.
-///
-void SubtargetFeatures::setCPUIfNone(const std::string &String) {
-  if (Features[0].empty()) setCPU(String);
-}
-
-/// getCPU - Returns current CPU.
-///
-const std::string & SubtargetFeatures::getCPU() const {
-  return Features[0];
-}
-
 
 /// SetImpliedBits - For each feature that is (transitively) implied by this
 /// feature, set it.
@@ -245,14 +224,48 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
   }
 }
 
-/// getBits - Get feature bits.
+/// ToggleFeature - Toggle a feature and return the newly updated feature
+/// bits.
+uint64_t
+SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
+                                 const SubtargetFeatureKV *FeatureTable,
+                                 size_t FeatureTableSize) {
+  // Find feature in table.
+  const SubtargetFeatureKV *FeatureEntry =
+    Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+  // If there is a match
+  if (FeatureEntry) {
+    if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+      Bits &= ~FeatureEntry->Value;
+
+      // For each feature that implies this, clear it.
+      ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+    } else {
+      Bits |= FeatureEntry->Value;
+
+      // For each feature that this implies, set it.
+      SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+    }
+  } else {
+    errs() << "'" << Feature
+           << "' is not a recognized feature for this target"
+           << " (ignoring feature)\n";
+  }
+
+  return Bits;
+}
+
+
+/// getFeatureBits - Get the feature bits of a CPU.
 ///
-uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
-                                    size_t CPUTableSize,
-                                    const SubtargetFeatureKV *FeatureTable,
-                                    size_t FeatureTableSize) {
-  assert(CPUTable && "missing CPU table");
-  assert(FeatureTable && "missing features table");
+uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
+                                           const SubtargetFeatureKV *CPUTable,
+                                           size_t CPUTableSize,
+                                           const SubtargetFeatureKV *FeatureTable,
+                                           size_t FeatureTableSize) {
+  if (!FeatureTableSize || !CPUTableSize)
+    return 0;
+
 #ifndef NDEBUG
   for (size_t i = 1; i < CPUTableSize; i++) {
     assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
@@ -266,31 +279,33 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
   uint64_t Bits = 0;                    // Resulting bits
 
   // Check if help is needed
-  if (Features[0] == "help")
+  if (CPU == "help")
     Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
 
-  // Find CPU entry
-  const SubtargetFeatureKV *CPUEntry =
-    Find(Features[0], CPUTable, CPUTableSize);
-  // If there is a match
-  if (CPUEntry) {
-    // Set base feature bits
-    Bits = CPUEntry->Value;
-
-    // Set the feature implied by this CPU feature, if any.
-    for (size_t i = 0; i < FeatureTableSize; ++i) {
-      const SubtargetFeatureKV &FE = FeatureTable[i];
-      if (CPUEntry->Value & FE.Value)
-        SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+  // Find CPU entry if CPU name is specified.
+  if (!CPU.empty()) {
+    const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+    // If there is a match
+    if (CPUEntry) {
+      // Set base feature bits
+      Bits = CPUEntry->Value;
+
+      // Set the feature implied by this CPU feature, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) { + const SubtargetFeatureKV &FE = FeatureTable[i]; + if (CPUEntry->Value & FE.Value) + SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize); + } + } else { + errs() << "'" << CPU + << "' is not a recognized processor for this target" + << " (ignoring processor)\n"; } - } else { - errs() << "'" << Features[0] - << "' is not a recognized processor for this target" - << " (ignoring processor)\n"; } + // Iterate through each feature - for (size_t i = 1; i < Features.size(); i++) { - const std::string &Feature = Features[i]; + for (size_t i = 0, E = Features.size(); i < E; i++) { + const StringRef Feature = Features[i]; // Check for help if (Feature == "+help") @@ -323,9 +338,10 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, return Bits; } -/// Get info pointer -void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table, - size_t TableSize) { +/// Get scheduling itinerary of a CPU. +void *SubtargetFeatures::getItinerary(const StringRef CPU, + const SubtargetInfoKV *Table, + size_t TableSize) { assert(Table && "missing table"); #ifndef NDEBUG for (size_t i = 1; i < TableSize; i++) { @@ -334,12 +350,12 @@ void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table, #endif // Find entry - const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize); + const SubtargetInfoKV *Entry = Find(CPU, Table, TableSize); if (Entry) { return Entry->Value; } else { - errs() << "'" << Features[0] + errs() << "'" << CPU << "' is not a recognized processor for this target" << " (ignoring processor)\n"; return NULL; @@ -367,10 +383,7 @@ void SubtargetFeatures::dump() const { /// subtarget. It would be better if we could encode this information /// into the IR. See <rdar://5972456>. /// -void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU, - const Triple& Triple) { - setCPU(CPU); - +void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) { if (Triple.getVendor() == Triple::Apple) { if (Triple.getArch() == Triple::ppc) { // powerpc-apple-* diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp new file mode 100644 index 000000000000..4b31c7557dd3 --- /dev/null +++ b/lib/Object/Binary.cpp @@ -0,0 +1,96 @@ +//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Binary class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Binary.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +// Include headers for createBinary. 
+#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/COFF.h" + +using namespace llvm; +using namespace object; + +Binary::~Binary() { + delete Data; +} + +Binary::Binary(unsigned int Type, MemoryBuffer *Source) + : TypeID(Type) + , Data(Source) {} + +StringRef Binary::getData() const { + return Data->getBuffer(); +} + +StringRef Binary::getFileName() const { + return Data->getBufferIdentifier(); +} + +error_code object::createBinary(MemoryBuffer *Source, + OwningPtr<Binary> &Result) { + OwningPtr<MemoryBuffer> scopedSource(Source); + if (!Source) + return make_error_code(errc::invalid_argument); + if (Source->getBufferSize() < 64) + return object_error::invalid_file_type; + sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(), + static_cast<unsigned>(Source->getBufferSize())); + error_code ec; + switch (type) { + case sys::ELF_Relocatable_FileType: + case sys::ELF_Executable_FileType: + case sys::ELF_SharedObject_FileType: + case sys::ELF_Core_FileType: { + OwningPtr<Binary> ret( + ObjectFile::createELFObjectFile(scopedSource.take())); + if (!ret) + return object_error::invalid_file_type; + Result.swap(ret); + return object_error::success; + } + case sys::Mach_O_Object_FileType: + case sys::Mach_O_Executable_FileType: + case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: + case sys::Mach_O_Core_FileType: + case sys::Mach_O_PreloadExecutable_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: + case sys::Mach_O_DynamicLinker_FileType: + case sys::Mach_O_Bundle_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: { + OwningPtr<Binary> ret( + ObjectFile::createMachOObjectFile(scopedSource.take())); + if (!ret) + return object_error::invalid_file_type; + Result.swap(ret); + return object_error::success; + } + case sys::COFF_FileType: { + OwningPtr<Binary> ret(new COFFObjectFile(scopedSource.take(), ec)); + if (ec) return ec; + Result.swap(ret); + return object_error::success; + } + default: // Unrecognized object file format. 
+    return object_error::invalid_file_type;
+  }
+}
+
+error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFile(Path, File))
+    return ec;
+  return createBinary(File.take(), Result);
+}
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 642a8ece8b76..68e5e94924d0 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -1,8 +1,10 @@
 add_llvm_library(LLVMObject
+  Binary.cpp
+  COFFObjectFile.cpp
+  ELFObjectFile.cpp
+  Error.cpp
   MachOObject.cpp
   MachOObjectFile.cpp
   Object.cpp
   ObjectFile.cpp
-  COFFObjectFile.cpp
-  ELFObjectFile.cpp
   )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 86bf44baaeb6..07de6bc99973 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -11,11 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/COFF.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Endian.h"
 
 using namespace llvm;
 using namespace object;
@@ -28,174 +26,164 @@ using support::little16_t;
 }
 
 namespace {
-struct coff_file_header {
-  ulittle16_t Machine;
-  ulittle16_t NumberOfSections;
-  ulittle32_t TimeDateStamp;
-  ulittle32_t PointerToSymbolTable;
-  ulittle32_t NumberOfSymbols;
-  ulittle16_t SizeOfOptionalHeader;
-  ulittle16_t Characteristics;
-};
+// Returns false (and sets ec) if size is greater than the buffer size.
+bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
+  if (m->getBufferSize() < size) {
+    ec = object_error::unexpected_eof;
+    return false;
+  }
+  return true;
 }
 
-extern char coff_file_header_layout_static_assert
-    [sizeof(coff_file_header) == 20 ? 1 : -1];
-
-namespace {
-struct coff_symbol {
-  struct StringTableOffset {
-    ulittle32_t Zeroes;
-    ulittle32_t Offset;
-  };
-
-  union {
-    char ShortName[8];
-    StringTableOffset Offset;
-  } Name;
-
-  ulittle32_t Value;
-  little16_t SectionNumber;
-
-  struct {
-    ulittle8_t BaseType;
-    ulittle8_t ComplexType;
-  } Type;
-
-  ulittle8_t StorageClass;
-  ulittle8_t NumberOfAuxSymbols;
-};
+// Returns false if any bytes in [addr, addr + size) fall outside of m.
+bool checkAddr(const MemoryBuffer *m,
+               error_code &ec,
+               uintptr_t addr,
+               uint64_t size) {
+  if (addr + size < addr ||
+      addr + size < size ||
+      addr + size > uintptr_t(m->getBufferEnd())) {
+    ec = object_error::unexpected_eof;
+    return false;
+  }
+  return true;
+}
 }
 
-extern char coff_coff_symbol_layout_static_assert
-    [sizeof(coff_symbol) == 18 ? 1 : -1];
+const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
+  const coff_symbol *addr = reinterpret_cast<const coff_symbol*>(Symb.p);
 
-namespace {
-struct coff_section {
-  char Name[8];
-  ulittle32_t VirtualSize;
-  ulittle32_t VirtualAddress;
-  ulittle32_t SizeOfRawData;
-  ulittle32_t PointerToRawData;
-  ulittle32_t PointerToRelocations;
-  ulittle32_t PointerToLinenumbers;
-  ulittle16_t NumberOfRelocations;
-  ulittle16_t NumberOfLinenumbers;
-  ulittle32_t Characteristics;
-};
+# ifndef NDEBUG
+  // Verify that the symbol points to a valid entry in the symbol table.
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(base()); + if (offset < Header->PointerToSymbolTable + || offset >= Header->PointerToSymbolTable + + (Header->NumberOfSymbols * sizeof(coff_symbol))) + report_fatal_error("Symbol was outside of symbol table."); + + assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol) + == 0 && "Symbol did not point to the beginning of a symbol"); +# endif + + return addr; } -extern char coff_coff_section_layout_static_assert - [sizeof(coff_section) == 40 ? 1 : -1]; +const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const { + const coff_section *addr = reinterpret_cast<const coff_section*>(Sec.p); -namespace { -class COFFObjectFile : public ObjectFile { -private: - uint64_t HeaderOff; - const coff_file_header *Header; - const coff_section *SectionTable; - const coff_symbol *SymbolTable; - const char *StringTable; - - const coff_section *getSection(std::size_t index) const; - const char *getString(std::size_t offset) const; - -protected: - virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; - virtual StringRef getSymbolName(DataRefImpl Symb) const; - virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; - virtual uint64_t getSymbolSize(DataRefImpl Symb) const; - virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; - virtual bool isSymbolInternal(DataRefImpl Symb) const; - - virtual SectionRef getSectionNext(DataRefImpl Sec) const; - virtual StringRef getSectionName(DataRefImpl Sec) const; - virtual uint64_t getSectionAddress(DataRefImpl Sec) const; - virtual uint64_t getSectionSize(DataRefImpl Sec) const; - virtual StringRef getSectionContents(DataRefImpl Sec) const; - virtual bool isSectionText(DataRefImpl Sec) const; - -public: - COFFObjectFile(MemoryBuffer *Object); - virtual symbol_iterator begin_symbols() const; - virtual symbol_iterator end_symbols() const; - virtual section_iterator begin_sections() const; - virtual section_iterator end_sections() const; - - virtual uint8_t getBytesInAddress() const; - virtual StringRef getFileFormatName() const; - virtual unsigned getArch() const; -}; -} // end namespace - -SymbolRef COFFObjectFile::getSymbolNext(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); +# ifndef NDEBUG + // Verify that the section points to a valid entry in the section table. + if (addr < SectionTable + || addr >= (SectionTable + Header->NumberOfSections)) + report_fatal_error("Section was outside of section table."); + + uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable); + assert(offset % sizeof(coff_section) == 0 && + "Section did not point to the beginning of a section"); +# endif + + return addr; +} + +error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb, + SymbolRef &Result) const { + const coff_symbol *symb = toSymb(Symb); symb += 1 + symb->NumberOfAuxSymbols; - Symb.p = reinterpret_cast<intptr_t>(symb); - return SymbolRef(Symb, this); + Symb.p = reinterpret_cast<uintptr_t>(symb); + Result = SymbolRef(Symb, this); + return object_error::success; } -StringRef COFFObjectFile::getSymbolName(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); + error_code COFFObjectFile::getSymbolName(DataRefImpl Symb, + StringRef &Result) const { + const coff_symbol *symb = toSymb(Symb); // Check for string table entry. First 4 bytes are 0. 
if (symb->Name.Offset.Zeroes == 0) { uint32_t Offset = symb->Name.Offset.Offset; - return StringRef(getString(Offset)); + if (error_code ec = getString(Offset, Result)) + return ec; + return object_error::success; } if (symb->Name.ShortName[7] == 0) // Null terminated, let ::strlen figure out the length. - return StringRef(symb->Name.ShortName); - // Not null terminated, use all 8 bytes. - return StringRef(symb->Name.ShortName, 8); + Result = StringRef(symb->Name.ShortName); + else + // Not null terminated, use all 8 bytes. + Result = StringRef(symb->Name.ShortName, 8); + return object_error::success; } -uint64_t COFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); - const coff_section *Section = getSection(symb->SectionNumber); - char Type = getSymbolNMTypeChar(Symb); +error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { + const coff_symbol *symb = toSymb(Symb); + const coff_section *Section = NULL; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; + char Type; + if (error_code ec = getSymbolNMTypeChar(Symb, Type)) + return ec; if (Type == 'U' || Type == 'w') - return UnknownAddressOrSize; - if (Section) - return Section->VirtualAddress + symb->Value; - return symb->Value; + Result = UnknownAddressOrSize; + else if (Section) + Result = Section->VirtualAddress + symb->Value; + else + Result = symb->Value; + return object_error::success; } -uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Symb) const { +error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb, + uint64_t &Result) const { // FIXME: Return the correct size. This requires looking at all the symbols // in the same section as this symbol, and looking for either the next // symbol, or the end of the section. 
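The FIXME above describes the missing algorithm: a symbol's size would be the gap to the next symbol value in the same section, or to the end of the section's raw data for the last symbol. A hypothetical illustration of that approach (not code from the patch):

#include <cstdint>
#include <vector>

// Size of the symbol at Value, given the values of all symbols in the
// same section and the section's size, per the FIXME above.
uint64_t guessSymbolSize(uint64_t Value,
                         const std::vector<uint64_t> &SectionSymbolValues,
                         uint64_t SectionSize) {
  uint64_t Next = SectionSize;
  for (size_t i = 0; i != SectionSymbolValues.size(); ++i)
    if (SectionSymbolValues[i] > Value && SectionSymbolValues[i] < Next)
      Next = SectionSymbolValues[i];
  return Next - Value;
}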
- const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); - const coff_section *Section = getSection(symb->SectionNumber); - char Type = getSymbolNMTypeChar(Symb); + const coff_symbol *symb = toSymb(Symb); + const coff_section *Section = NULL; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; + char Type; + if (error_code ec = getSymbolNMTypeChar(Symb, Type)) + return ec; if (Type == 'U' || Type == 'w') - return UnknownAddressOrSize; - if (Section) - return Section->SizeOfRawData - symb->Value; - return 0; + Result = UnknownAddressOrSize; + else if (Section) + Result = Section->SizeOfRawData - symb->Value; + else + Result = 0; + return object_error::success; } -char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const { - const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); - char ret = StringSwitch<char>(getSymbolName(Symb)) +error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, + char &Result) const { + const coff_symbol *symb = toSymb(Symb); + StringRef name; + if (error_code ec = getSymbolName(Symb, name)) + return ec; + char ret = StringSwitch<char>(name) .StartsWith(".debug", 'N') .StartsWith(".sxdata", 'N') .Default('?'); - if (ret != '?') - return ret; + if (ret != '?') { + Result = ret; + return object_error::success; + } uint32_t Characteristics = 0; - if (const coff_section *Section = getSection(symb->SectionNumber)) { + if (symb->SectionNumber > 0) { + const coff_section *Section = NULL; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; Characteristics = Section->Characteristics; } switch (symb->SectionNumber) { case COFF::IMAGE_SYM_UNDEFINED: // Check storage classes. - if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) - return 'w'; // Don't do ::toupper. - else + if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) { + Result = 'w'; + return object_error::success; // Don't do ::toupper. + } else ret = 'u'; break; case COFF::IMAGE_SYM_ABSOLUTE: @@ -227,22 +215,28 @@ char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const { if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL) ret = ::toupper(ret); - return ret; + Result = ret; + return object_error::success; } -bool COFFObjectFile::isSymbolInternal(DataRefImpl Symb) const { - return false; +error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb, + bool &Result) const { + Result = false; + return object_error::success; } -SectionRef COFFObjectFile::getSectionNext(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); +error_code COFFObjectFile::getSectionNext(DataRefImpl Sec, + SectionRef &Result) const { + const coff_section *sec = toSec(Sec); sec += 1; - Sec.p = reinterpret_cast<intptr_t>(sec); - return SectionRef(Sec, this); + Sec.p = reinterpret_cast<uintptr_t>(sec); + Result = SectionRef(Sec, this); + return object_error::success; } -StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); +error_code COFFObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Result) const { + const coff_section *sec = toSec(Sec); StringRef name; if (sec->Name[7] == 0) // Null terminated, let ::strlen figure out the length. 
@@ -255,64 +249,124 @@ StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const { if (name[0] == '/') { uint32_t Offset; name.substr(1).getAsInteger(10, Offset); - return StringRef(getString(Offset)); + if (error_code ec = getString(Offset, name)) + return ec; } - // It's just a normal name. - return name; + Result = name; + return object_error::success; +} + +error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec, + uint64_t &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->VirtualAddress; + return object_error::success; } -uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); - return sec->VirtualAddress; +error_code COFFObjectFile::getSectionSize(DataRefImpl Sec, + uint64_t &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->SizeOfRawData; + return object_error::success; } -uint64_t COFFObjectFile::getSectionSize(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); - return sec->SizeOfRawData; +error_code COFFObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Result) const { + const coff_section *sec = toSec(Sec); + // The only thing that we need to verify is that the contents is contained + // within the file bounds. We don't need to make sure it doesn't cover other + // data, as there's nothing that says that is not allowed. + uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData; + uintptr_t con_end = con_start + sec->SizeOfRawData; + if (con_end >= uintptr_t(Data->getBufferEnd())) + return object_error::parse_failed; + Result = StringRef(reinterpret_cast<const char*>(con_start), + sec->SizeOfRawData); + return object_error::success; } -StringRef COFFObjectFile::getSectionContents(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); - return StringRef(reinterpret_cast<const char *>(base + sec->PointerToRawData), - sec->SizeOfRawData); +error_code COFFObjectFile::isSectionText(DataRefImpl Sec, + bool &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE; + return object_error::success; } -bool COFFObjectFile::isSectionText(DataRefImpl Sec) const { - const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); - return sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE; +error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec, + DataRefImpl Symb, + bool &Result) const { + // FIXME: Unimplemented. + Result = false; + return object_error::success; } -COFFObjectFile::COFFObjectFile(MemoryBuffer *Object) - : ObjectFile(Object) { +COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) + : ObjectFile(Binary::isCOFF, Object, ec) { + // Check that we at least have enough room for a header. + if (!checkSize(Data, ec, sizeof(coff_file_header))) return; - HeaderOff = 0; + // The actual starting location of the COFF header in the file. This can be + // non-zero in PE/COFF files. + uint64_t HeaderStart = 0; - if (base[0] == 0x4d && base[1] == 0x5a) { + // Check if this is a PE/COFF file. + if (base()[0] == 0x4d && base()[1] == 0x5a) { // PE/COFF, seek through MS-DOS compatibility stub and 4-byte // PE signature to find 'normal' COFF header. 
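// Editor's note on the layout probed here (standard PE/COFF; stated from the
// format specification rather than from this patch):
//   offset 0x00  : 'M' 'Z'            -- DOS stub signature (0x4d, 0x5a)
//   offset 0x3c  : e_lfanew, u32 LE   -- file offset of the PE signature
//   e_lfanew     : "PE\0\0"           -- 4-byte PE signature
//   e_lfanew + 4 : coff_file_header, optional header, then the section table.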
- HeaderOff += *reinterpret_cast<const ulittle32_t *>(base + 0x3c); - HeaderOff += 4; + if (!checkSize(Data, ec, 0x3c + 8)) return; + HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c); + // Check the PE header. ("PE\0\0") + if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) { + ec = object_error::parse_failed; + return; + } + HeaderStart += 4; // Skip the PE Header. } - Header = reinterpret_cast<const coff_file_header *>(base + HeaderOff); + Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart); + if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header))) + return; + SectionTable = - reinterpret_cast<const coff_section *>( base - + HeaderOff + reinterpret_cast<const coff_section *>( base() + + HeaderStart + sizeof(coff_file_header) + Header->SizeOfOptionalHeader); + if (!checkAddr(Data, ec, uintptr_t(SectionTable), + Header->NumberOfSections * sizeof(coff_section))) + return; + SymbolTable = - reinterpret_cast<const coff_symbol *>(base + Header->PointerToSymbolTable); + reinterpret_cast<const coff_symbol *>(base() + + Header->PointerToSymbolTable); + if (!checkAddr(Data, ec, uintptr_t(SymbolTable), + Header->NumberOfSymbols * sizeof(coff_symbol))) + return; // Find string table. - StringTable = reinterpret_cast<const char *>(base) - + Header->PointerToSymbolTable - + Header->NumberOfSymbols * 18; + StringTable = reinterpret_cast<const char *>(base()) + + Header->PointerToSymbolTable + + Header->NumberOfSymbols * sizeof(coff_symbol); + if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t))) + return; + + StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable); + if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize)) + return; + // Check that the string table is null terminated if it has anything in it. + if (StringTableSize < 4 + || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) { + ec = object_error::parse_failed; + return; + } + + ec = object_error::success; } ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SymbolTable); return symbol_iterator(SymbolRef(ret, this)); } @@ -320,21 +374,21 @@ ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const { // The symbol table ends where the string table begins.
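// Editor's sketch of a caller under the new interface -- hypothetical; the
// increment(ec) and getName(StringRef&) shapes are inferred from the C
// bindings changed later in this patch:
//
//   error_code ec;
//   for (ObjectFile::symbol_iterator I = Obj->begin_symbols(),
//                                    E = Obj->end_symbols();
//        I != E; I.increment(ec)) {
//     if (ec) report_fatal_error(ec.message());
//     StringRef Name;
//     if (error_code ec2 = I->getName(Name))
//       report_fatal_error(ec2.message());
//   }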
DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(StringTable); return symbol_iterator(SymbolRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::begin_sections() const { DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable); return section_iterator(SectionRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::end_sections() const { DataRefImpl ret; - memset(&ret, 0, sizeof(DataRefImpl)); + std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections); return section_iterator(SectionRef(ret, this)); } @@ -365,24 +419,37 @@ unsigned COFFObjectFile::getArch() const { } } -const coff_section *COFFObjectFile::getSection(std::size_t index) const { - if (index > 0 && index <= Header->NumberOfSections) - return SectionTable + (index - 1); - return 0; +error_code COFFObjectFile::getSection(int32_t index, + const coff_section *&Result) const { + // Check for special index values. + if (index == COFF::IMAGE_SYM_UNDEFINED || + index == COFF::IMAGE_SYM_ABSOLUTE || + index == COFF::IMAGE_SYM_DEBUG) + Result = NULL; + else if (index > 0 && index <= Header->NumberOfSections) + // We already verified the section table data, so no need to check again. + Result = SectionTable + (index - 1); + else + return object_error::parse_failed; + return object_error::success; } -const char *COFFObjectFile::getString(std::size_t offset) const { - const ulittle32_t *StringTableSize = - reinterpret_cast<const ulittle32_t *>(StringTable); - if (offset < *StringTableSize) - return StringTable + offset; - return 0; +error_code COFFObjectFile::getString(uint32_t offset, + StringRef &Result) const { + if (StringTableSize <= 4) + // Tried to get a string from an empty string table. 
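// (Editor's note: the first four bytes of a COFF string table hold the
// table's total size, including those four bytes themselves, so a size of
// four or less means the table contains no strings.)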
+ return object_error::parse_failed; + if (offset >= StringTableSize) + return object_error::unexpected_eof; + Result = StringRef(StringTable + offset); + return object_error::success; } namespace llvm { ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) { - return new COFFObjectFile(Object); + error_code ec; + return new COFFObjectFile(Object, ec); } } // end namespace llvm diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index d2a2726ce739..e2ff4dfc0384 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -222,22 +222,24 @@ class ELFObjectFile : public ObjectFile { const char *getString(const Elf_Shdr *section, uint32_t offset) const; protected: - virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; - virtual StringRef getSymbolName(DataRefImpl Symb) const; - virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; - virtual uint64_t getSymbolSize(DataRefImpl Symb) const; - virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; - virtual bool isSymbolInternal(DataRefImpl Symb) const; - - virtual SectionRef getSectionNext(DataRefImpl Sec) const; - virtual StringRef getSectionName(DataRefImpl Sec) const; - virtual uint64_t getSectionAddress(DataRefImpl Sec) const; - virtual uint64_t getSectionSize(DataRefImpl Sec) const; - virtual StringRef getSectionContents(DataRefImpl Sec) const; - virtual bool isSectionText(DataRefImpl Sec) const; + virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; + virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; + virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; + virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; + + virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; + virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; + virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const; + virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; + virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, + bool &Result) const; public: - ELFObjectFile(MemoryBuffer *Object); + ELFObjectFile(MemoryBuffer *Object, error_code &ec); virtual symbol_iterator begin_symbols() const; virtual symbol_iterator end_symbols() const; virtual section_iterator begin_sections() const; @@ -259,9 +261,9 @@ void ELFObjectFile<target_endianness, is64Bits> // an error object around. if (!( symb && SymbolTableSection - && symb >= (const Elf_Sym*)(base + && symb >= (const Elf_Sym*)(base() + SymbolTableSection->sh_offset) - && symb < (const Elf_Sym*)(base + && symb < (const Elf_Sym*)(base() + SymbolTableSection->sh_offset + SymbolTableSection->sh_size))) // FIXME: Proper error handling. 
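// Editor's aside: under the error_code scheme this patch introduces, proper
// handling here would presumably mean returning object_error::parse_failed to
// the caller; the ELF reader keeps its report_fatal_error paths for now.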
@@ -269,8 +271,9 @@ void ELFObjectFile<target_endianness, is64Bits> } template<support::endianness target_endianness, bool is64Bits> -SymbolRef ELFObjectFile<target_endianness, is64Bits> - ::getSymbolNext(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolNext(DataRefImpl Symb, + SymbolRef &Result) const { validateSymbol(Symb); const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; @@ -287,63 +290,80 @@ SymbolRef ELFObjectFile<target_endianness, is64Bits> } } - return SymbolRef(Symb, this); + Result = SymbolRef(Symb, this); + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -StringRef ELFObjectFile<target_endianness, is64Bits> - ::getSymbolName(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolName(DataRefImpl Symb, + StringRef &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); if (symb->st_name == 0) { const Elf_Shdr *section = getSection(symb->st_shndx); if (!section) - return ""; - return getString(dot_shstrtab_sec, section->sh_name); + Result = ""; + else + Result = getString(dot_shstrtab_sec, section->sh_name); + return object_error::success; } // Use the default symbol table name section. - return getString(dot_strtab_sec, symb->st_name); + Result = getString(dot_strtab_sec, symb->st_name); + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -uint64_t ELFObjectFile<target_endianness, is64Bits> - ::getSymbolAddress(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section; switch (symb->st_shndx) { case ELF::SHN_COMMON: // Undefined symbols have no address yet. - case ELF::SHN_UNDEF: return UnknownAddressOrSize; - case ELF::SHN_ABS: return symb->st_value; + case ELF::SHN_UNDEF: + Result = UnknownAddressOrSize; + return object_error::success; + case ELF::SHN_ABS: + Result = symb->st_value; + return object_error::success; default: Section = getSection(symb->st_shndx); } switch (symb->getType()) { - case ELF::STT_SECTION: return Section ? Section->sh_addr - : UnknownAddressOrSize; + case ELF::STT_SECTION: + Result = Section ? 
Section->sh_addr : UnknownAddressOrSize; + return object_error::success; case ELF::STT_FUNC: case ELF::STT_OBJECT: case ELF::STT_NOTYPE: - return symb->st_value; - default: return UnknownAddressOrSize; + Result = symb->st_value; + return object_error::success; + default: + Result = UnknownAddressOrSize; + return object_error::success; } } template<support::endianness target_endianness, bool is64Bits> -uint64_t ELFObjectFile<target_endianness, is64Bits> - ::getSymbolSize(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolSize(DataRefImpl Symb, + uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); if (symb->st_size == 0) - return UnknownAddressOrSize; - return symb->st_size; + Result = UnknownAddressOrSize; + else + Result = symb->st_size; + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -char ELFObjectFile<target_endianness, is64Bits> - ::getSymbolNMTypeChar(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolNMTypeChar(DataRefImpl Symb, + char &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section = getSection(symb->st_shndx); @@ -390,89 +410,120 @@ char ELFObjectFile<target_endianness, is64Bits> ret = 'W'; } - if (ret == '?' && symb->getType() == ELF::STT_SECTION) - return StringSwitch<char>(getSymbolName(Symb)) + if (ret == '?' && symb->getType() == ELF::STT_SECTION) { + StringRef name; + if (error_code ec = getSymbolName(Symb, name)) + return ec; + Result = StringSwitch<char>(name) .StartsWith(".debug", 'N') .StartsWith(".note", 'n'); + return object_error::success; + } - return ret; + Result = ret; + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -bool ELFObjectFile<target_endianness, is64Bits> - ::isSymbolInternal(DataRefImpl Symb) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::isSymbolInternal(DataRefImpl Symb, + bool &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); if ( symb->getType() == ELF::STT_FILE || symb->getType() == ELF::STT_SECTION) - return true; - return false; + Result = true; + else + Result = false; + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -SectionRef ELFObjectFile<target_endianness, is64Bits> - ::getSectionNext(DataRefImpl Sec) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const { const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p); sec += Header->e_shentsize; Sec.p = reinterpret_cast<intptr_t>(sec); - return SectionRef(Sec, this); + Result = SectionRef(Sec, this); + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -StringRef ELFObjectFile<target_endianness, is64Bits> - ::getSectionName(DataRefImpl Sec) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSectionName(DataRefImpl Sec, + StringRef &Result) const { const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); - return StringRef(getString(dot_shstrtab_sec, sec->sh_name)); + Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name)); + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -uint64_t ELFObjectFile<target_endianness, is64Bits> - ::getSectionAddress(DataRefImpl Sec) const { +error_code ELFObjectFile<target_endianness, is64Bits> + 
::getSectionAddress(DataRefImpl Sec, + uint64_t &Result) const { const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); - return sec->sh_addr; + Result = sec->sh_addr; + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -uint64_t ELFObjectFile<target_endianness, is64Bits> - ::getSectionSize(DataRefImpl Sec) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSectionSize(DataRefImpl Sec, + uint64_t &Result) const { const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); - return sec->sh_size; + Result = sec->sh_size; + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -StringRef ELFObjectFile<target_endianness, is64Bits> - ::getSectionContents(DataRefImpl Sec) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::getSectionContents(DataRefImpl Sec, + StringRef &Result) const { const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); - const char *start = (char*)base + sec->sh_offset; - return StringRef(start, sec->sh_size); + const char *start = (const char*)base() + sec->sh_offset; + Result = StringRef(start, sec->sh_size); + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -bool ELFObjectFile<target_endianness, is64Bits> - ::isSectionText(DataRefImpl Sec) const { +error_code ELFObjectFile<target_endianness, is64Bits> + ::isSectionText(DataRefImpl Sec, + bool &Result) const { const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); if (sec->sh_flags & ELF::SHF_EXECINSTR) - return true; - return false; + Result = true; + else + Result = false; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> + ::sectionContainsSymbol(DataRefImpl Sec, + DataRefImpl Symb, + bool &Result) const { + // FIXME: Unimplemented. + Result = false; + return object_error::success; } template<support::endianness target_endianness, bool is64Bits> -ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object) - : ObjectFile(Object) +ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object + , error_code &ec) + : ObjectFile(Binary::isELF, Object, ec) , SectionHeaderTable(0) , dot_shstrtab_sec(0) , dot_strtab_sec(0) { - Header = reinterpret_cast<const Elf_Ehdr *>(base); + Header = reinterpret_cast<const Elf_Ehdr *>(base()); if (Header->e_shoff == 0) return; SectionHeaderTable = - reinterpret_cast<const Elf_Shdr *>(base + Header->e_shoff); + reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff); uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize; if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize - <= base + MapFile->getBufferSize())) + <= base() + Data->getBufferSize())) // FIXME: Proper error handling. report_fatal_error("Section table goes past end of file!"); @@ -491,7 +542,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object) dot_shstrtab_sec = getSection(Header->e_shstrndx); if (dot_shstrtab_sec) { // Verify that the last byte in the string table in a null. - if (((const char*)base + dot_shstrtab_sec->sh_offset) + if (((const char*)base() + dot_shstrtab_sec->sh_offset) [dot_shstrtab_sec->sh_size - 1] != 0) // FIXME: Proper error handling. 
report_fatal_error("String table must end with a null terminator!"); @@ -509,7 +560,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object) // FIXME: Proper error handling. report_fatal_error("Already found section named .strtab!"); dot_strtab_sec = sh; - const char *dot_strtab = (const char*)base + sh->sh_offset; + const char *dot_strtab = (const char*)base() + sh->sh_offset; if (dot_strtab[sh->sh_size - 1] != 0) // FIXME: Proper error handling. report_fatal_error("String table must end with a null terminator!"); @@ -548,7 +599,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> ::begin_sections() const { DataRefImpl ret; memset(&ret, 0, sizeof(DataRefImpl)); - ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff); + ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff); return section_iterator(SectionRef(ret, this)); } @@ -557,7 +608,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> ::end_sections() const { DataRefImpl ret; memset(&ret, 0, sizeof(DataRefImpl)); - ret.p = reinterpret_cast<intptr_t>(base + ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff + (Header->e_shentsize * Header->e_shnum)); return section_iterator(SectionRef(ret, this)); @@ -613,7 +664,7 @@ const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym * ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const { const Elf_Shdr *sec = SymbolTableSections[Symb.d.b]; return reinterpret_cast<const Elf_Sym *>( - base + base() + sec->sh_offset + (Symb.d.a * sec->sh_entsize)); } @@ -656,8 +707,8 @@ const char *ELFObjectFile<target_endianness, is64Bits> assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!"); if (offset >= section->sh_size) // FIXME: Proper error handling. - report_fatal_error("Sybol name offset outside of string table!"); - return (const char *)base + section->sh_offset + offset; + report_fatal_error("Symbol name offset outside of string table!"); + return (const char *)base() + section->sh_offset + offset; } // EI_CLASS, EI_DATA. @@ -673,14 +724,15 @@ namespace llvm { ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) { std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object); + error_code ec; if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) - return new ELFObjectFile<support::little, false>(Object); + return new ELFObjectFile<support::little, false>(Object, ec); else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) - return new ELFObjectFile<support::big, false>(Object); + return new ELFObjectFile<support::big, false>(Object, ec); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) - return new ELFObjectFile<support::little, true>(Object); + return new ELFObjectFile<support::little, true>(Object, ec); else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) - return new ELFObjectFile<support::big, true>(Object); + return new ELFObjectFile<support::big, true>(Object, ec); // FIXME: Proper error handling. report_fatal_error("Not an ELF object file!"); } diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp new file mode 100644 index 000000000000..25946257ab5a --- /dev/null +++ b/lib/Object/Error.cpp @@ -0,0 +1,57 @@ +//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines a new error_category for the Object library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/Error.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; +using namespace object; + +namespace { +class _object_error_category : public _do_message { +public: + virtual const char* name() const; + virtual std::string message(int ev) const; + virtual error_condition default_error_condition(int ev) const; +}; +} + +const char *_object_error_category::name() const { + return "llvm.object"; +} + +std::string _object_error_category::message(int ev) const { + switch (ev) { + case object_error::success: return "Success"; + case object_error::invalid_file_type: + return "The file was not recognized as a valid object file"; + case object_error::parse_failed: + return "Invalid data was encountered while parsing the file"; + case object_error::unexpected_eof: + return "The end of the file was unexpectedly encountered"; + default: + llvm_unreachable("An enumerator of object_error does not have a message " + "defined."); + } +} + +error_condition _object_error_category::default_error_condition(int ev) const { + if (ev == object_error::success) + return errc::success; + return errc::invalid_argument; +} + +const error_category &object::object_category() { + static _object_error_category o; + return o; +} diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 877cbfbdb808..26a6e136d753 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -32,8 +32,8 @@ typedef MachOObject::LoadCommandInfo LoadCommandInfo; class MachOObjectFile : public ObjectFile { public: - MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO) - : ObjectFile(Object), + MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec) + : ObjectFile(Binary::isMachO, Object, ec), MachOObj(MOO), RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {} @@ -47,19 +47,21 @@ public: virtual unsigned getArch() const; protected: - virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; - virtual StringRef getSymbolName(DataRefImpl Symb) const; - virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; - virtual uint64_t getSymbolSize(DataRefImpl Symb) const; - virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; - virtual bool isSymbolInternal(DataRefImpl Symb) const; - - virtual SectionRef getSectionNext(DataRefImpl Sec) const; - virtual StringRef getSectionName(DataRefImpl Sec) const; - virtual uint64_t getSectionAddress(DataRefImpl Sec) const; - virtual uint64_t getSectionSize(DataRefImpl Sec) const; - virtual StringRef getSectionContents(DataRefImpl Sec) const; - virtual bool isSectionText(DataRefImpl Sec) const; + virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; + virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; + virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; + virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; + + virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; + virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; + virtual error_code 
getSectionAddress(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const; + virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; + virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S, + bool &Result) const; private: MachOObject *MachOObj; @@ -68,16 +70,21 @@ private: void moveToNextSection(DataRefImpl &DRI) const; void getSymbolTableEntry(DataRefImpl DRI, InMemoryStruct<macho::SymbolTableEntry> &Res) const; + void getSymbol64TableEntry(DataRefImpl DRI, + InMemoryStruct<macho::Symbol64TableEntry> &Res) const; void moveToNextSymbol(DataRefImpl &DRI) const; void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const; + void getSection64(DataRefImpl DRI, + InMemoryStruct<macho::Section64> &Res) const; }; ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + error_code ec; std::string Err; MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err); if (!MachOObj) return NULL; - return new MachOObjectFile(Buffer, MachOObj); + return new MachOObjectFile(Buffer, MachOObj, ec); } /*===-- Symbols -----------------------------------------------------------===*/ @@ -113,35 +120,81 @@ void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI, Res); } +void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI, + InMemoryStruct<macho::Symbol64TableEntry> &Res) const { + InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); + + if (RegisteredStringTable != DRI.d.a) { + MachOObj->RegisterStringTable(*SymtabLoadCmd); + RegisteredStringTable = DRI.d.a; + } + + MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, + Res); +} -SymbolRef MachOObjectFile::getSymbolNext(DataRefImpl DRI) const { + +error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI, + SymbolRef &Result) const { DRI.d.b++; moveToNextSymbol(DRI); - return SymbolRef(DRI, this); + Result = SymbolRef(DRI, this); + return object_error::success; } -StringRef MachOObjectFile::getSymbolName(DataRefImpl DRI) const { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - return MachOObj->getStringAtIndex(Entry->StringIndex); +error_code MachOObjectFile::getSymbolName(DataRefImpl DRI, + StringRef &Result) const { + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(DRI, Entry); + Result = MachOObj->getStringAtIndex(Entry->StringIndex); + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + Result = MachOObj->getStringAtIndex(Entry->StringIndex); + } + return object_error::success; } -uint64_t MachOObjectFile::getSymbolAddress(DataRefImpl DRI) const { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - return Entry->Value; +error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI, + uint64_t &Result) const { + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(DRI, Entry); + Result = Entry->Value; + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + Result = Entry->Value; + } + return object_error::success; } -uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const { - return UnknownAddressOrSize; +error_code 
MachOObjectFile::getSymbolSize(DataRefImpl DRI, + uint64_t &Result) const { + Result = UnknownAddressOrSize; + return object_error::success; } -char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); +error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI, + char &Result) const { + uint8_t Type, Flags; + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(DRI, Entry); + Type = Entry->Type; + Flags = Entry->Flags; + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + Type = Entry->Type; + Flags = Entry->Flags; + } char Char; - switch (Entry->Type & macho::STF_TypeMask) { + switch (Type & macho::STF_TypeMask) { case macho::STT_Undefined: Char = 'u'; break; @@ -154,15 +207,24 @@ char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const { break; } - if (Entry->Flags & (macho::STF_External | macho::STF_PrivateExtern)) + if (Flags & (macho::STF_External | macho::STF_PrivateExtern)) Char = toupper(Char); - return Char; + Result = Char; + return object_error::success; } -bool MachOObjectFile::isSymbolInternal(DataRefImpl DRI) const { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - return Entry->Flags & macho::STF_StabsEntryMask; +error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI, + bool &Result) const { + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(DRI, Entry); + Result = Entry->Flags & macho::STF_StabsEntryMask; + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + Result = Entry->Flags & macho::STF_StabsEntryMask; + } + return object_error::success; } ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const { @@ -204,10 +266,12 @@ void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const { } } -SectionRef MachOObjectFile::getSectionNext(DataRefImpl DRI) const { +error_code MachOObjectFile::getSectionNext(DataRefImpl DRI, + SectionRef &Result) const { DRI.d.b++; moveToNextSection(DRI); - return SectionRef(DRI, this); + Result = SectionRef(DRI, this); + return object_error::success; } void @@ -219,43 +283,121 @@ MachOObjectFile::getSection(DataRefImpl DRI, MachOObj->ReadSection(LCI, DRI.d.b, Res); } -StringRef MachOObjectFile::getSectionName(DataRefImpl DRI) const { - InMemoryStruct<macho::SegmentLoadCommand> SLC; +void +MachOObjectFile::getSection64(DataRefImpl DRI, + InMemoryStruct<macho::Section64> &Res) const { + InMemoryStruct<macho::Segment64LoadCommand> SLC; LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSegmentLoadCommand(LCI, SLC); - InMemoryStruct<macho::Section> Sect; - MachOObj->ReadSection(LCI, DRI.d.b, Sect); - - static char Result[34]; - strcpy(Result, SLC->Name); - strcat(Result, ","); - strcat(Result, Sect->Name); - return StringRef(Result); + MachOObj->ReadSegment64LoadCommand(LCI, SLC); + MachOObj->ReadSection64(LCI, DRI.d.b, Res); } -uint64_t MachOObjectFile::getSectionAddress(DataRefImpl DRI) const { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - return Sect->Address; +static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) { + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + if (LCI.Command.Type == macho::LCT_Segment64) + return true; + assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type."); + return false; } 
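// Editor's note on the buffer used by getSectionName below: Mach-O stores
// segment and section names as 16-byte fields, so the worst case is
// 16 + 1 (comma) + 16 + 1 (NUL) = 34 bytes -- hence "static char result[34]".
// The "FIXME: thread safety" is apt, since that static buffer is shared
// across calls.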
-uint64_t MachOObjectFile::getSectionSize(DataRefImpl DRI) const { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - return Sect->Size; +error_code MachOObjectFile::getSectionName(DataRefImpl DRI, + StringRef &Result) const { + // FIXME: thread safety. + static char result[34]; + if (is64BitLoadCommand(MachOObj, DRI)) { + InMemoryStruct<macho::Segment64LoadCommand> SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegment64LoadCommand(LCI, SLC); + InMemoryStruct<macho::Section64> Sect; + MachOObj->ReadSection64(LCI, DRI.d.b, Sect); + + strcpy(result, Sect->SegmentName); + strcat(result, ","); + strcat(result, Sect->Name); + } else { + InMemoryStruct<macho::SegmentLoadCommand> SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegmentLoadCommand(LCI, SLC); + InMemoryStruct<macho::Section> Sect; + MachOObj->ReadSection(LCI, DRI.d.b, Sect); + + strcpy(result, Sect->SegmentName); + strcat(result, ","); + strcat(result, Sect->Name); + } + Result = StringRef(result); + return object_error::success; } -StringRef MachOObjectFile::getSectionContents(DataRefImpl DRI) const { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - return MachOObj->getData(Sect->Offset, Sect->Size); +error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI, + uint64_t &Result) const { + if (is64BitLoadCommand(MachOObj, DRI)) { + InMemoryStruct<macho::Section64> Sect; + getSection64(DRI, Sect); + Result = Sect->Address; + } else { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + Result = Sect->Address; + } + return object_error::success; } -bool MachOObjectFile::isSectionText(DataRefImpl DRI) const { - InMemoryStruct<macho::SegmentLoadCommand> SLC; - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSegmentLoadCommand(LCI, SLC); - return !strcmp(SLC->Name, "__TEXT"); +error_code MachOObjectFile::getSectionSize(DataRefImpl DRI, + uint64_t &Result) const { + if (is64BitLoadCommand(MachOObj, DRI)) { + InMemoryStruct<macho::Section64> Sect; + getSection64(DRI, Sect); + Result = Sect->Size; + } else { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + Result = Sect->Size; + } + return object_error::success; +} + +error_code MachOObjectFile::getSectionContents(DataRefImpl DRI, + StringRef &Result) const { + if (is64BitLoadCommand(MachOObj, DRI)) { + InMemoryStruct<macho::Section64> Sect; + getSection64(DRI, Sect); + Result = MachOObj->getData(Sect->Offset, Sect->Size); + } else { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + Result = MachOObj->getData(Sect->Offset, Sect->Size); + } + return object_error::success; +} + +error_code MachOObjectFile::isSectionText(DataRefImpl DRI, + bool &Result) const { + if (is64BitLoadCommand(MachOObj, DRI)) { + InMemoryStruct<macho::Section64> Sect; + getSection64(DRI, Sect); + Result = !strcmp(Sect->Name, "__text"); + } else { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + Result = !strcmp(Sect->Name, "__text"); + } + return object_error::success; +} + +error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, + DataRefImpl Symb, + bool &Result) const { + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(Symb, Entry); + Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b; + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(Symb, Entry); + Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b; + } + return 
object_error::success; } ObjectFile::section_iterator MachOObjectFile::begin_sections() const { diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 603b23c74e93..9a373ad21bd2 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -41,19 +41,28 @@ LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile, } void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { - // We can't use unwrap() here because the argument to ++ must be an lvalue. - ++*reinterpret_cast<ObjectFile::section_iterator*>(SI); + error_code ec; + unwrap(SI)->increment(ec); + if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message()); } const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { - return (*unwrap(SI))->getName().data(); + StringRef ret; + if (error_code ec = (*unwrap(SI))->getName(ret)) + report_fatal_error(ec.message()); + return ret.data(); } uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { - return (*unwrap(SI))->getSize(); + uint64_t ret; + if (error_code ec = (*unwrap(SI))->getSize(ret)) + report_fatal_error(ec.message()); + return ret; } const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { - return (*unwrap(SI))->getContents().data(); + StringRef ret; + if (error_code ec = (*unwrap(SI))->getContents(ret)) + report_fatal_error(ec.message()); + return ret.data(); } - diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 47b63115a94c..a7798df33fe5 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -21,18 +21,8 @@ using namespace llvm; using namespace object; -ObjectFile::ObjectFile(MemoryBuffer *Object) - : MapFile(Object) { - assert(MapFile && "Must be a valid MemoryBuffer!"); - base = reinterpret_cast<const uint8_t *>(MapFile->getBufferStart()); -} - -ObjectFile::~ObjectFile() { - delete MapFile; -} - -StringRef ObjectFile::getFilename() const { - return MapFile->getBufferIdentifier(); +ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec) + : Binary(Type, source) { } ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index c3169acabbc7..c64da6e137ea 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/Support/ErrorHandling.h" @@ -2084,6 +2085,23 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width, return fs; } +/* Same as convertToInteger(integerPart*, ...), except the result is returned in + an APSInt, whose initial bit-width and signed-ness are used to determine the + precision of the conversion. + */ +APFloat::opStatus +APFloat::convertToInteger(APSInt &result, + roundingMode rounding_mode, bool *isExact) const +{ + unsigned bitWidth = result.getBitWidth(); + SmallVector<uint64_t, 4> parts(result.getNumWords()); + opStatus status = convertToInteger( + parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); + // Keeps the original signed-ness. + result = APInt(bitWidth, (unsigned)parts.size(), parts.data()); + return status; +} + /* Convert an unsigned integer SRC to a floating point number, rounding according to ROUNDING_MODE. The sign of the floating point number is not modified. 
*/ diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 74d61c13a5c9..76265d445f45 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -2164,12 +2164,33 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { } void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, - bool Signed) const { + bool Signed, bool formatAsCLiteral) const { assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) && "Radix should be 2, 8, 10, or 16!"); + const char *Prefix = ""; + if (formatAsCLiteral) { + switch (Radix) { + case 2: + // Binary literals are a non-standard extension added in gcc 4.3: + // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html + Prefix = "0b"; + break; + case 8: + Prefix = "0"; + break; + case 16: + Prefix = "0x"; + break; + } + } + // First, check for a zero value and just short circuit the logic below. if (*this == 0) { + while (*Prefix) { + Str.push_back(*Prefix); + ++Prefix; + }; Str.push_back('0'); return; } @@ -2193,6 +2214,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, } } + while (*Prefix) { + Str.push_back(*Prefix); + ++Prefix; + }; + while (N) { *--BufPtr = Digits[N % Radix]; N /= Radix; @@ -2212,6 +2238,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, Str.push_back('-'); } + while (*Prefix) { + Str.push_back(*Prefix); + ++Prefix; + }; + // We insert the digits backward, then reverse them to get the right order. unsigned StartDig = Str.size(); @@ -2251,7 +2282,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, /// to the methods above. std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { SmallString<40> S; - toString(S, Radix, Signed); + toString(S, Radix, Signed, /* formatAsCLiteral = */false); return S.str(); } @@ -2266,7 +2297,7 @@ void APInt::dump() const { void APInt::print(raw_ostream &OS, bool isSigned) const { SmallString<40> S; - this->toString(S, 10, isSigned); + this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); OS << S.str(); } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 7f1c0d320b11..29143377628d 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -911,8 +911,8 @@ size_t alias::getOptionWidth() const { // Print out the option for the alias. 
void alias::printOptionInfo(size_t GlobalWidth) const { size_t L = std::strlen(ArgStr); - errs() << " -" << ArgStr; - errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; + outs() << " -" << ArgStr; + outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; } //===----------------------------------------------------------------------===// diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index 493f7083dbb3..81382d08dc23 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -529,8 +529,8 @@ ConstantRange::sub(const ConstantRange &Other) const { return ConstantRange(getBitWidth(), /*isFullSet=*/true); APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize(); - APInt NewLower = getLower() - Other.getLower(); - APInt NewUpper = getUpper() - Other.getUpper() + 1; + APInt NewLower = getLower() - Other.getUpper() + 1; + APInt NewUpper = getUpper() - Other.getLower(); if (NewLower == NewUpper) return ConstantRange(getBitWidth(), /*isFullSet=*/true); diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 4299aa4e931d..c525a1228129 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -214,7 +214,12 @@ std::string sys::getHostCPUName() { // As found in a Summer 2010 model iMac. case 37: // Intel Core i7, laptop version. return "corei7"; - case 42: // SandyBridge + + // SandyBridge: + case 42: // Intel Core i7 processor. All processors are manufactured + // using the 32 nm process. + case 44: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 32 nm process. case 45: return "corei7-avx"; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index dbdb303a4fdd..7e094ee78f36 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -113,6 +113,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Win32: return "win32"; case Haiku: return "haiku"; case Minix: return "minix"; + case RTEMS: return "rtems"; } return "<invalid>"; @@ -281,7 +282,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { return cellspu; else if (ArchName == "msp430") return msp430; - else if (ArchName == "mips" || ArchName == "mipsallegrex") + else if (ArchName == "mips" || ArchName == "mipseb" || + ArchName == "mipsallegrex") return mips; else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" || ArchName == "psp") @@ -350,6 +352,8 @@ Triple::OSType Triple::ParseOS(StringRef OSName) { return Haiku; else if (OSName.startswith("minix")) return Minix; + else if (OSName.startswith("rtems")) + return RTEMS; else return UnknownOS; } diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp index 75cea2961a9d..d62123cc985e 100644 --- a/lib/Support/Twine.cpp +++ b/lib/Support/Twine.cpp @@ -14,6 +14,11 @@ using namespace llvm; std::string Twine::str() const { + // If we're storing only a std::string, just return it. + if (LHSKind == StdStringKind && RHSKind == EmptyKind) + return *static_cast<const std::string*>(LHS); + + // Otherwise, flatten and copy the contents first. SmallString<256> Vec; return toStringRef(Vec).str(); } @@ -37,9 +42,9 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const { // Already null terminated, yay! 
return StringRef(static_cast<const char*>(LHS)); case StdStringKind: { - const std::string *str = static_cast<const std::string*>(LHS); - return StringRef(str->c_str(), str->size()); - } + const std::string *str = static_cast<const std::string*>(LHS); + return StringRef(str->c_str(), str->size()); + } default: break; } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 430cf2ed8e8f..f295b92e4a5b 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -842,6 +842,9 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { // Save the name path = FNBuffer; + + // By default mkstemp sets the mode to 0600, so update mode bits now. + AddPermissionBits (*this, 0666); #elif defined(HAVE_MKTEMP) // If we don't have mkstemp, use the old and obsolete mktemp function. if (mktemp(FNBuffer) == 0) diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 4227844ae506..fc5f5809cb40 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -115,7 +115,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { E = OpenedHandles.end(); I != E; ++I) { FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName); if (ptr) { - return (void *) ptr; + return (void *)(intptr_t)ptr; } } diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc index 84862d69e2b5..379645d2ff60 100644 --- a/lib/Support/Windows/explicit_symbols.inc +++ b/lib/Support/Windows/explicit_symbols.inc @@ -2,7 +2,7 @@ #ifdef HAVE__ALLOCA EXPLICIT_SYMBOL(_alloca) - EXPLICIT_SYMBOL2(alloca, _alloca); + EXPLICIT_SYMBOL2(alloca, _alloca) #endif #ifdef HAVE___ALLOCA EXPLICIT_SYMBOL(__alloca) @@ -62,5 +62,5 @@ /* msvcrt */ #if defined(_MSC_VER) - EXPLICIT_SYMBOL2(alloca, _alloca_probe); + EXPLICIT_SYMBOL2(alloca, _alloca_probe) #endif diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 4679f7443bfc..08dc340f8541 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -16,24 +16,29 @@ #define TARGET_ARM_H #include "ARMBaseInfo.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include <cassert> namespace llvm { +class ARMAsmPrinter; class ARMBaseTargetMachine; class FunctionPass; class JITCodeEmitter; -class formatted_raw_ostream; -class MCCodeEmitter; -class TargetAsmBackend; class MachineInstr; -class ARMAsmPrinter; +class MCCodeEmitter; class MCInst; +class MCInstrInfo; +class MCObjectWriter; +class MCSubtargetInfo; +class TargetAsmBackend; +class formatted_raw_ostream; -MCCodeEmitter *createARMMCCodeEmitter(const Target &, - TargetMachine &TM, +MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, MCContext &Ctx); TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &); @@ -53,11 +58,15 @@ FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); -extern Target TheARMTarget, TheThumbTarget; - void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); +/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. 
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); + } // end namespace llvm; #endif diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 6af5f85e8a85..cf333ccd49ba 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -16,18 +16,26 @@ include "llvm/Target/Target.td" +//===----------------------------------------------------------------------===// +// ARM Subtarget state. +// + +def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true", + "Thumb mode">; //===----------------------------------------------------------------------===// // ARM Subtarget features. // -def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", "Enable VFP2 instructions">; -def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", - "Enable VFP3 instructions">; -def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON", - "Enable NEON instructions">; -def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", +def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", + "Enable VFP3 instructions", + [FeatureVFP2]>; +def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", + "Enable NEON instructions", + [FeatureVFP3]>; +def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; @@ -75,32 +83,32 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; +/// Some M architectures don't have the DSP extension (v7E-M vs. v7M) +def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", + "Supports v7 DSP instructions in Thumb2.">; + // Multiprocessing extension. def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; -// ARM architectures. -def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", - "ARM v4T">; -def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", - "ARM v5T">; -def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", - "ARM v5TE, v5TEj, v5TExp">; -def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", - "ARM v6">; -def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M", - "ARM v6m", - [FeatureNoARM, FeatureDB]>; -def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", - "ARM v6t2", - [FeatureThumb2]>; -def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", - "ARM v7A", - [FeatureThumb2, FeatureNEON, FeatureDB]>; -def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", - "ARM v7M", - [FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv]>; +// ARM ISAs. 
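// Editor's note: unlike the old mutually-exclusive ArchV* values, these ISA
// features chain through their implied-features lists (HasV7Ops pulls in
// HasV6T2Ops, which pulls in HasV6Ops, and so on down to HasV4TOps), so each
// processor definition below only needs to name the highest ISA it supports.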
+def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", + "Support ARM v4T instructions">; +def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true", + "Support ARM v5T instructions", + [HasV4TOps]>; +def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", + "Support ARM v5TE, v5TEj, and v5TExp instructions", + [HasV5TOps]>; +def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", + "Support ARM v6 instructions", + [HasV5TEOps]>; +def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", + "Support ARM v6t2 instructions", + [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>; +def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", + "Support ARM v7 instructions", + [HasV6T2Ops]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -109,8 +117,6 @@ def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", include "ARMSchedule.td" // ARM processor families. -def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others", - "One of the other ARM processor families">; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureNEONForFP, @@ -135,64 +141,76 @@ def : ProcNoItin<"strongarm1100", []>; def : ProcNoItin<"strongarm1110", []>; // V4T Processors. -def : ProcNoItin<"arm7tdmi", [ArchV4T]>; -def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>; -def : ProcNoItin<"arm710t", [ArchV4T]>; -def : ProcNoItin<"arm720t", [ArchV4T]>; -def : ProcNoItin<"arm9", [ArchV4T]>; -def : ProcNoItin<"arm9tdmi", [ArchV4T]>; -def : ProcNoItin<"arm920", [ArchV4T]>; -def : ProcNoItin<"arm920t", [ArchV4T]>; -def : ProcNoItin<"arm922t", [ArchV4T]>; -def : ProcNoItin<"arm940t", [ArchV4T]>; -def : ProcNoItin<"ep9312", [ArchV4T]>; +def : ProcNoItin<"arm7tdmi", [HasV4TOps]>; +def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>; +def : ProcNoItin<"arm710t", [HasV4TOps]>; +def : ProcNoItin<"arm720t", [HasV4TOps]>; +def : ProcNoItin<"arm9", [HasV4TOps]>; +def : ProcNoItin<"arm9tdmi", [HasV4TOps]>; +def : ProcNoItin<"arm920", [HasV4TOps]>; +def : ProcNoItin<"arm920t", [HasV4TOps]>; +def : ProcNoItin<"arm922t", [HasV4TOps]>; +def : ProcNoItin<"arm940t", [HasV4TOps]>; +def : ProcNoItin<"ep9312", [HasV4TOps]>; // V5T Processors. -def : ProcNoItin<"arm10tdmi", [ArchV5T]>; -def : ProcNoItin<"arm1020t", [ArchV5T]>; +def : ProcNoItin<"arm10tdmi", [HasV5TOps]>; +def : ProcNoItin<"arm1020t", [HasV5TOps]>; // V5TE Processors. -def : ProcNoItin<"arm9e", [ArchV5TE]>; -def : ProcNoItin<"arm926ej-s", [ArchV5TE]>; -def : ProcNoItin<"arm946e-s", [ArchV5TE]>; -def : ProcNoItin<"arm966e-s", [ArchV5TE]>; -def : ProcNoItin<"arm968e-s", [ArchV5TE]>; -def : ProcNoItin<"arm10e", [ArchV5TE]>; -def : ProcNoItin<"arm1020e", [ArchV5TE]>; -def : ProcNoItin<"arm1022e", [ArchV5TE]>; -def : ProcNoItin<"xscale", [ArchV5TE]>; -def : ProcNoItin<"iwmmxt", [ArchV5TE]>; +def : ProcNoItin<"arm9e", [HasV5TEOps]>; +def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>; +def : ProcNoItin<"arm946e-s", [HasV5TEOps]>; +def : ProcNoItin<"arm966e-s", [HasV5TEOps]>; +def : ProcNoItin<"arm968e-s", [HasV5TEOps]>; +def : ProcNoItin<"arm10e", [HasV5TEOps]>; +def : ProcNoItin<"arm1020e", [HasV5TEOps]>; +def : ProcNoItin<"arm1022e", [HasV5TEOps]>; +def : ProcNoItin<"xscale", [HasV5TEOps]>; +def : ProcNoItin<"iwmmxt", [HasV5TEOps]>; // V6 Processors. 
-def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>; -def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2, +def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; -def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2, +def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; -def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>; -def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2, +def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>; +def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; // V6M Processors. -def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>; +def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM, + FeatureDB]>; // V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2, +def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; -// V7 Processors. +// V7a Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, ProcA8]>; + [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, ProcA9]>; + [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2]>; def : Processor<"cortex-a9-mp", CortexA9Itineraries, - [ArchV7A, ProcA9, FeatureMP]>; + [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureMP]>; // V7M Processors. -def : ProcNoItin<"cortex-m3", [ArchV7M]>; -def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>; +def : ProcNoItin<"cortex-m3", [HasV7Ops, + FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv]>; + +// V7EM Processors. +def : ProcNoItin<"cortex-m4", [HasV7Ops, + FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv, FeatureDSPThumb2, + FeatureT2XtPk, FeatureVFP2, + FeatureVFPOnlySP]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp index 618a2b5f3eac..5e438a976732 100644 --- a/lib/Target/ARM/ARMAsmBackend.cpp +++ b/lib/Target/ARM/ARMAsmBackend.cpp @@ -28,14 +28,6 @@ using namespace llvm; namespace { -class ARMMachObjectWriter : public MCMachObjectTargetWriter { -public: - ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) - : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} -}; - class ARMELFObjectWriter : public MCELFObjectTargetWriter { public: ARMELFObjectWriter(Triple::OSType OSType) @@ -182,7 +174,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { Value >>= 16; // Fallthrough case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movt_hi16_pcrel: + case ARM::fixup_t2_movt_hi16_pcrel: //FIXME: Shouldn't this be shifted like + // the other hi16 fixup? 
case ARM::fixup_t2_movw_lo16_pcrel: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned i = (Value & 0x800) >> 11; @@ -192,8 +185,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // inst{26} = i; // inst{14-12} = Mid3; // inst{7-0} = Lo8; - assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) && - "Out of range pc-relative fixup value!"); + // The value comes in as the whole thing, not just the portion required + // for this fixup, so we need to mask off the bits not handled by this + // portion (lo vs. hi). + Value &= 0xffff; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); uint64_t swapped = (Value & 0xFFFF0000) >> 16; swapped |= (Value & 0x0000FFFF) << 16; @@ -423,12 +418,9 @@ public: : ARMAsmBackend(T), Subtype(st) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createMachObjectWriter(new ARMMachObjectWriter( - /*Is64Bit=*/false, - object::mach::CTM_ARM, - Subtype), - OS, - /*IsLittleEndian=*/true); + return createARMMachObjectWriter(OS, /*Is64Bit=*/false, + object::mach::CTM_ARM, + Subtype); } void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, @@ -505,7 +497,13 @@ TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - if (TheTriple.getArchName() == "armv6" || + if (TheTriple.getArchName() == "armv4t" || + TheTriple.getArchName() == "thumbv4t") + return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T); + else if (TheTriple.getArchName() == "armv5e" || + TheTriple.getArchName() == "thumbv5e") + return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ); + else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") return new DarwinARMAsmBackend(T, object::mach::CSARM_V6); return new DarwinARMAsmBackend(T, object::mach::CSARM_V7); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index eb7390236df3..dbc3ee41f3da 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -654,7 +654,7 @@ void ARMAsmPrinter::emitAttributes() { } /* TODO: ARMBuildAttrs::Allowed is not completely accurate, - * since NEON can have 1 (allowed) or 2 (fused MAC operations) */ + * since NEON can have 1 (allowed) or 2 (MAC operations) */ if (Subtarget->hasNEON()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, ARMBuildAttrs::Allowed); @@ -1010,19 +1010,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { MI->dump(); assert(0 && "Unsupported opcode for unwinding information"); case ARM::MOVr: - case ARM::tMOVgpr2gpr: - case ARM::tMOVgpr2tgpr: Offset = 0; break; case ARM::ADDri: Offset = -MI->getOperand(2).getImm(); break; case ARM::SUBri: - case ARM::t2SUBrSPi: - Offset = MI->getOperand(2).getImm(); + Offset = MI->getOperand(2).getImm(); break; case ARM::tSUBspi: - Offset = MI->getOperand(2).getImm()*4; + Offset = MI->getOperand(2).getImm()*4; break; case ARM::tADDspi: case ARM::tADDrSPi: @@ -1072,39 +1069,18 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { extern cl::opt<bool> EnableARMEHABI; +// Simple pseudo-instructions have their lowering (with expansion to real +// instructions) auto-generated. +#include "ARMGenMCPseudoLowering.inc" + void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: break; - case ARM::B: { - // B is just a Bcc with an 'always' predicate. 
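+  // (Illustrative sketch.) Hand-written cases like the ones removed below
+  // are now covered by the tblgen'erated emitPseudoExpansionLowering, whose
+  // body is roughly a switch of this shape:
+  //   case ARM::B: {
+  //     MCInst TmpInst;
+  //     TmpInst.setOpcode(ARM::Bcc);
+  //     MCOperand MCOp;
+  //     lowerOperand(MI->getOperand(0), MCOp); // branch target
+  //     TmpInst.addOperand(MCOp);
+  //     TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); // 'always'
+  //     TmpInst.addOperand(MCOperand::CreateReg(0)); // no predicate register
+  //     OutStreamer.EmitInstruction(TmpInst);
+  //     return true;
+  //   }
+  // The generated code itself is not shown in this patch; this is only the
+  // idea behind the removals that follow.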
- MCInst TmpInst; - LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - TmpInst.setOpcode(ARM::Bcc); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - return; - } - case ARM::LDMIA_RET: { - // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as - // such has additional code-gen properties and scheduling information. - // To emit it, we just construct as normal and set the opcode to LDMIA_UPD. - MCInst TmpInst; - LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - TmpInst.setOpcode(ARM::LDMIA_UPD); - OutStreamer.EmitInstruction(TmpInst); + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(OutStreamer, MI)) return; - } - case ARM::t2ADDrSPi: - case ARM::t2ADDrSPi12: - case ARM::t2SUBrSPi: - case ARM::t2SUBrSPi12: - assert ((MI->getOperand(1).getReg() == ARM::SP) && - "Unexpected source register!"); - break; + // Check for manual lowerings. + unsigned Opc = MI->getOpcode(); + switch (Opc) { case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass"); case ARM::DBG_VALUE: { if (isVerbose() && OutStreamer.hasRawTextSupport()) { @@ -1115,14 +1091,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::tBfar: { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tBL); - TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MI->getOperand(0).getMBB()->getSymbol(), OutContext))); - OutStreamer.EmitInstruction(TmpInst); - return; - } case ARM::LEApcrel: case ARM::tLEApcrel: case ARM::t2LEApcrel: { @@ -1153,39 +1121,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } - case ARM::MOVPCRX: { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - return; - } // Darwin call instructions are just normal call instructions with different // clobber semantics (they clobber R9). - case ARM::BLr9: - case ARM::BLr9_pred: - case ARM::BLXr9: - case ARM::BLXr9_pred: { - unsigned newOpc; - switch (Opc) { - default: assert(0); - case ARM::BLr9: newOpc = ARM::BL; break; - case ARM::BLr9_pred: newOpc = ARM::BL_pred; break; - case ARM::BLXr9: newOpc = ARM::BLX; break; - case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break; - } - MCInst TmpInst; - LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - TmpInst.setOpcode(newOpc); - OutStreamer.EmitInstruction(TmpInst); - return; - } case ARM::BXr9_CALL: case ARM::BX_CALL: { { @@ -1215,6 +1152,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(TmpInst); } { @@ -1445,7 +1385,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::t2BR_JT: { // Lower and emit the instruction itself, then the jump table following it. 
MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVgpr2gpr); + TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); // Add predicate operands. @@ -1494,7 +1434,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // mov pc, target MCInst TmpInst; unsigned Opc = MI->getOpcode() == ARM::BR_JTr ? - ARM::MOVr : ARM::tMOVgpr2gpr; + ARM::MOVr : ARM::tMOVr; TmpInst.setOpcode(Opc); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); @@ -1507,7 +1447,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); // Make sure the Thumb jump table is 4-byte aligned. - if (Opc == ARM::tMOVgpr2gpr) + if (Opc == ARM::tMOVr) EmitAlignment(2); // Output the data for the jump table itself @@ -1599,11 +1539,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *Label = GetARMSJLJEHLabel(); { MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVgpr2tgpr); + TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ValReg)); TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // 's' bit operand - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.AddComment("eh_setjmp begin"); OutStreamer.EmitInstruction(TmpInst); } @@ -1817,7 +1758,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } { MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVtgpr2gpr); + TmpInst.setOpcode(ARM::tMOVr); TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); // Predicate. @@ -1858,75 +1799,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - // Tail jump branches are really just branch instructions with additional - // code-gen attributes. Convert them to the canonical form here. - case ARM::TAILJMPd: - case ARM::TAILJMPdND: { - MCInst TmpInst, TmpInst2; - // Lower the instruction as-is to get the operands properly converted. - LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); - TmpInst.setOpcode(ARM::Bcc); - TmpInst.addOperand(TmpInst2.getOperand(0)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("TAILCALL"); - OutStreamer.EmitInstruction(TmpInst); - return; - } - case ARM::tTAILJMPd: - case ARM::tTAILJMPdND: { - MCInst TmpInst, TmpInst2; - LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); - // The Darwin toolchain doesn't support tail call relocations of 16-bit - // branches. - TmpInst.setOpcode(Opc == ARM::tTAILJMPd ? ARM::t2B : ARM::tB); - TmpInst.addOperand(TmpInst2.getOperand(0)); - OutStreamer.AddComment("TAILCALL"); - OutStreamer.EmitInstruction(TmpInst); - return; - } - case ARM::TAILJMPrND: - case ARM::tTAILJMPrND: - case ARM::TAILJMPr: - case ARM::tTAILJMPr: { - unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND) - ? ARM::BX : ARM::tBX; - MCInst TmpInst; - TmpInst.setOpcode(newOpc); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Predicate. 
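    // (Illustrative note.) Predicable ARM MCInsts carry the predicate as a
    // trailing operand pair: an immediate condition code (ARMCC::AL,
    // "always", in the lowerings here) followed by the register the
    // condition reads, which is reg0 for AL and ARM::CPSR for a real
    // condition. Several hunks in this patch add exactly this pair where
    // tMOVr became predicable.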
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("TAILCALL"); - OutStreamer.EmitInstruction(TmpInst); - return; - } - - // These are the pseudos created to comply with stricter operand restrictions - // on ARMv5. Lower them now to "normal" instructions, since all the - // restrictions are already satisfied. - case ARM::MULv5: - EmitPatchedInstruction(MI, ARM::MUL); - return; - case ARM::MLAv5: - EmitPatchedInstruction(MI, ARM::MLA); - return; - case ARM::SMULLv5: - EmitPatchedInstruction(MI, ARM::SMULL); - return; - case ARM::UMULLv5: - EmitPatchedInstruction(MI, ARM::UMULL); - return; - case ARM::SMLALv5: - EmitPatchedInstruction(MI, ARM::SMLAL); - return; - case ARM::UMLALv5: - EmitPatchedInstruction(MI, ARM::UMLAL); - return; - case ARM::UMAALv5: - EmitPatchedInstruction(MI, ARM::UMAAL); - return; } MCInst TmpInst; @@ -1944,11 +1816,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { //===----------------------------------------------------------------------===// static MCInstPrinter *createARMMCInstPrinter(const Target &T, - TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(TM, MAI); + return new ARMInstPrinter(MAI); return 0; } diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 5f9169ef7f77..7741fc4b34e8 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -21,6 +21,8 @@ namespace llvm { +class MCOperand; + namespace ARM { enum DW_ISA { DW_ISA_ARM_thumb = 1, @@ -72,6 +74,9 @@ public: void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + private: // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile() void emitAttributes(); @@ -84,6 +89,10 @@ private: void EmitUnwindingInstruction(const MachineInstr *MI); + // emitPseudoExpansionLowering - tblgen'erated. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + public: void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); @@ -100,6 +109,7 @@ public: llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; } + MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const; MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; @@ -107,7 +117,7 @@ public: MCSymbol *GetARMSJLJEHLabel(void) const; MCSymbol *GetARMGVSymbol(const GlobalValue *GV); - + /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h index 36edbad7a601..458f7dd1f784 100644 --- a/lib/Target/ARM/ARMBaseInfo.h +++ b/lib/Target/ARM/ARMBaseInfo.h @@ -17,20 +17,12 @@ #ifndef ARMBASEINFO_H #define ARMBASEINFO_H +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" // Note that the following auto-generated files only defined enum types, and // so are safe to include here. -// Defines symbolic names for ARM registers. This defines a mapping from -// register name to register number. -// -#include "ARMGenRegisterNames.inc" - -// Defines symbolic names for the ARM instructions. 
-// -#include "ARMGenInstrNames.inc" - namespace llvm { // Enums corresponding to ARM condition codes diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 44a397611526..649bd7d5ce3f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -18,7 +18,6 @@ #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "ARMGenInstrInfo.inc" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" @@ -31,10 +30,15 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/STLExtras.h" + +#define GET_INSTRINFO_CTOR +#include "ARMGenInstrInfo.inc" + using namespace llvm; static cl::opt<bool> @@ -74,7 +78,7 @@ static const ARM_MLxEntry ARM_MLxTable[] = { }; ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), + : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), Subtarget(STI) { for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) @@ -136,9 +140,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *UpdateMI = NULL; MachineInstr *MemMI = NULL; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); - const TargetInstrDesc &TID = MI->getDesc(); - unsigned NumOps = TID.getNumOperands(); - bool isLoad = !TID.mayStore(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumOps = MCID.getNumOperands(); + bool isLoad = !MCID.mayStore(); const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); const MachineOperand &Base = MI->getOperand(2); const MachineOperand &Offset = MI->getOperand(NumOps-3); @@ -475,8 +479,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { // FIXME: This confuses implicit_def with optional CPSR def. - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef()) return false; bool Found = false; @@ -495,11 +499,11 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, /// By default, this returns true for every instruction with a /// PredicateOperand. bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isPredicable()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isPredicable()) return false; - if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { + if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); return AFI->isThumb2Function(); @@ -524,35 +528,23 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const MachineFunction *MF = MBB.getParent(); const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); - // Basic size info comes from the TSFlags field. 
- const TargetInstrDesc &TID = MI->getDesc(); - uint64_t TSFlags = TID.TSFlags; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.getSize()) + return MCID.getSize(); - unsigned Opc = MI->getOpcode(); - switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { - default: { // If this machine instr is an inline asm, measure it. if (MI->getOpcode() == ARM::INLINEASM) return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); if (MI->isLabel()) return 0; + unsigned Opc = MI->getOpcode(); switch (Opc) { - default: - llvm_unreachable("Unknown or unset size field for instr!"); case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::DBG_VALUE: return 0; - } - break; - } - case ARMII::Size8Bytes: return 8; // ARM instruction x 2. - case ARMII::Size4Bytes: return 4; // ARM / Thumb2 instruction. - case ARMII::Size2Bytes: return 2; // Thumb1 instruction. - case ARMII::SizeSpecial: { - switch (Opc) { case ARM::MOVi16_ga_pcrel: case ARM::MOVTi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: @@ -588,9 +580,9 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // entry is one byte; TBH two byte each. unsigned EntrySize = (Opc == ARM::t2TBB_JT) ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); - unsigned NumOps = TID.getNumOperands(); + unsigned NumOps = MCID.getNumOperands(); MachineOperand JTOP = - MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); + MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); assert(MJTI != 0); @@ -616,8 +608,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // Otherwise, pseudo-instruction sizes are zero. return 0; } - } - } return 0; // Not reached } @@ -647,7 +637,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVQ; + Opc = ARM::VORRq; else if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVQQ; else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) @@ -657,6 +647,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); MIB.addReg(SrcReg, getKillRegState(KillSrc)); + if (Opc == ARM::VORRq) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ) AddDefaultPred(MIB); } @@ -788,7 +780,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, break; case ARM::STRi12: case ARM::t2STRi12: - case ARM::tSpill: + case ARM::tSTRspi: case ARM::VSTRD: case ARM::VSTRS: if (MI->getOperand(1).isFI() && @@ -923,7 +915,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, break; case ARM::LDRi12: case ARM::t2LDRi12: - case ARM::tRestore: + case ARM::tLDRspi: case ARM::VLDRD: case ARM::VLDRS: if (MI->getOperand(1).isFI() && @@ -1269,20 +1261,20 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; } -bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - unsigned ExtraPredCycles, - float Probability, - float Confidence) const { +bool ARMBaseInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const { if (!NumCycles) return false; // Attempt to estimate the relative costs of predication versus branching. 
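+  // Worked example of the integer cost model introduced below (illustrative
+  // numbers): in the two-block form, with Probability = 1/2, two cycles in
+  // each block and no extra predication cycles, each block's unpredicated
+  // cost is 2*1/2 = 1, so UnpredCost = 1 + 1 + 1 (the branch itself)
+  // + MispredictionPenalty/10; with a penalty of 10 that totals 4, and
+  // since TCycles + FCycles = 4 <= 4 the blocks are predicated. Integer
+  // division replaces the old floating-point arithmetic throughout.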
- float UnpredCost = Probability * NumCycles; - UnpredCost += 1.0; // The branch itself - UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); + unsigned UnpredCost = Probability.getNumerator() * NumCycles; + UnpredCost /= Probability.getDenominator(); + UnpredCost += 1; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() / 10; - return (float)(NumCycles + ExtraPredCycles) < UnpredCost; + return (NumCycles + ExtraPredCycles) <= UnpredCost; } bool ARMBaseInstrInfo:: @@ -1290,16 +1282,23 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned TCycles, unsigned TExtra, MachineBasicBlock &FMBB, unsigned FCycles, unsigned FExtra, - float Probability, float Confidence) const { + const BranchProbability &Probability) const { if (!TCycles || !FCycles) return false; // Attempt to estimate the relative costs of predication versus branching. - float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles; - UnpredCost += 1.0; // The branch itself - UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); - - return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost; + unsigned TUnpredCost = Probability.getNumerator() * TCycles; + TUnpredCost /= Probability.getDenominator(); + + uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); + unsigned FUnpredCost = Comp * FCycles; + FUnpredCost /= Probability.getDenominator(); + + unsigned UnpredCost = TUnpredCost + FUnpredCost; + UnpredCost += 1; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() / 10; + + return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; } /// getInstrPredicate - If instruction is predicated, returns its predicate @@ -1363,7 +1362,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; @@ -1803,7 +1802,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, if (!ItinData || ItinData->isEmpty()) return 1; - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; if (UOps) @@ -1906,10 +1905,10 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, int ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const { - int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1; + int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; if (RegNo <= 0) // Def is the address writeback. 
return ItinData->getOperandCycle(DefClass, DefIdx);
@@ -1924,7 +1923,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = RegNo;
bool isSLoad = false;
- switch (DefTID.getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLDMSIA:
case ARM::VLDMSIA_UPD:
@@ -1947,10 +1946,10 @@
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
- int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
@@ -1982,10 +1981,10 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
int
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
- int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
@@ -1999,7 +1998,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = RegNo;
bool isSStore = false;
- switch (UseTID.getOpcode()) {
+ switch (UseMCID.getOpcode()) {
default: break;
case ARM::VSTMSIA:
case ARM::VSTMSIA_UPD:
@@ -2022,10 +2021,10 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
int
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
- int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
@@ -2051,14 +2050,14 @@
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const {
- unsigned DefClass = DefTID.getSchedClass();
- unsigned UseClass = UseTID.getSchedClass();
+ unsigned DefClass = DefMCID.getSchedClass();
+ unsigned UseClass = UseMCID.getSchedClass();
- if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+ if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
// This may be a def / use of a variable_ops instruction, the operand
// latency might be determinable dynamically. Let the target try to
// figure it out.
int DefCycle = -1; bool LdmBypass = false; - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); break; @@ -2077,7 +2076,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLDMSIA: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: - DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); break; case ARM::LDMIA_RET: @@ -2098,7 +2097,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: LdmBypass = 1; - DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); break; } @@ -2107,7 +2106,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, DefCycle = 2; int UseCycle = -1; - switch (UseTID.getOpcode()) { + switch (UseMCID.getOpcode()) { default: UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); break; @@ -2118,7 +2117,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VSTMSIA: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: - UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); break; case ARM::STMIA: @@ -2137,7 +2136,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2STMDB: case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: - UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); break; } @@ -2150,7 +2149,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (LdmBypass) { // It's a variable_ops instruction so we can't use DefIdx here. Just use // first def operand. - if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1, + if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, UseClass, UseIdx)) --UseCycle; } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, @@ -2170,11 +2169,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, DefMI->isRegSequence() || DefMI->isImplicitDef()) return 1; - const TargetInstrDesc &DefTID = DefMI->getDesc(); + const MCInstrDesc &DefMCID = DefMI->getDesc(); if (!ItinData || ItinData->isEmpty()) - return DefTID.mayLoad() ? 3 : 1; + return DefMCID.mayLoad() ? 3 : 1; - const TargetInstrDesc &UseTID = UseMI->getDesc(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); const MachineOperand &DefMO = DefMI->getOperand(DefIdx); if (DefMO.getReg() == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { @@ -2183,7 +2182,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } // CPSR set and branch can be paired in the same cycle. - if (UseTID.isBranch()) + if (UseMCID.isBranch()) return 0; } @@ -2191,14 +2190,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, ? (*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() ? 
(*UseMI->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, - UseTID, UseIdx, UseAlign); + int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, + UseMCID, UseIdx, UseAlign); if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { @@ -2223,7 +2222,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (DefAlign < 8 && Subtarget.isCortexA9()) - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8: case ARM::VLD1q16: @@ -2327,37 +2326,37 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!DefNode->isMachineOpcode()) return 1; - const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode()); + const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); - if (isZeroCost(DefTID.Opcode)) + if (isZeroCost(DefMCID.Opcode)) return 0; if (!ItinData || ItinData->isEmpty()) - return DefTID.mayLoad() ? 3 : 1; + return DefMCID.mayLoad() ? 3 : 1; if (!UseNode->isMachineOpcode()) { - int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx); + int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); if (Subtarget.isCortexA9()) return Latency <= 2 ? 1 : Latency - 1; else return Latency <= 3 ? 1 : Latency - 2; } - const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode()); + const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); unsigned DefAlign = !DefMN->memoperands_empty() ? (*DefMN->memoperands_begin())->getAlignment() : 0; const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); unsigned UseAlign = !UseMN->memoperands_empty() ? (*UseMN->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, - UseTID, UseIdx, UseAlign); + int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, + UseMCID, UseIdx, UseAlign); if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { @@ -2384,7 +2383,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (DefAlign < 8 && Subtarget.isCortexA9()) - switch (DefTID.getOpcode()) { + switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8Pseudo: case ARM::VLD1q16Pseudo: @@ -2503,10 +2502,10 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, if (!ItinData || ItinData->isEmpty()) return 1; - const TargetInstrDesc &TID = MI->getDesc(); - unsigned Class = TID.getSchedClass(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Class = MCID.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR)) + if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. 
*PredCost = 1; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 9a2faf8f9aae..507e8974bf7b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -20,6 +20,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#define GET_INSTRINFO_HEADER +#include "ARMGenInstrInfo.inc" + namespace llvm { class ARMSubtarget; class ARMBaseRegisterInfo; @@ -36,24 +39,16 @@ namespace ARMII { // This four-bit field describes the addressing mode used. AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h - // Size* - Flags to keep track of the size of an instruction. - SizeShift = 5, - SizeMask = 7 << SizeShift, - SizeSpecial = 1, // 0 byte pseudo or special case. - Size8Bytes = 2, - Size4Bytes = 3, - Size2Bytes = 4, - // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load // and store ops only. Generic "updating" flag is used for ld/st multiple. // The index mode enums are declared in ARMBaseInfo.h - IndexModeShift = 8, + IndexModeShift = 5, IndexModeMask = 3 << IndexModeShift, //===------------------------------------------------------------------===// // Instruction encoding formats. // - FormShift = 10, + FormShift = 7, FormMask = 0x3f << FormShift, // Pseudo instructions @@ -126,15 +121,15 @@ namespace ARMII { // UnaryDP - Indicates this is a unary data processing instruction, i.e. // it doesn't have a Rn operand. - UnaryDP = 1 << 16, + UnaryDP = 1 << 13, // Xform16Bit - Indicates this Thumb2 instruction may be transformed into // a 16-bit Thumb instruction if certain conditions are met. - Xform16Bit = 1 << 17, + Xform16Bit = 1 << 14, //===------------------------------------------------------------------===// // Code domain. - DomainShift = 18, + DomainShift = 15, DomainMask = 7 << DomainShift, DomainGeneral = 0 << DomainShift, DomainVFP = 1 << DomainShift, @@ -172,7 +167,7 @@ namespace ARMII { }; } -class ARMBaseInstrInfo : public TargetInstrInfoImpl { +class ARMBaseInstrInfo : public ARMGenInstrInfo { const ARMSubtarget &Subtarget; protected: @@ -291,8 +286,8 @@ public: int64_t &Offset1, int64_t &Offset2)const; /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to - /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should - /// be scheduled togther. On some targets if two loads are loading from + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads + /// should be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. This function takes two integers that represent the load offsets /// from the common base address. 
It returns true if it decides it's desirable @@ -308,18 +303,18 @@ public: virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - float Prob, float Confidence) const; + const BranchProbability &Probability) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, - float Probability, float Confidence) const; + const BranchProbability &Probability) const; virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - float Probability, - float Confidence) const { + const BranchProbability + &Probability) const { return NumCycles == 1; } @@ -353,25 +348,25 @@ public: SDNode *UseNode, unsigned UseIdx) const; private: int getVLDMDefCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const; int getLDMDefCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefClass, unsigned DefIdx, unsigned DefAlign) const; int getVSTMUseCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const; int getSTMUseCycle(const InstrItineraryData *ItinData, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseClass, unsigned UseIdx, unsigned UseAlign) const; int getOperandLatency(const InstrItineraryData *ItinData, - const TargetInstrDesc &DefTID, + const MCInstrDesc &DefMCID, unsigned DefIdx, unsigned DefAlign, - const TargetInstrDesc &UseTID, + const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const; int getInstrLatency(const InstrItineraryData *ItinData, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 4ab37f6cc759..ba422952ac1a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -40,6 +40,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" +#define GET_REGINFO_TARGET_DESC +#include "ARMGenRegisterInfo.inc" + using namespace llvm; static cl::opt<bool> @@ -54,8 +57,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), - TII(tii), STI(sti), + : ARMGenRegisterInfo(), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } @@ -100,6 +102,12 @@ getReservedRegs(const MachineFunction &MF) const { // Some targets reserve R9. if (STI.isR9Reserved()) Reserved.set(ARM::R9); + // Reserve D16-D31 if the subtarget doesn't support them. + if (!STI.hasVFP3() || STI.hasD16()) { + assert(ARM::D31 == ARM::D16 + 15); + for (unsigned i = 0; i != 16; ++i) + Reserved.set(ARM::D16 + i); + } return Reserved; } @@ -387,12 +395,12 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -/// getAllocationOrder - Returns the register allocation order for a specified -/// register class in the form of a pair of TargetRegisterClass iterators. 
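A side note on the ArrayRef<unsigned> return type adopted in this hunk: an llvm::ArrayRef can bind directly to a fixed-size C array, with the length deduced from the array type, which is what lets the static hint tables below be returned without the old begin/end iterator pairs. A minimal sketch, assuming llvm/ADT/ArrayRef.h; the table contents are invented for illustration, not taken from the patch:

static const unsigned HintOrder[] = { ARM::R0, ARM::R2, ARM::R4 }; // invented
ArrayRef<unsigned> Order(HintOrder); // length deduced from the array type
assert(Order.size() == 3 && Order[1] == ARM::R2);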
-std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> -ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const { +/// getRawAllocationOrder - Returns the register allocation order for a +/// specified register class with a target-dependent hint. +ArrayRef<unsigned> +ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); // Alternative register allocation orders when favoring even / odd registers // of register pairs. @@ -469,70 +477,54 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, // We only support even/odd hints for GPR and rGPR. if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass) - return std::make_pair(RC->allocation_order_begin(MF), - RC->allocation_order_end(MF)); + return RC->getRawAllocationOrder(MF); if (HintType == ARMRI::RegPairEven) { if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) // It's no longer possible to fulfill this hint. Return the default // allocation order. - return std::make_pair(RC->allocation_order_begin(MF), - RC->allocation_order_end(MF)); + return RC->getRawAllocationOrder(MF); if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) - return std::make_pair(GPREven1, - GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPREven1); else - return std::make_pair(GPREven4, - GPREven4 + (sizeof(GPREven4)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPREven4); } else if (FramePtr == ARM::R7) { if (!STI.isR9Reserved()) - return std::make_pair(GPREven2, - GPREven2 + (sizeof(GPREven2)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPREven2); else - return std::make_pair(GPREven5, - GPREven5 + (sizeof(GPREven5)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPREven5); } else { // FramePtr == ARM::R11 if (!STI.isR9Reserved()) - return std::make_pair(GPREven3, - GPREven3 + (sizeof(GPREven3)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPREven3); else - return std::make_pair(GPREven6, - GPREven6 + (sizeof(GPREven6)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPREven6); } } else if (HintType == ARMRI::RegPairOdd) { if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) // It's no longer possible to fulfill this hint. Return the default // allocation order. 
- return std::make_pair(RC->allocation_order_begin(MF), - RC->allocation_order_end(MF)); + return RC->getRawAllocationOrder(MF); if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) - return std::make_pair(GPROdd1, - GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPROdd1); else - return std::make_pair(GPROdd4, - GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPROdd4); } else if (FramePtr == ARM::R7) { if (!STI.isR9Reserved()) - return std::make_pair(GPROdd2, - GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPROdd2); else - return std::make_pair(GPROdd5, - GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPROdd5); } else { // FramePtr == ARM::R11 if (!STI.isR9Reserved()) - return std::make_pair(GPROdd3, - GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPROdd3); else - return std::make_pair(GPROdd6, - GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned))); + return ArrayRef<unsigned>(GPROdd6); } } - return std::make_pair(RC->allocation_order_begin(MF), - RC->allocation_order_end(MF)); + return RC->getRawAllocationOrder(MF); } /// ResolveRegAllocHint - Resolves the specified register allocation hint @@ -965,7 +957,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, int64_t ARMBaseRegisterInfo:: getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); int64_t InstrOffs = 0;; int Scale = 1; @@ -1115,11 +1107,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - const TargetInstrDesc &TID = TII.get(ADDriOpc); + const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MRI.constrainRegClass(BaseReg, TID.OpInfo[0].getRegClass(this)); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); - MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, TID, BaseReg) + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) @@ -1155,7 +1147,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const { - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); unsigned i = 0; @@ -1291,11 +1283,5 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Update the original instruction to use the scratch register. 
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - if (MI.getOpcode() == ARM::t2ADDrSPi) - MI.setDesc(TII.get(ARM::t2ADDri)); - else if (MI.getOpcode() == ARM::t2SUBrSPi) - MI.setDesc(TII.get(ARM::t2SUBri)); } } - -#include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index c60d75a6893d..b4b4059e7361 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -16,7 +16,9 @@ #include "ARM.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "ARMGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "ARMGenRegisterInfo.inc" namespace llvm { class ARMSubtarget; @@ -134,10 +136,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; - std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> - getAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const; + ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const; unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 16d4ca599a06..d6fca6277501 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -96,13 +96,13 @@ namespace { void addPCLabel(unsigned LabelID); void emitPseudoInstruction(const MachineInstr &MI); unsigned getMachineSoRegOpValue(const MachineInstr &MI, - const TargetInstrDesc &TID, + const MCInstrDesc &MCID, const MachineOperand &MO, unsigned OpIdx); unsigned getMachineSoImmOpValue(unsigned SoImm); unsigned getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const; + const MCInstrDesc &MCID) const; void emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd = 0, @@ -443,9 +443,9 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); else if (MO.isCPI()) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // For VFP load, the immediate offset is multiplied by 4. - unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) + unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; emitConstPoolAddress(MO.getIndex(), Reloc); } else if (MO.isJTI()) @@ -757,7 +757,7 @@ void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { // It's basically add r, pc, (LJTI - $+8) - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Emit the 'add' instruction. unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 @@ -766,7 +766,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { Binary |= II->getPredicate(&MI) << ARMII::CondShift; // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, TID); + Binary |= getAddrModeSBit(MI, MCID); // Encode Rd. 
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; @@ -912,7 +912,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, - const TargetInstrDesc &TID, + const MCInstrDesc &MCID, const MachineOperand &MO, unsigned OpIdx) { unsigned Binary = getMachineOpValue(MI, MO); @@ -982,8 +982,8 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { } unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const { - for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){ + const MCInstrDesc &MCID) const { + for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) return 1 << ARMII::S_BitShift; @@ -994,7 +994,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1003,10 +1003,10 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= II->getPredicate(&MI) << ARMII::CondShift; // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, TID); + Binary |= getAddrModeSBit(MI, MCID); // Encode register def if there is one. - unsigned NumDefs = TID.getNumDefs(); + unsigned NumDefs = MCID.getNumDefs(); unsigned OpIdx = 0; if (NumDefs) Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; @@ -1014,7 +1014,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, // Special handling for implicit use (e.g. PC). Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); - if (TID.Opcode == ARM::MOVi16) { + if (MCID.Opcode == ARM::MOVi16) { // Get immediate from MI. unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), ARM::reloc_arm_movw); @@ -1023,14 +1023,14 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= ((Lo16 >> 12) & 0xF) << 16; emitWordLE(Binary); return; - } else if(TID.Opcode == ARM::MOVTi16) { + } else if(MCID.Opcode == ARM::MOVTi16) { unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), ARM::reloc_arm_movt) >> 16); Binary |= Hi16 & 0xFFF; Binary |= ((Hi16 >> 12) & 0xF) << 16; emitWordLE(Binary); return; - } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) { + } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { uint32_t v = ~MI.getOperand(2).getImm(); int32_t lsb = CountTrailingZeros_32(v); int32_t msb = (32 - CountLeadingZeros_32(v)) - 1; @@ -1039,7 +1039,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= (lsb & 0x1F) << 7; emitWordLE(Binary); return; - } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) { + } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) { // Encode Rn in Instr{0-3} Binary |= getMachineOpValue(MI, OpIdx++); @@ -1054,11 +1054,11 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, } // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. 
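  // (Illustrative aside.) The CPSR scan in getAddrModeSBit above maps
  // straight to the S bit of the ARM data-processing encoding (bit 20): an
  // instruction like "adds r0, r1, r2" carries a CPSR def operand and gets
  // 1 << ARMII::S_BitShift OR'd into its encoding, while a plain
  // "add r0, r1, r2" does not.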
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; // Encode first non-shifter register operand if there is one. - bool isUnary = TID.TSFlags & ARMII::UnaryDP; + bool isUnary = MCID.TSFlags & ARMII::UnaryDP; if (!isUnary) { if (ImplicitRn) // Special handling for implicit use (e.g. PC). @@ -1071,9 +1071,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, // Encode shifter operand. const MachineOperand &MO = MI.getOperand(OpIdx); - if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { // Encode SoReg. - emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx)); + emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx)); return; } @@ -1092,9 +1092,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned Form = TID.TSFlags & ARMII::FormMask; - bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1134,7 +1134,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; // If this is a two-address operand, skip it. e.g. LDR_PRE. - if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; const MachineOperand &MO2 = MI.getOperand(OpIdx); @@ -1170,9 +1170,9 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRn) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned Form = TID.TSFlags & ARMII::FormMask; - bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1194,7 +1194,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; // Skip LDRD and STRD's second operand. - if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD) + if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD) ++OpIdx; // Set second operand @@ -1205,7 +1205,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; // If this is a two-address operand, skip it. e.g. LDRH_POST. 
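  // (Illustrative.) MCOI::TIED_TO marks an operand constrained to use the
  // same register as an earlier def. For a pre/post-indexed load such as
  // LDRH_POST, the updated base-register def is tied to the base-address
  // use, so the encoder skips the tied operand here rather than encoding
  // the register twice.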
- if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; const MachineOperand &MO2 = MI.getOperand(OpIdx); @@ -1255,8 +1255,8 @@ static unsigned getAddrModeUPBits(unsigned Mode) { } void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1295,7 +1295,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1304,12 +1304,12 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { Binary |= II->getPredicate(&MI) << ARMII::CondShift; // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, TID); + Binary |= getAddrModeSBit(MI, MCID); // 32x32->64bit operations have two destination registers. The number // of register definitions will tell us if that's what we're dealing with. unsigned OpIdx = 0; - if (TID.getNumDefs() == 2) + if (MCID.getNumDefs() == 2) Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; // Encode Rd @@ -1323,16 +1323,16 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { // Many multiple instructions (e.g. MLA) have three src operands. Encode // it as Rn (for multiply, that's in the same offset as RdLo. - if (TID.getNumOperands() > OpIdx && - !TID.OpInfo[OpIdx].isPredicate() && - !TID.OpInfo[OpIdx].isOptionalDef()) + if (MCID.getNumOperands() > OpIdx && + !MCID.OpInfo[OpIdx].isPredicate() && + !MCID.OpInfo[OpIdx].isOptionalDef()) Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; emitWordLE(Binary); } void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1361,15 +1361,15 @@ void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. if (MI.getOperand(OpIdx).isImm() && - !TID.OpInfo[OpIdx].isPredicate() && - !TID.OpInfo[OpIdx].isOptionalDef()) + !MCID.OpInfo[OpIdx].isPredicate() && + !MCID.OpInfo[OpIdx].isOptionalDef()) Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; emitWordLE(Binary); } void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. 
unsigned Binary = getBinaryCodeForInstr(MI); @@ -1378,7 +1378,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { Binary |= II->getPredicate(&MI) << ARMII::CondShift; // PKH instructions are finished at this point - if (TID.Opcode == ARM::PKHBT || TID.Opcode == ARM::PKHTB) { + if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) { emitWordLE(Binary); return; } @@ -1389,9 +1389,9 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; const MachineOperand &MO = MI.getOperand(OpIdx++); - if (OpIdx == TID.getNumOperands() || - TID.OpInfo[OpIdx].isPredicate() || - TID.OpInfo[OpIdx].isOptionalDef()) { + if (OpIdx == MCID.getNumOperands() || + MCID.OpInfo[OpIdx].isPredicate() || + MCID.OpInfo[OpIdx].isOptionalDef()) { // Encode Rm and it's done. Binary |= getMachineOpValue(MI, MO); emitWordLE(Binary); @@ -1406,7 +1406,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { // Encode shift_imm. unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); - if (TID.Opcode == ARM::PKHTB) { + if (MCID.Opcode == ARM::PKHTB) { assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); if (ShiftAmt == 32) ShiftAmt = 0; @@ -1418,7 +1418,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGen. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1431,11 +1431,11 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { // Encode saturate bit position. unsigned Pos = MI.getOperand(1).getImm(); - if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16) + if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16) Pos -= 1; assert((Pos < 16 || (Pos < 32 && - TID.Opcode != ARM::SSAT16 && - TID.Opcode != ARM::USAT16)) && + MCID.Opcode != ARM::SSAT16 && + MCID.Opcode != ARM::USAT16)) && "saturate bit position out of range"); Binary |= Pos << 16; @@ -1443,7 +1443,7 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 2); // Encode shift_imm. - if (TID.getNumOperands() == 4) { + if (MCID.getNumOperands() == 4) { unsigned ShiftOp = MI.getOperand(3).getImm(); ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); if (Opc == ARM_AM::asr) @@ -1459,9 +1459,9 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); - if (TID.Opcode == ARM::TPsoft) { + if (MCID.Opcode == ARM::TPsoft) { llvm_unreachable("ARM::TPsoft FIXME"); // FIXME } @@ -1498,20 +1498,20 @@ void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { } void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Handle jump tables. - if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) { + if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) { // First emit a ldr pc, [] instruction. emitDataProcessingInstruction(MI, ARM::PC); // Then emit the inline jump table. unsigned JTIndex = - (TID.Opcode == ARM::BR_JTr) + (MCID.Opcode == ARM::BR_JTr) ? 
MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); emitInlineJumpTable(JTIndex); return; - } else if (TID.Opcode == ARM::BR_JTm) { + } else if (MCID.Opcode == ARM::BR_JTm) { // First emit a ldr pc, [] instruction. emitLoadStoreInstruction(MI, ARM::PC); @@ -1526,7 +1526,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; - if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR) + if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) // The return register is LR. Binary |= getARMRegisterNumbering(ARM::LR); else @@ -1579,7 +1579,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { } void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1596,16 +1596,16 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { Binary |= encodeVFPRd(MI, OpIdx++); // If this is a two-address operand, skip it, e.g. FMACD. - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; // Encode Dn / Sn. - if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) Binary |= encodeVFPRn(MI, OpIdx++); - if (OpIdx == TID.getNumOperands() || - TID.OpInfo[OpIdx].isPredicate() || - TID.OpInfo[OpIdx].isOptionalDef()) { + if (OpIdx == MCID.getNumOperands() || + MCID.OpInfo[OpIdx].isPredicate() || + MCID.OpInfo[OpIdx].isOptionalDef()) { // FCMPEZD etc. has only one operand. emitWordLE(Binary); return; @@ -1618,8 +1618,8 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - unsigned Form = TID.TSFlags & ARMII::FormMask; + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1709,8 +1709,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); - bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1795,8 +1795,8 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { unsigned Binary = getBinaryCodeForInstr(MI); unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; - const TargetInstrDesc &TID = MI.getDesc(); - if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { + const MCInstrDesc &MCID = MI.getDesc(); + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { RegTOpIdx = 0; RegNOpIdx = 1; LnOpIdx = 2; @@ -1863,12 +1863,12 @@ void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); unsigned Binary = getBinaryCodeForInstr(MI); // Destination register is encoded in Dd; source register in Dm. 
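// The NEON 2- and 3-register emitters below share one pattern: encode Rd,
// skip any tied use, then Rn (3-register forms only) and Rm. Schematically:
  Binary |= encodeNEONRd(MI, OpIdx++);
  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
    ++OpIdx;                           // tied source shares Rd's register
  Binary |= encodeNEONRn(MI, OpIdx++); // 3-register forms only
  Binary |= encodeNEONRm(MI, OpIdx);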
unsigned OpIdx = 0; Binary |= encodeNEONRd(MI, OpIdx++); - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; Binary |= encodeNEONRm(MI, OpIdx); if (IsThumb) @@ -1878,15 +1878,15 @@ void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { } void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { - const TargetInstrDesc &TID = MI.getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); unsigned Binary = getBinaryCodeForInstr(MI); // Destination register is encoded in Dd; source registers in Dn and Dm. unsigned OpIdx = 0; Binary |= encodeNEONRd(MI, OpIdx++); - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; Binary |= encodeNEONRn(MI, OpIdx++); - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) ++OpIdx; Binary |= encodeNEONRm(MI, OpIdx); if (IsThumb) diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index baf95a33dd4b..f45ebdc53500 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1538,7 +1538,10 @@ bool ARMConstantIslands::UndoLRSpillRestore() { if (MI->getOpcode() == ARM::tPOP_RET && MI->getOperand(2).getReg() == ARM::PC && MI->getNumExplicitOperands() == 3) { - BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)); + // Create the new insn and copy the predicate from the old. + BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); MI->eraseFromParent(); MadeChange = true; } @@ -1692,9 +1695,9 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; - const TargetInstrDesc &TID = MI->getDesc(); - unsigned NumOps = TID.getNumOperands(); - unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumOps = MCID.getNumOperands(); + unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1815,9 +1818,9 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; - const TargetInstrDesc &TID = MI->getDesc(); - unsigned NumOps = TID.getNumOperands(); - unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumOps = MCID.getNumOperands(); + unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 
3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index b6b3c75943b5..94b72fdb9a7e 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -68,7 +68,7 @@ namespace { void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI) { - const TargetInstrDesc &Desc = OldMI.getDesc(); + const MCInstrDesc &Desc = OldMI.getDesc(); for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = OldMI.getOperand(i); @@ -727,8 +727,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::t2MOVCCr: case ARM::MOVCCr: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr), + unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addReg(MI.getOperand(2).getReg(), getKillRegState(MI.getOperand(2).isKill())) @@ -764,8 +766,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::t2MOVCCi: case ARM::MOVCCi: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), + unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' @@ -837,8 +841,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) .addReg(0) - .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr - : ARM_AM::asr), 1))) + .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? + ARM_AM::lsr : ARM_AM::asr), + 1))) .addReg(ARM::CPSR, RegState::Define); MI.eraseFromParent(); return true; @@ -856,10 +861,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::tTPsoft: case ARM::TPsoft: { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::BL)) + TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL)) .addExternalSymbol("__aeabi_read_tp", 0); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -900,10 +906,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); - bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn); + bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn); bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; - unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel; + unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; unsigned LO16TF = isPIC ? 
ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY; unsigned HI16TF = isPIC @@ -958,15 +964,17 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1); MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::VMOVQ)) + TII->get(ARM::VORRq)) .addReg(EvenDst, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill)) .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::VMOVQ)) + TII->get(ARM::VORRq)) .addReg(OddDst, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill)) .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 5cf73c479a33..f469d7efe11a 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -219,8 +219,8 @@ class ARMFastISel : public FastISel { // we don't care about implicit defs here, just places we'll need to add a // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.hasOptionalDef()) + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.hasOptionalDef()) return false; // Look to see if our OptionalDef is defining CPSR or CCR. @@ -234,15 +234,15 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { } bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); // If we're a thumb2 or not NEON function we were handled via isPredicable. 
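// Both FastISel helpers here consult the static MCInstrDesc tables rather
// than the MachineInstr itself; schematically:
  const MCInstrDesc &MCID = MI->getDesc();
  bool HasOptionalCC = MCID.hasOptionalDef();  // trailing CC/CPSR def slot?
  bool HasPredOperand = false;
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    HasPredOperand |= MCID.OpInfo[i].isPredicate();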
- if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || + if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || AFI->isThumb2Function()) return false; - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) - if (TID.OpInfo[i].isPredicate()) + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) + if (MCID.OpInfo[i].isPredicate()) return true; return false; @@ -278,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); return ResultReg; @@ -288,7 +288,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -308,7 +308,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -331,7 +331,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -355,7 +355,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -377,7 +377,7 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -400,7 +400,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op1, bool Op1IsKill, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -423,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -442,7 +442,7 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm1, uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); - const TargetInstrDesc &II = TII.get(MachineInstOpcode); + const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) @@ -1549,7 +1549,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, NumBytes = CCInfo.getNextStackOffset(); // Issue CALLSEQ_START - unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) .addImm(NumBytes)); @@ -1647,7 +1647,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes) { // Issue CALLSEQ_END - unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(0)); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index e2e95d47b37b..381b404519e2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -268,14 +268,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // bic r4, r4, MaxAlign // mov sp, r4 // FIXME: It will be better just to find spare register here. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4) - .addReg(ARM::SP, RegState::Kill); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill)); AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2BICri), ARM::R4) .addReg(ARM::R4, RegState::Kill) .addImm(MaxAlign-1))); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(ARM::R4, RegState::Kill); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill)); } AFI->setShouldRestoreSPFromFP(true); @@ -293,9 +293,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(ARM::SP) .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else - BuildMI(MBB, MBBI, dl, - TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister()) - .addReg(ARM::SP); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + RegInfo->getBaseRegister()) + .addReg(ARM::SP)); } // If the frame has variable sized objects then the epilogue must restore @@ -364,8 +364,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(ARM::R4); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(ARM::R4)); } } else { // Thumb2 or ARM. 
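// Throughout this file the dedicated Thumb copy opcodes (tMOVgpr2tgpr,
// tMOVtgpr2gpr, tMOVgpr2gpr) collapse into plain tMOVr; tMOVr is
// predicable, so each BuildMI now gains the default AL predicate:
  AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
                   .addReg(ARM::R4, RegState::Kill));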
@@ -373,8 +374,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(FramePtr); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(FramePtr)); } } else if (NumBytes) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); @@ -427,6 +429,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); + MBBI = NewMI; } if (VARegSaveSize) @@ -736,20 +739,52 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, /// estimateStackSize - Estimate and return the size of the frame. /// FIXME: Make generic? static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -MFI->getObjectOffset(i); if (FixedOff > Offset) Offset = FixedOff; } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); + Offset += MFI->getObjectSize(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); } + + if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + return (unsigned)Offset; } @@ -841,9 +876,14 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (AFI->getVarArgsRegSaveSize() > 0) MF.getRegInfo().setPhysRegUsed(ARM::LR); - // Spill R4 if Thumb1 epilogue has to restore SP from FP since + // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know + // for sure what the stack size will be, but for this, an estimate is good + // enough. 
If there anything changes it, it'll be a spill, which implies + // we've used all the registers and so R4 is already used, so not marking + // it here will be OK. // FIXME: It will be better just to find spare register here. - if (MFI->hasVarSizedObjects()) + unsigned StackSize = estimateStackSize(MF); + if (MFI->hasVarSizedObjects() || StackSize > 508) MF.getRegInfo().setPhysRegUsed(ARM::R4); } diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp index 3f0238387a2b..8d77b2d8383e 100644 --- a/lib/Target/ARM/ARMGlobalMerge.cpp +++ b/lib/Target/ARM/ARMGlobalMerge.cpp @@ -128,10 +128,10 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, for (size_t i = 0, e = Globals.size(); i != e; ) { size_t j = 0; uint64_t MergedSize = 0; - std::vector<const Type*> Tys; + std::vector<Type*> Tys; std::vector<Constant*> Inits; for (j = i; j != e; ++j) { - const Type *Ty = Globals[j]->getType()->getElementType(); + Type *Ty = Globals[j]->getType()->getElementType(); MergedSize += TD->getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; @@ -175,7 +175,9 @@ bool ARMGlobalMerge::doInitialization(Module &M) { continue; // Ignore fancy-aligned globals for now. - if (I->getAlignment() != 0) + unsigned Alignment = I->getAlignment(); + const Type *Ty = I->getType()->getElementType(); + if (Alignment > TD->getABITypeAlignment(Ty)) continue; // Ignore all 'special' globals. @@ -183,7 +185,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; - if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) { + if (TD->getTypeAllocSize(Ty) < MaxOffset) { const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering(); if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal()) BSSGlobals.push_back(I); diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 517bba8cee8e..787f6a279187 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -19,11 +19,11 @@ using namespace llvm; static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, const TargetRegisterInfo &TRI) { // FIXME: Detect integer instructions properly. - const TargetInstrDesc &TID = MI->getDesc(); - unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (TID.mayStore()) + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Domain = MCID.TSFlags & ARMII::DomainMask; + if (MCID.mayStore()) return false; - unsigned Opcode = TID.getOpcode(); + unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) return false; if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) @@ -43,15 +43,15 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following // a VMLA / VMLS will cause 4 cycle stall. - const TargetInstrDesc &TID = MI->getDesc(); - if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { + const MCInstrDesc &MCID = MI->getDesc(); + if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { MachineInstr *DefMI = LastMI; - const TargetInstrDesc &LastTID = LastMI->getDesc(); + const MCInstrDesc &LastMCID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. - if (!LastTID.isBarrier() && + if (!LastMCID.isBarrier() && // On A9, AGU and NEON/FPU are muxed. 
- !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) && - (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { + !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) && + (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { I = llvm::prior(I); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9ad516dafb91..2c9481b86c55 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -329,10 +329,10 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { - const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode()); - if (TID.mayStore()) + const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); + if (MCID.mayStore()) return true; - unsigned Opcode = TID.getOpcode(); + unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) return true; // vmlx feeding into another vmlx. We actually want to unfold @@ -1354,30 +1354,34 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { /// SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = + CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } /// PairDRegs - Form a quad register from a pair of D registers. /// SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } /// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. /// SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } /// QuadSRegs - Form 4 consecutive S registers. 
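// After this change a REG_SEQUENCE node names its result register class as
// operand 0, so the pair helpers grow from 4 operands to 5 and the quad
// helpers from 8 to 9. The D-register pair case, mirroring PairDRegs above:
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);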
@@ -1385,12 +1389,15 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = + CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32); SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, + V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } /// QuadDRegs - Form 4 consecutive D registers. @@ -1398,12 +1405,14 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32); SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, + V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } /// QuadQRegs - Form 4 consecutive Q registers. 
@@ -1411,12 +1420,14 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32); SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32); - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, + V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 339c85886aea..cf8c5baa8e7d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -506,6 +506,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::FP_TO_SINT); + setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FDIV); } computeRegisterProperties(); @@ -538,7 +541,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); } - if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()) + if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() + || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP())) setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); @@ -704,6 +708,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + // Various VFP goodness if (!UseSoftFloat && !Subtarget->isThumb1Only()) { // int <-> fp are custom expanded into bit_convert + ARMISD ops. @@ -974,12 +981,12 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - if (TID.getNumDefs() == 0) + if (MCID.getNumDefs() == 0) return Sched::RegPressure; if (!Itins->isEmpty() && - Itins->getOperandCycle(TID.getSchedClass(), 0) > 2) + Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) return Sched::Latency; return Sched::RegPressure; @@ -1633,7 +1640,11 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: - // emitEpilogue is not ready for them. + // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as + // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation + // support in the assembler and linker to be used. 
This would need to be + // fixed to fully support tail calls in Thumb1. + // // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take // LR. This means if we need to reload LR, it takes an extra instructions, // which outweighs the value of the tail call; but here we don't know yet @@ -2281,12 +2292,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, // ARMv7 with MP extension has PLDW. return Op.getOperand(0); - if (Subtarget->isThumb()) + unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + if (Subtarget->isThumb()) { // Invert the bits. isRead = ~isRead & 1; - unsigned isData = Subtarget->isThumb() ? 0 : 1; + isData = ~isData & 1; + } - // Currently there is no intrinsic that matches pli. return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), DAG.getConstant(isData, MVT::i32)); @@ -2742,7 +2754,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); } ARMCC::CondCodes CondCode, CondCode2; @@ -5522,12 +5534,108 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, return SDValue(); } +// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction +// (only after legalization). +static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + // Only perform optimization if after legalize, and if NEON is available. We + // also expected both operands to be BUILD_VECTORs. + if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() + || N0.getOpcode() != ISD::BUILD_VECTOR + || N1.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + // Check output type since VPADDL operand elements can only be 8, 16, or 32. + EVT VT = N->getValueType(0); + if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) + return SDValue(); + + // Check that the vector operands are of the right form. + // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR + // operands, where N is the size of the formed vector. + // Each EXTRACT_VECTOR should have the same input vector and odd or even + // index such that we have a pair wise add pattern. + + // Grab the vector that all EXTRACT_VECTOR nodes should be referencing. + if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + SDValue Vec = N0->getOperand(0)->getOperand(0); + SDNode *V = Vec.getNode(); + unsigned nextIndex = 0; + + // For each operands to the ADD which are BUILD_VECTORs, + // check to see if each of their operands are an EXTRACT_VECTOR with + // the same vector and appropriate index. + for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { + if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT + && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + + SDValue ExtVec0 = N0->getOperand(i); + SDValue ExtVec1 = N1->getOperand(i); + + // First operand is the vector, verify its the same. + if (V != ExtVec0->getOperand(0).getNode() || + V != ExtVec1->getOperand(0).getNode()) + return SDValue(); + + // Second is the constant, verify its correct. 
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1)); + ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1)); + + // For the constant, we want to see all the even or all the odd. + if (!C0 || !C1 || C0->getZExtValue() != nextIndex + || C1->getZExtValue() != nextIndex+1) + return SDValue(); + + // Increment index. + nextIndex+=2; + } else + return SDValue(); + } + + // Create VPADDL node. + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Build operand list. + SmallVector<SDValue, 8> Ops; + Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, + TLI.getPointerTy())); + + // Input is the vector. + Ops.push_back(Vec); + + // Get widened type and narrowed type. + MVT widenType; + unsigned numElem = VT.getVectorNumElements(); + switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { + case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; + case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; + case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; + default: + assert(0 && "Invalid vector element type for padd optimization."); + } + + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + widenType, &Ops[0], Ops.size()); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); +} + /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted /// operands. static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget){ + + // Attempt to create vpaddl for this add. + SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); + if (Result.getNode()) + return Result; + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); @@ -5539,17 +5647,18 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. /// static SDValue PerformADDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // First try with the default operand order. - SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); + SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); if (Result.getNode()) return Result; // If that didn't work, try again with the operands commuted. - return PerformADDCombineWithOperands(N, N1, N0, DCI); + return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); } /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. @@ -5588,7 +5697,7 @@ static SDValue PerformVMULCombine(SDNode *N, unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) { - Opcode = N0.getOpcode(); + Opcode = N1.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) return SDValue(); @@ -5874,8 +5983,8 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); } -/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff -/// C1 & C2 == C1. 
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff +/// the bits being cleared by the AND are not demanded by the BFI. static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N1 = N->getOperand(1); @@ -5883,9 +5992,12 @@ static SDValue PerformBFICombine(SDNode *N, ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); if (!N11C) return SDValue(); - unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned LSB = CountTrailingZeros_32(~InvMask); + unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; + unsigned Mask = (1 << Width)-1; unsigned Mask2 = N11C->getZExtValue(); - if ((Mask & Mask2) == Mask2) + if ((Mask & (~Mask2)) == 0) return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); @@ -6378,7 +6490,105 @@ static SDValue PerformVDUPLANECombine(SDNode *N, return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); } -/// getVShiftImm - Check if this is a valid build_vector for the immediate +// isConstVecPow2 - Return true if each vector element is a power of 2, all +// elements are the same constant, C, and Log2(C) ranges from 1 to 32. +static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) +{ + integerPart cN; + integerPart c0 = 0; + for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); + I != E; I++) { + ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); + if (!C) + return false; + + bool isExact; + APFloat APF = C->getValueAPF(); + if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) + != APFloat::opOK || !isExact) + return false; + + c0 = (I == 0) ? cN : c0; + if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) + return false; + } + C = c0; + return true; +} + +/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) +/// can replace combinations of VMUL and VCVT (floating-point to integer) +/// when the VMUL has a constant operand that is a power of 2. +/// +/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): +/// vmul.f32 d16, d17, d16 +/// vcvt.s32.f32 d16, d16 +/// becomes: +/// vcvt.s32.f32 d16, d16, #3 +static SDValue PerformVCVTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + + if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || + Op.getOpcode() != ISD::FMUL) + return SDValue(); + + uint64_t C; + SDValue N0 = Op->getOperand(0); + SDValue ConstVec = Op->getOperand(1); + bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; + + if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || + !isConstVecPow2(ConstVec, isSigned, C)) + return SDValue(); + + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : + Intrinsic::arm_neon_vcvtfp2fxu; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + N->getValueType(0), + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); +} + +/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) +/// can replace combinations of VCVT (integer to floating-point) and VDIV +/// when the VDIV has a constant operand that is a power of 2. 
+/// +/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): +/// vcvt.f32.s32 d16, d16 +/// vdiv.f32 d16, d17, d16 +/// becomes: +/// vcvt.f32.s32 d16, d16, #3 +static SDValue PerformVDIVCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + unsigned OpOpcode = Op.getNode()->getOpcode(); + + if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || + (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) + return SDValue(); + + uint64_t C; + SDValue ConstVec = N->getOperand(1); + bool isSigned = OpOpcode == ISD::SINT_TO_FP; + + if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || + !isConstVecPow2(ConstVec, isSigned, C)) + return SDValue(); + + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : + Intrinsic::arm_neon_vcvtfxu2fp; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + Op.getValueType(), + DAG.getConstant(IntrinsicOpcode, MVT::i32), + Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); +} + +/// Getvshiftimm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { @@ -6750,11 +6960,75 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); } +/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. +SDValue +ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { + SDValue Cmp = N->getOperand(4); + if (Cmp.getOpcode() != ARMISD::CMPZ) + // Only looking at EQ and NE cases. + return SDValue(); + + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + SDValue LHS = Cmp.getOperand(0); + SDValue RHS = Cmp.getOperand(1); + SDValue FalseVal = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue ARMcc = N->getOperand(2); + ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + + // Simplify + // mov r1, r0 + // cmp r1, x + // mov r0, y + // moveq r0, x + // to + // cmp r0, x + // movne r0, y + // + // mov r1, r0 + // cmp r1, x + // mov r0, x + // movne r0, y + // to + // cmp r0, x + // movne r0, y + /// FIXME: Turn this into a target neutral optimization? + SDValue Res; + if (CC == ARMCC::NE && FalseVal == RHS) { + Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, + N->getOperand(3), Cmp); + } else if (CC == ARMCC::EQ && TrueVal == RHS) { + SDValue ARMcc; + SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); + Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, + N->getOperand(3), NewCmp); + } + + if (Res.getNode()) { + APInt KnownZero, KnownOne; + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); + DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); + // Capture demanded bits information that would be otherwise lost. 
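// Worked reading of the masks tested below: KnownZero == 0xfffffffe means
// every bit but bit 0 is provably zero, so the 32-bit CMOV result only ever
// carries an i1's worth of information; 0xffffff00 and 0xffff0000 likewise
// bound it to i8 and i16. AssertZext records that fact so later combines
// can drop redundant zero-extensions.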
+ if (KnownZero == 0xfffffffe) + Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, + DAG.getValueType(MVT::i1)); + else if (KnownZero == 0xffffff00) + Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, + DAG.getValueType(MVT::i8)); + else if (KnownZero == 0xffff0000) + Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, + DAG.getValueType(MVT::i16)); + } + + return Res; +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); @@ -6767,6 +7041,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); + case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: @@ -6775,6 +7052,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); + case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); case ARMISD::VLD2DUP: case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: @@ -7277,10 +7555,17 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { default: break; case 'l': return C_RegisterClass; case 'w': return C_RegisterClass; + case 'h': return C_RegisterClass; + case 'x': return C_RegisterClass; + case 't': return C_RegisterClass; + case 'j': return C_Other; // Constant for movw. + } + } else if (Constraint.size() == 2) { + switch (Constraint[0]) { + default: break; + // All 'U+' constraints are addresses. + case 'U': return C_Memory; } - } else { - if (Constraint == "Uv") - return C_Memory; } return TargetLowering::getConstraintType(Constraint); } @@ -7319,26 +7604,43 @@ ARMTargetLowering::getSingleConstraintMatchWeight( return weight; } -std::pair<unsigned, const TargetRegisterClass*> +typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; +RCPair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { - case 'l': + case 'l': // Low regs or general regs. if (Subtarget->isThumb()) - return std::make_pair(0U, ARM::tGPRRegisterClass); + return RCPair(0U, ARM::tGPRRegisterClass); else - return std::make_pair(0U, ARM::GPRRegisterClass); + return RCPair(0U, ARM::GPRRegisterClass); + case 'h': // High regs or no regs. 
+ if (Subtarget->isThumb()) + return RCPair(0U, ARM::hGPRRegisterClass); + break; case 'r': - return std::make_pair(0U, ARM::GPRRegisterClass); + return RCPair(0U, ARM::GPRRegisterClass); case 'w': if (VT == MVT::f32) - return std::make_pair(0U, ARM::SPRRegisterClass); + return RCPair(0U, ARM::SPRRegisterClass); if (VT.getSizeInBits() == 64) - return std::make_pair(0U, ARM::DPRRegisterClass); + return RCPair(0U, ARM::DPRRegisterClass); if (VT.getSizeInBits() == 128) - return std::make_pair(0U, ARM::QPRRegisterClass); + return RCPair(0U, ARM::QPRRegisterClass); + break; + case 'x': + if (VT == MVT::f32) + return RCPair(0U, ARM::SPR_8RegisterClass); + if (VT.getSizeInBits() == 64) + return RCPair(0U, ARM::DPR_8RegisterClass); + if (VT.getSizeInBits() == 128) + return RCPair(0U, ARM::QPR_8RegisterClass); + break; + case 't': + if (VT == MVT::f32) + return RCPair(0U, ARM::SPRRegisterClass); break; } } @@ -7348,47 +7650,6 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -std::vector<unsigned> ARMTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { // GCC ARM Constraint Letters - default: break; - case 'l': - return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - 0); - case 'r': - return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, ARM::R11, - ARM::R12, ARM::LR, 0); - case 'w': - if (VT == MVT::f32) - return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, - ARM::S12,ARM::S13,ARM::S14,ARM::S15, - ARM::S16,ARM::S17,ARM::S18,ARM::S19, - ARM::S20,ARM::S21,ARM::S22,ARM::S23, - ARM::S24,ARM::S25,ARM::S26,ARM::S27, - ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); - if (VT.getSizeInBits() == 64) - return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10,ARM::D11, - ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); - if (VT.getSizeInBits() == 128) - return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, - ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); - break; - } - - return std::vector<unsigned>(); -} - /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, @@ -7403,6 +7664,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; + case 'j': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); @@ -7417,6 +7679,13 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; switch (ConstraintLetter) { + case 'j': + // Constant suitable for movw, must be between 0 and + // 65535. 
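// Illustrative use of the new 'j' constraint from C inline assembly
// (assumes a v6t2-class core, since 'j' feeds a MOVW immediate):
//   int r;
//   asm("movw %0, %1" : "=r"(r) : "j"(65535));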
+ if (Subtarget->hasV6T2Ops()) + if (CVal >= 0 && CVal <= 65535) + break; + return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD @@ -7685,7 +7954,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(2); Info.offset = 0; Info.align = 8; - Info.vol = false; + Info.vol = true; Info.readMem = false; Info.writeMem = true; return true; @@ -7696,7 +7965,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 8; - Info.vol = false; + Info.vol = true; Info.readMem = true; Info.writeMem = false; return true; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 21a9a3aa746d..980fb404887e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -244,6 +244,7 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const; @@ -306,9 +307,6 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 897d8a5d79e3..3ccf22f80b7d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -107,16 +107,6 @@ def AddrModeT2_pc : AddrMode<14>; def AddrModeT2_i8s4 : AddrMode<15>; def AddrMode_i12 : AddrMode<16>; -// Instruction size. -class SizeFlagVal<bits<3> val> { - bits<3> Value = val; -} -def SizeInvalid : SizeFlagVal<0>; // Unset. -def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. -def Size8Bytes : SizeFlagVal<2>; -def Size4Bytes : SizeFlagVal<3>; -def Size2Bytes : SizeFlagVal<4>; - // Load / store index mode. class IndexMode<bits<2> val> { bits<2> Value = val; @@ -236,13 +226,13 @@ def shr_imm64 : Operand<i32> { // ARM Instruction templates. // -class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, +class InstTemplate<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> : Instruction { let Namespace = "ARM"; AddrMode AM = am; - SizeFlagVal SZ = sz; + int Size = sz; IndexMode IM = im; bits<2> IndexModeBits = IM.Value; Format F = f; @@ -256,12 +246,11 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h. 
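// With the 3-bit size field gone from TSFlags, every remaining field
// shifts down by three bits; both layouts are visible in this hunk:
//   AM.Value         {4-0}   -> {4-0}
//   SZ.Value         {7-5}   -> removed (Size is now a plain int property)
//   IndexModeBits    {9-8}   -> {6-5}
//   Form             {15-10} -> {12-7}
//   isUnaryDataProc  {16}    -> {13}
//   canXformTo16Bit  {17}    -> {14}
//   D.Value          {20-18} -> {17-15}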
let TSFlags{4-0} = AM.Value; - let TSFlags{7-5} = SZ.Value; - let TSFlags{9-8} = IndexModeBits; - let TSFlags{15-10} = Form; - let TSFlags{16} = isUnaryDataProc; - let TSFlags{17} = canXformTo16Bit; - let TSFlags{20-18} = D.Value; + let TSFlags{6-5} = IndexModeBits; + let TSFlags{12-7} = Form; + let TSFlags{13} = isUnaryDataProc; + let TSFlags{14} = canXformTo16Bit; + let TSFlags{17-15} = D.Value; let Constraints = cstr; let Itinerary = itin; @@ -271,53 +260,70 @@ class Encoding { field bits<32> Inst; } -class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im, +class InstARM<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding; // This Encoding-less class is used by Thumb1 to specify the encoding bits later // on by adding flavors to specific instructions. -class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im, +class InstThumb<AddrMode am, int sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> : InstTemplate<am, sz, im, f, d, cstr, itin>; class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> - // FIXME: This really should derive from InstTemplate instead, as pseudos - // don't need encoding information. TableGen doesn't like that - // currently. Need to figure out why and fix it. - : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain, - "", itin> { + : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, + GenericDomain, "", itin> { let OutOperandList = oops; let InOperandList = iops; let Pattern = pattern; let isCodeGenOnly = 1; + let isPseudo = 1; } // PseudoInst that's ARM-mode only. -class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin, +class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin, list<dag> pattern> : PseudoInst<oops, iops, itin, pattern> { - let SZ = sz; + let Size = sz; list<Predicate> Predicates = [IsARM]; } // PseudoInst that's Thumb-mode only. -class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin, +class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin, list<dag> pattern> : PseudoInst<oops, iops, itin, pattern> { - let SZ = sz; + let Size = sz; list<Predicate> Predicates = [IsThumb]; } // PseudoInst that's Thumb2-mode only. -class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin, +class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin, list<dag> pattern> : PseudoInst<oops, iops, itin, pattern> { - let SZ = sz; + let Size = sz; list<Predicate> Predicates = [IsThumb2]; } + +class ARMPseudoExpand<dag oops, dag iops, int sz, + InstrItinClass itin, list<dag> pattern, + dag Result> + : ARMPseudoInst<oops, iops, sz, itin, pattern>, + PseudoInstExpansion<Result>; + +class tPseudoExpand<dag oops, dag iops, int sz, + InstrItinClass itin, list<dag> pattern, + dag Result> + : tPseudoInst<oops, iops, sz, itin, pattern>, + PseudoInstExpansion<Result>; + +class t2PseudoExpand<dag oops, dag iops, int sz, + InstrItinClass itin, list<dag> pattern, + dag Result> + : t2PseudoInst<oops, iops, sz, itin, pattern>, + PseudoInstExpansion<Result>; + // Almost all ARM instructions are predicable. 
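
For orientation, the TSFlags repacking above (the three instruction-size bits formerly at TSFlags{7-5} are gone, so every higher field shifts down) decodes roughly as follows. The bit positions come straight from the 'let TSFlags{...}' lines; the enum names are our own shorthand, and the authoritative definitions live in ARMBaseInstrInfo.h as the comment in the hunk says:

    #include <cstdint>

    // Sketch of the new TSFlags layout; positions mirror the hunk above,
    // names are illustrative (see ARMBaseInstrInfo.h for the real ones).
    enum : uint64_t {
      AddrModeMask   = 0x1f,                     // TSFlags{4-0}
      IndexModeShift = 5,                        // TSFlags{6-5}
      IndexModeMask  = 0x3ull  << IndexModeShift,
      FormShift      = 7,                        // TSFlags{12-7}
      FormMask       = 0x3full << FormShift,
      UnaryDPMask    = 1ull    << 13,            // TSFlags{13}
      Xform16BitMask = 1ull    << 14,            // TSFlags{14}
      DomainShift    = 15,                       // TSFlags{17-15}
      DomainMask     = 0x7ull  << DomainShift
    };

    inline unsigned getAddrMode(uint64_t TSFlags) {
      return unsigned(TSFlags & AddrModeMask);
    }
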
-class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class I<dag oops, dag iops, AddrMode am, int sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -332,7 +338,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, } // A few are not predicable -class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class InoP<dag oops, dag iops, AddrMode am, int sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -348,7 +354,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, // Same as I except it can optionally modify CPSR. Note it's modeled as an input // operand since by default it's a zero register. It will become an implicit def // once it's "flipped". -class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class sI<dag oops, dag iops, AddrMode am, int sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -366,7 +372,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, } // Special cases -class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class XI<dag oops, dag iops, AddrMode am, int sz, IndexMode im, Format f, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { @@ -379,31 +385,31 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class AI<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, opc, asm, "", pattern>; class AsI<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, + : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, opc, asm, "", pattern>; class AXI<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> - : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, + : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, asm, "", pattern>; class AInoP<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, + : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, opc, asm, "", pattern>; // Ctrl flow instructions class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin, opc, asm, "", pattern> { let Inst{27-24} = opcod; } class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin, + : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin, asm, "", pattern> { let Inst{27-24} = opcod; } @@ -411,13 +417,13 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, // BR_JT instructions class JTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin, + : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin, asm, "", pattern>; // Atomic load/store instructions class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> 
pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { bits<4> Rt; bits<4> Rn; @@ -430,7 +436,7 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, } class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { bits<4> Rd; bits<4> Rt; @@ -460,21 +466,21 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> // addrmode1 instructions class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, + : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; let Inst{27-26} = 0b00; } class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, + : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; let Inst{27-26} = 0b00; } class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> - : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, + : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin, asm, "", pattern> { let Inst{24-21} = opcod; let Inst{27-26} = 0b00; @@ -486,7 +492,7 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm, + : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{27-25} = op; let Inst{24} = 1; // 24 == P @@ -499,7 +505,7 @@ class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am, class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin, + : I<oops, iops, AddrMode2, 4, im, f, itin, opc, asm, cstr, pattern> { bits<4> Rt; let Inst{27-26} = 0b01; @@ -547,7 +553,7 @@ class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops, // addrmode3 instructions class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, + : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin, opc, asm, "", pattern> { bits<14> addr; bits<4> Rt; @@ -567,7 +573,7 @@ class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f, class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin, + : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> { bits<4> Rt; let Inst{27-25} = 0b000; @@ -583,7 +589,7 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, 
IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin, + : I<oops, iops, AddrMode3, 4, im, f, itin, opc, asm, cstr, pattern> { // {13} 1 == imm8, 0 == Rm // {12-9} Rn @@ -627,7 +633,7 @@ class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops, // stores class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, + : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin, opc, asm, "", pattern> { bits<14> addr; bits<4> Rt; @@ -647,7 +653,7 @@ class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin, // Pre-indexed stores class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin, opc, asm, cstr, pattern> { let Inst{4} = 1; let Inst{5} = 1; // H bit @@ -660,7 +666,7 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, } class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin, opc, asm, cstr, pattern> { let Inst{4} = 1; let Inst{5} = 1; // H bit @@ -675,7 +681,7 @@ class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, // Post-indexed stores class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin, opc, asm, cstr,pattern> { // {13} 1 == imm8, 0 == Rm // {12-9} Rn @@ -701,7 +707,7 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, } class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin, opc, asm, cstr, pattern> { let Inst{4} = 1; let Inst{5} = 1; // H bit @@ -716,7 +722,7 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, // addrmode4 instructions class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> { + : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> { bits<4> p; bits<16> regs; bits<4> Rn; @@ -730,7 +736,7 @@ class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, // Unsigned multiply, multiply-accumulate instructions. 
class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin, opc, asm, "", pattern> { let Inst{7-4} = 0b1001; let Inst{20} = 0; // S bit @@ -738,7 +744,7 @@ class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, } class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin, + : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin, opc, asm, "", pattern> { let Inst{7-4} = 0b1001; let Inst{27-21} = opcod; @@ -747,7 +753,7 @@ class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, // Most significant word multiply class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin, opc, asm, "", pattern> { bits<4> Rd; bits<4> Rn; @@ -770,7 +776,7 @@ class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops, // SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y> class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin, opc, asm, "", pattern> { bits<4> Rn; bits<4> Rm; @@ -809,7 +815,7 @@ class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops, // Extend instructions. class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin, opc, asm, "", pattern> { // All AExtI instructions have Rd and Rm register operands. bits<4> Rd; @@ -824,7 +830,7 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, // Misc Arithmetic instructions. class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, opc, asm, "", pattern> { bits<4> Rd; bits<4> Rm; @@ -839,7 +845,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, // PKH instructions class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin, + : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, opc, asm, "", pattern> { bits<4> Rd; bits<4> Rn; @@ -874,7 +880,7 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { // Thumb Instruction Format Definitions. // -class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class ThumbI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; @@ -886,39 +892,32 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, // TI - Thumb instruction. 
class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>; + : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>; // Two-address instructions class TIt<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst", + : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst", pattern>; // tBL, tBX 32-bit instructions class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>, + : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>, Encoding { let Inst{31-27} = opcod1; let Inst{15-14} = opcod2; let Inst{12} = opcod3; } -// Move to/from coprocessor instructions -class T1Cop<dag oops, dag iops, string asm, list<dag> pattern> - : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "", pattern>, - Encoding, Requires<[IsThumb, HasV6]> { - let Inst{31-28} = 0b1110; -} - // BR_JT instructions class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>; + : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>; // Thumb1 only -class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class Thumb1I<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; @@ -930,19 +929,19 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class T1I<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>; + : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>; class T1Ix2<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>; + : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>; // Two-address instructions class T1It<dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, + : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, cstr, pattern>; // Thumb1 instruction that can either be predicated or set CPSR. -class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class Thumb1sI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { @@ -955,16 +954,16 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class T1sI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>; + : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>; // Two-address instructions class T1sIt<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, + : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "$Rn = $Rdn", pattern>; // Thumb1 instruction that can be predicated. 
-class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class Thumb1pI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { @@ -977,17 +976,17 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class T1pI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>; + : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>; // Two-address instructions class T1pIt<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, + : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "$Rn = $Rdn", pattern>; class T1pIs<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>; + : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>; class Encoding16 : Encoding { let Inst{31-16} = 0x0000; @@ -1036,7 +1035,7 @@ class T1BranchCond<bits<4> opcode> : Encoding16 { class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>, + : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>, T1LoadStore<0b0101, opcode> { bits<3> Rt; bits<8> addr; @@ -1047,7 +1046,7 @@ class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am, class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>, + : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>, T1LoadStore<opA, {opB,?,?}> { bits<3> Rt; bits<8> addr; @@ -1063,7 +1062,7 @@ class T1Misc<bits<7> opcode> : Encoding16 { } // Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable. -class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class Thumb2I<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { @@ -1080,7 +1079,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, // // FIXME: This uses unified syntax so {s} comes before {p}. We should make it // more consistent. 
-class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class Thumb2sI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { @@ -1095,7 +1094,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, } // Special cases -class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class Thumb2XI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { @@ -1106,7 +1105,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsThumb2]; } -class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class ThumbXI<dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { @@ -1119,22 +1118,22 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class T2I<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>; + : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>; class T2Ii12<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",pattern>; + : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>; class T2Ii8<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>; + : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>; class T2Iso<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, itin, opc, asm, "", pattern>; + : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>; class T2Ipc<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>; + : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>; class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "", + : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "", pattern> { bits<4> Rt; bits<4> Rt2; @@ -1153,32 +1152,32 @@ class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin, class T2sI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>; + : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>; class T2XI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>; + : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>; class T2JTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>; + : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>; // Move to/from coprocessor instructions -class T2Cop<dag oops, dag iops, string asm, list<dag> 
pattern> - : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> { - let Inst{31-28} = 0b1111; +class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern> + : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> { + let Inst{31-28} = opc; } // Two-address instructions class T2XIt<dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, cstr, pattern>; + : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>; // T2Iidxldst - Thumb2 indexed load / store instructions. class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> { + : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${p}", asm); @@ -1232,7 +1231,7 @@ class T2Pat<dag pattern, dag result> : Pat<pattern, result> { // // Almost all VFP instructions are predicable. -class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class VFPI<dag oops, dag iops, AddrMode am, int sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { @@ -1247,7 +1246,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, } // Special cases -class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class VFPXI<dag oops, dag iops, AddrMode am, int sz, IndexMode im, Format f, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { @@ -1263,7 +1262,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, + : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, opc, asm, "", pattern> { let PostEncoderMethod = "VFPThumb2PostEncoder"; } @@ -1272,7 +1271,7 @@ class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin, class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + : VFPI<oops, iops, AddrMode5, 4, IndexModeNone, VFPLdStFrm, itin, opc, asm, "", pattern> { // Instruction operands. bits<5> Dd; @@ -1298,7 +1297,7 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + : VFPI<oops, iops, AddrMode5, 4, IndexModeNone, VFPLdStFrm, itin, opc, asm, "", pattern> { // Instruction operands. bits<5> Sd; @@ -1324,7 +1323,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, // VFP Load / store multiple pseudo instructions. 
class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr, list<dag> pattern> - : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain, + : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); @@ -1335,7 +1334,7 @@ class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr, // Load / store multiple class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, 4, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // Instruction operands. bits<4> Rn; @@ -1355,7 +1354,7 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, 4, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // Instruction operands. bits<4> Rn; @@ -1569,7 +1568,7 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> { + : InstARM<am, 4, im, f, NeonDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); @@ -1581,7 +1580,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> { + : InstARM<am, 4, im, f, NeonDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, "${p}", "\t", asm); @@ -1621,7 +1620,7 @@ class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, } class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr> - : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr, + : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); @@ -1630,7 +1629,7 @@ class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr> class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr, list<dag> pattern> - : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr, + : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); @@ -1859,7 +1858,7 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain, + : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain, "", itin> { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 6f48d967f919..adcbf1806fe3 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -14,7 +14,6 @@ #include 
"ARMInstrInfo.h" #include "ARM.h" #include "ARMAddressingModes.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 9af76df7c37d..a42dd1a54ec7 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,6 +62,9 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, + SDTCisInt<1>]>; + def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, @@ -130,7 +133,7 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, [SDNPHasChain]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain]>; -def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch, +def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; @@ -144,33 +147,48 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // -def HasV4T : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate; +def HasV4T : Predicate<"Subtarget->hasV4TOps()">, + AssemblerPredicate<"HasV4TOps">; def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, + AssemblerPredicate<"HasV5TEOps">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">, + AssemblerPredicate<"HasV6Ops">; def NoV6 : Predicate<"!Subtarget->hasV6Ops()">; -def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate; +def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, + AssemblerPredicate<"HasV6T2Ops">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; -def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate; +def HasV7 : Predicate<"Subtarget->hasV7Ops()">, + AssemblerPredicate<"HasV7Ops">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; -def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate; -def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate; -def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate; -def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate; -def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate; +def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, + AssemblerPredicate<"FeatureVFP2">; +def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, + AssemblerPredicate<"FeatureVFP3">; +def HasNEON : Predicate<"Subtarget->hasNEON()">, + AssemblerPredicate<"FeatureNEON">; +def HasFP16 : Predicate<"Subtarget->hasFP16()">, + AssemblerPredicate<"FeatureFP16">; +def HasDivide : Predicate<"Subtarget->hasDivide()">, + AssemblerPredicate<"FeatureHWDiv">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, - AssemblerPredicate; + AssemblerPredicate<"FeatureT2XtPk">; +def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">, + AssemblerPredicate<"FeatureDSPThumb2">; def HasDB : Predicate<"Subtarget->hasDataBarrier()">, - AssemblerPredicate; + AssemblerPredicate<"FeatureDB">; 
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
- AssemblerPredicate;
+ AssemblerPredicate<"FeatureMP">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb">;
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
-def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
@@ -237,11 +255,13 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-/// imm0_65535 predicate - True if the 32-bit immediate is in the range
-/// [0.65535].
-def imm0_65535 : ImmLeaf<i32, [{
+/// imm0_65535 - An immediate is in the range [0,65535].
+def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 65536;
-}]>;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
@@ -294,16 +314,19 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// FIXME: get rid of this one?
def uncondbrtarget : Operand<OtherVT> {
let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// Branch target for ARM. Handles conditional/unconditional
def br_target : Operand<OtherVT> {
let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// Call target.
@@ -311,6 +334,7 @@ def br_target : Operand<OtherVT> {
def bltarget : Operand<i32> {
// Encoded the same as branch targets.
let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// Call target for ARM. Handles conditional/unconditional
@@ -318,6 +342,7 @@ def bltarget : Operand<i32> {
def bl_target : Operand<i32> {
// Encoded the same as branch targets.
let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
@@ -394,14 +419,20 @@ def shift_imm : Operand<i32> {
let ParserMatchClass = ShifterAsmOperand;
}
+def ShiftedRegAsmOperand : AsmOperandClass {
+ let Name = "ShiftedReg";
+}
+
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
[shl,srl,sra,rotr]> {
let EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
let MIOperandInfo = (ops GPR, GPR, shift_imm);
}
+// FIXME: Does this need to be distinct from so_reg?
def shift_so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
[shl,srl,sra,rotr]> {
@@ -416,7 +447,6 @@ def so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
let EncoderMethod = "getSOImmOpValue";
- let PrintMethod = "printSOImmOperand";
}
// Break so_imm's up into two pieces. This handles immediates with up to 16
@@ -434,6 +464,22 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
+}]> {
+ let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 16;
+}]> {
+ let ParserMatchClass = Imm0_15AsmOperand;
+}
+
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
@@ -673,7 +719,7 @@ include "ARMInstrFormats.td"
/// binop that produces a value.
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -713,6 +759,24 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let Inst{15-12} = Rd;
let Inst{11-0} = shift;
}
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+ so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
}
/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
@@ -909,9 +973,9 @@ multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
}
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
-let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
+ string baseOpc, bit Commutable = 0> {
+ let Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
@@ -950,7 +1014,24 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15-12} = Rd;
let Inst{19-16} = Rn;
}
- }
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
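
The aliases this comment introduces (the InstAlias defs resume directly below) let the assembler accept the common two-operand spelling when the destination and first source are the same register. A toy, non-LLVM sketch of the rewrite they amount to:

    #include <string>

    // Toy illustration (not LLVM code) of the two-operand aliases: when
    // the destination equals the first source, the assembler accepts
    // 'add r0, #4' and treats it as the canonical 'add r0, r0, #4'.
    std::string expandTwoOperandAlias(const std::string &mnemonic,
                                      const std::string &rdn,
                                      const std::string &src2) {
      return mnemonic + " " + rdn + ", " + rdn + ", " + src2;
    }
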
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, + so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, + GPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; + def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn, + so_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsARM]>; } // Carry setting variants @@ -958,15 +1039,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let usesCustomInserter = 1 in { multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - Size4Bytes, IIC_iALUi, + 4, IIC_iALUi, [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>; def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - Size4Bytes, IIC_iALUr, + 4, IIC_iALUr, [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { let isCommutable = Commutable; } def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - Size4Bytes, IIC_iALUsr, + 4, IIC_iALUsr, [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>; } } @@ -1116,9 +1197,8 @@ def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", // The i32imm operand $val can be used by a debugger to store more information // about the breakpoint. -def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM]> { +def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "bkpt", "\t$val", []>, Requires<[IsARM]> { bits<16> val; let Inst{3-0} = val{3-0}; let Inst{19-8} = val{15-4}; @@ -1208,9 +1288,8 @@ def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary, let Inst{8-0} = 0; } -def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV7]> { +def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + []>, Requires<[IsARM, HasV7]> { bits<4> opt; let Inst{27-4} = 0b001100100000111100001111; let Inst{3-0} = opt; @@ -1227,40 +1306,40 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, // Address computation and loads and stores in PIC mode. 
let isNotDuplicable = 1 in { def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), - Size4Bytes, IIC_iALUr, + 4, IIC_iALUr, [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; let AddedComplexity = 10 in { def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iLoad_r, + 4, IIC_iLoad_r, [(set GPR:$dst, (load addrmodepc:$addr))]>; def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iLoad_bh_r, + 4, IIC_iLoad_bh_r, [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>; def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iLoad_bh_r, + 4, IIC_iLoad_bh_r, [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>; def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iLoad_bh_r, + 4, IIC_iLoad_bh_r, [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>; def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iLoad_bh_r, + 4, IIC_iLoad_bh_r, [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>; } let AddedComplexity = 10 in { def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>; + 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>; def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src, + 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; + 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; } } // isNotDuplicable = 1 @@ -1282,11 +1361,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let Inst{11-0} = label; } def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), - Size4Bytes, IIC_iALUi, []>; + 4, IIC_iALUi, []>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - Size4Bytes, IIC_iALUi, []>; + 4, IIC_iALUi, []>; //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -1319,22 +1398,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { let Inst{3-0} = dst; } - // For disassembly only. - def BX_pred : AXI<(outs), (ins GPR:$dst, pred:$p), BrMiscFrm, IIC_Br, - "bx$p\t$dst", [/* pattern left blank */]>, + def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, + "bx", "\t$dst", [/* pattern left blank */]>, Requires<[IsARM, HasV4T]> { bits<4> dst; let Inst{27-4} = 0b000100101111111111110001; let Inst{3-0} = dst; } - - // ARMV4 only - // FIXME: We would really like to define this as a vanilla ARMPat like: - // ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)> - // With that, however, we can't set isBranch, isTerminator, etc.. - def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst), - Size4Bytes, IIC_Br, [(brind GPR:$dst)]>, - Requires<[IsARM, NoV4T]>; } // All calls clobber the non-callee saved registers. SP is marked as @@ -1386,12 +1456,12 @@ let isCall = 1, // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. 
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsARM, HasV4T, IsNotDarwin]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsARM, NoV4T, IsNotDarwin]>; } @@ -1401,131 +1471,82 @@ let isCall = 1, // moved above / below calls. Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], Uses = [R7, SP] in { - def BLr9 : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops), - Size4Bytes, IIC_Br, - [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>; - - def BLr9_pred : ARMPseudoInst<(outs), - (ins bltarget:$func, pred:$p, variable_ops), - Size4Bytes, IIC_Br, - [(ARMcall_pred tglobaladdr:$func)]>, + def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), + 4, IIC_Br, + [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, + Requires<[IsARM, IsDarwin]>; + + def BLr9_pred : ARMPseudoExpand<(outs), + (ins bl_target:$func, pred:$p, variable_ops), + 4, IIC_Br, + [(ARMcall_pred tglobaladdr:$func)], + (BL_pred bl_target:$func, pred:$p)>, Requires<[IsARM, IsDarwin]>; // ARMv5T and above - def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops), - Size4Bytes, IIC_Br, - [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>; - - def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p, variable_ops), - Size4Bytes, IIC_Br, - [(ARMcall_pred GPR:$func)]>, + def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops), + 4, IIC_Br, + [(ARMcall GPR:$func)], + (BLX GPR:$func)>, + Requires<[IsARM, HasV5T, IsDarwin]>; + + def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops), + 4, IIC_Br, + [(ARMcall_pred GPR:$func)], + (BLX_pred GPR:$func, pred:$p)>, Requires<[IsARM, HasV5T, IsDarwin]>; // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsARM, HasV4T, IsDarwin]>; // ARMv4 def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsARM, NoV4T, IsDarwin]>; } -// Tail calls. - -// FIXME: The Thumb versions of these should live in ARMInstrThumb.td -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; - - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; - - def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsARM, IsDarwin]>; - - def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsThumb, IsDarwin]>; - - def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsARM, IsDarwin]>; - - def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsThumb, IsDarwin]>; - } - - // Non-Darwin versions (the difference is R9). 
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { - def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; - - def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; - - def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsARM, IsNotDarwin]>; - - def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsThumb, IsNotDarwin]>; - - def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsARM, IsNotDarwin]>; - def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - Size4Bytes, IIC_Br, - []>, Requires<[IsThumb, IsNotDarwin]>; +let isBranch = 1, isTerminator = 1 in { + // FIXME: should be able to write a pattern for ARMBrcond, but can't use + // a two-value operand where a dag node expects two operands. :( + def Bcc : ABI<0b1010, (outs), (ins br_target:$target), + IIC_Br, "b", "\t$target", + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { + bits<24> target; + let Inst{23-0} = target; } -} -let isBranch = 1, isTerminator = 1 in { - // B is "predicable" since it's just a Bcc with an 'always' condition. let isBarrier = 1 in { + // B is "predicable" since it's just a Bcc with an 'always' condition. let isPredicable = 1 in // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly // should be sufficient. - def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br, - [(br bb:$target)]>; + // FIXME: Is B really a Barrier? That doesn't seem right. + def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br, + [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), (ins GPR:$target, i32imm:$jt, i32imm:$id), - SizeSpecial, IIC_Br, + 0, IIC_Br, [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; // FIXME: This shouldn't use the generic "addrmode2," but rather be split // into i12 and rs suffixed versions. def BR_JTm : ARMPseudoInst<(outs), (ins addrmode2:$target, i32imm:$jt, i32imm:$id), - SizeSpecial, IIC_Br, + 0, IIC_Br, [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, imm:$id)]>; def BR_JTadd : ARMPseudoInst<(outs), (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id), - SizeSpecial, IIC_Br, + 0, IIC_Br, [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>; } // isNotDuplicable = 1, isIndirectBranch = 1 } // isBarrier = 1 - // FIXME: should be able to write a pattern for ARMBrcond, but can't use - // a two-value operand where a dag node expects two operands. :( - def Bcc : ABI<0b1010, (outs), (ins br_target:$target), - IIC_Br, "b", "\t$target", - [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { - bits<24> target; - let Inst{23-0} = target; - } } // BLX (immediate) -- for disassembly only @@ -1538,14 +1559,65 @@ def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary, let Inst{24} = target{0}; } -// Branch and Exchange Jazelle -- for disassembly only +// Branch and Exchange Jazelle def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", - [/* For disassembly only; pattern left blank */]> { + [/* pattern left blank */]> { + bits<4> func; let Inst{23-20} = 0b0010; - //let Inst{19-8} = 0xfff; + let Inst{19-8} = 0xfff; let Inst{7-4} = 0b0010; + let Inst{3-0} = func; +} + +// Tail calls. 
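
Before the tail-call pseudos that follow, a note on the ARMPseudoExpand machinery they share with B above and LDMIA_RET later in this diff: each such def pairs a codegen-only pseudo with the real instruction it prints as, plus any fixed operands (for example the always-execute AL predicate). A toy table of expansions visible in this patch; the table type is invented, and in LLVM the pairing is carried by PseudoInstExpansion:

    #include <cstdio>

    // Toy model of the pseudo -> real pairings introduced in this patch.
    struct Expansion { const char *Pseudo, *Real, *FixedOps; };

    static const Expansion Table[] = {
      { "B",         "Bcc",       "AL condition (14, zero_reg)" },
      { "TAILJMPd",  "Bcc",       "AL condition (14, zero_reg)" },
      { "TAILJMPr",  "BX",        "" },
      { "BLr9",      "BL",        "" },
      { "LDMIA_RET", "LDMIA_UPD", "" },
    };

    int main() {
      for (const Expansion &E : Table)
        std::printf("%-10s -> %-10s %s\n", E.Pseudo, E.Real, E.FixedOps);
      return 0;
    }
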
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsNotDarwin]>;
+ }
}
+
+
+
+
// Secure Monitor Call is a system instruction -- for disassembly only
def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
[/* For disassembly only; pattern left blank */]> {
@@ -1562,7 +1634,6 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
let Inst{23-0} = svc;
}
}
-def : MnemonicAlias<"swi", "svc">;
// Store Return State is a system instruction -- for disassembly only
let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
@@ -1908,10 +1979,12 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
+ // IA is the default, so no need for an explicit suffix on the
+ // mnemonic here. The form without it is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
let Inst{24-23} = 0b01; // Increment After
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
@@ -1919,7 +1992,7 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -1984,17 +2057,14 @@ defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
} // neverHasSideEffects
-// Load / Store Multiple Mnemonic Aliases
-def : MnemonicAlias<"ldm", "ldmia">;
-def : MnemonicAlias<"stm", "stmia">;
-
// FIXME: remove when we have a way to marking a MI with these properties.
// FIXME: Should pc be an implicit operand like PICADD, etc?
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, - reglist:$regs, variable_ops), - Size4Bytes, IIC_iLoad_mBr, []>, +def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + 4, IIC_iLoad_mBr, [], + (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>, RegConstraint<"$Rn = $wb">; //===----------------------------------------------------------------------===// @@ -2164,7 +2234,7 @@ defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; def SBFX : I<(outs GPR:$Rd), (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { bits<4> Rd; @@ -2181,7 +2251,7 @@ def SBFX : I<(outs GPR:$Rd), def UBFX : I<(outs GPR:$Rd), (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { bits<4> Rd; @@ -2202,10 +2272,10 @@ def UBFX : I<(outs GPR:$Rd), defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>>; + BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; // ADD and SUB with 's' bit set. defm ADDS : AI1_bin_s_irs<0b0100, "adds", @@ -2216,9 +2286,11 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, + "ADC", 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>, + "SBC">; // ADC and SUBC with 's' bit set. let usesCustomInserter = 1 in { @@ -2271,13 +2343,13 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), // NOTE: CPSR def omitted because it will be handled by the custom inserter. let usesCustomInserter = 1 in { def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - Size4Bytes, IIC_iALUi, + 4, IIC_iALUi, [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>; def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - Size4Bytes, IIC_iALUr, + 4, IIC_iALUr, [/* For disassembly only; pattern left blank */]>; def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - Size4Bytes, IIC_iALUsr, + 4, IIC_iALUsr, [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>; } @@ -2325,10 +2397,10 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), // NOTE: CPSR def omitted because it will be handled by the custom inserter. 
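// Aside: the RSBS/RSCS pseudos around this note lean on usesCustomInserter,
// which makes instruction selection hand the node to the target's
// EmitInstrWithCustomInserter() hook rather than emitting it directly (that
// hook is where the CPSR def gets materialized). Parse-only toy sketch with
// stand-in defs, not the real Instruction class:
def outs; // dag operators, normally provided by Target.td
def ins;
def GPR;  // stand-in register class
class ToyPseudo<dag oops, dag iops> {
  dag OutOperandList = oops;
  dag InOperandList = iops;
  bit usesCustomInserter = 1; // expanded in C++ after ISel
}
def ToyRSBSri : ToyPseudo<(outs GPR:$Rd), (ins GPR:$Rn)>;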
 let usesCustomInserter = 1, Uses = [CPSR] in {
 def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
-                          Size4Bytes, IIC_iALUi,
+                          4, IIC_iALUi,
                           [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
 def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
-                          Size4Bytes, IIC_iALUsr,
+                          4, IIC_iALUsr,
                           [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
 }
@@ -2528,19 +2600,19 @@ def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
 
 defm AND   : AsI1_bin_irs<0b0000, "and",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+                          BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
 defm ORR   : AsI1_bin_irs<0b1100, "orr",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+                          BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
 defm EOR   : AsI1_bin_irs<0b0001, "eor",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+                          BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
 defm BIC   : AsI1_bin_irs<0b1110, "bic",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+                          BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
 
 def BFC    : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "bfc", "\t$Rd, $imm", "$src = $Rd",
                [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
                Requires<[IsARM, HasV6T2]> {
@@ -2555,7 +2627,7 @@ def BFC    : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
 
 // A8.6.18  BFI - Bitfield insert (Encoding A1)
 def BFI    : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
                [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
                                bf_inv_mask_imm:$imm))]>,
@@ -2575,7 +2647,7 @@ def BFI    : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
 let isAsmParserOnly = 1 in
 def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
                                lsb_pos_imm:$lsb, width_imm:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
                []>, Requires<[IsARM, HasV6T2]> {
   bits<4> Rd;
@@ -2652,31 +2724,26 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
   let Inst{3-0} = Rn;
 }
 
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+// property. Remove them when it's possible to add those properties
+// on an individual MachineInstr, not just an instruction description.
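// Aside on the FIXME above: the v5 pseudos that follow (MULv5, MLAv5,
// SMULLv5, ...) duplicate the real instructions solely to carry the
// @earlyclobber constraint, which keeps the register allocator from
// assigning the destination the same register as a source, an architectural
// restriction on pre-v6 multiplies. Parse-only toy sketch:
def outs; def ins; // dag operators, normally provided by Target.td
def GPR;           // stand-in register class
class ToyInst<dag oops, dag iops> {
  dag OutOperandList = oops;
  dag InOperandList = iops;
  string Constraints = "";
}
def ToyMUL : ToyInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm)>;
// Same operands plus the allocation constraint; expands to ToyMUL on v6+.
let Constraints = "@earlyclobber $Rd" in
def ToyMULv5 : ToyInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm)>;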
let isCommutable = 1 in { -let Constraints = "@earlyclobber $Rd" in -def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, - pred:$p, cc_out:$s), - Size4Bytes, IIC_iMUL32, - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, NoV6]>; - def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b0000; } -} let Constraints = "@earlyclobber $Rd" in -def MLAv5: ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMAC32, - [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, NoV6]> { - bits<4> Ra; - let Inst{15-12} = Ra; +def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, + pred:$p, cc_out:$s), + 4, IIC_iMUL32, + [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))], + (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; } + def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, @@ -2685,6 +2752,14 @@ def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), let Inst{15-12} = Ra; } +let Constraints = "@earlyclobber $Rd" in +def MLAv5: ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), + 4, IIC_iMAC32, + [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))], + (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; + def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, @@ -2700,49 +2775,34 @@ def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), } // Extra precision multiplies with low / high results - let neverHasSideEffects = 1 in { let isCommutable = 1 in { -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { -def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMUL64, []>, - Requires<[IsARM, NoV6]>; - -def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMUL64, []>, - Requires<[IsARM, NoV6]>; -} - def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, + (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV6]>; def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, + (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsARM, HasV6]>; -} -// Multiply + accumulate let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { -def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMAC64, []>, - Requires<[IsARM, NoV6]>; -def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), +def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMAC64, []>, + 4, IIC_iMUL64, [], + (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; -def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), + +def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - Size4Bytes, 
IIC_iMAC64, []>, + 4, IIC_iMUL64, [], + (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; - +} } +// Multiply + accumulate def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, @@ -2765,6 +2825,25 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), let Inst{11-8} = Rm; let Inst{3-0} = Rn; } + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { +def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + 4, IIC_iMAC64, [], + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; +def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + 4, IIC_iMAC64, [], + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; +def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, pred:$p), + 4, IIC_iMAC64, [], + (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>, + Requires<[IsARM, NoV6]>; +} + } // neverHasSideEffects // Most significant word multiply @@ -3005,31 +3084,22 @@ def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "rev", "\t$Rd, $Rm", [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>; +let AddedComplexity = 5 in def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "rev16", "\t$Rd, $Rm", - [(set GPR:$Rd, - (or (and (srl GPR:$Rm, (i32 8)), 0xFF), - (or (and (shl GPR:$Rm, (i32 8)), 0xFF00), - (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000), - (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>, + [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>, Requires<[IsARM, HasV6]>; +let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", - [(set GPR:$Rd, - (sext_inreg - (or (srl GPR:$Rm, (i32 8)), - (shl GPR:$Rm, (i32 8))), i16))]>, + [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>, Requires<[IsARM, HasV6]>; -def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), - (shl GPR:$Rm, (i32 8))), i16), +def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)), + (and (srl GPR:$Rm, (i32 8)), 0xFF)), (REVSH GPR:$Rm)>; -// Need the AddedComplexity or else MOVs + REV would be chosen. -let AddedComplexity = 5 in -def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>; - def lsl_shift_imm : SDNodeXForm<imm, [{ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); return CurDAG->getTargetConstant(Sh, MVT::i32); @@ -3177,26 +3247,26 @@ def BCCZi64 : PseudoInst<(outs), // a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), - Size4Bytes, IIC_iCMOVr, + 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; def MOVCCs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_reg:$shift, pred:$p), - Size4Bytes, IIC_iCMOVsr, + 4, IIC_iCMOVsr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; let isMoveImm = 1 in def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm, pred:$p), - Size4Bytes, IIC_iMOVi, + 4, IIC_iMOVi, []>, RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; let isMoveImm = 1 in def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_imm:$imm, pred:$p), - Size4Bytes, IIC_iCMOVi, + 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; @@ -3204,12 +3274,12 @@ def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), let isMoveImm = 1 in def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, i32imm:$src, pred:$p), - Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; + 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; let isMoveImm = 1 in def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_imm:$imm, pred:$p), - Size4Bytes, IIC_iCMOVi, + 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; } // neverHasSideEffects @@ -3235,19 +3305,20 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, } def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, - "dsb", "\t$opt", - [/* For disassembly only; pattern left blank */]>, + "dsb", "\t$opt", []>, Requires<[IsARM, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf57ff04; let Inst{3-0} = opt; } -// ISB has only full system option -- for disassembly only -def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>, +// ISB has only full system option +def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, + "isb", "\t$opt", []>, Requires<[IsARM, HasDB]> { + bits<4> opt; let Inst{31-4} = 0xf57ff06; - let Inst{3-0} = 0b1111; + let Inst{3-0} = opt; } let usesCustomInserter = 1 in { @@ -3410,8 +3481,8 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb", // Coprocessor Instructions. 
// -def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), +def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { @@ -3431,8 +3502,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, let Inst{23-20} = opc1; } -def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { @@ -3455,7 +3526,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, class ACI<dag oops, dag iops, string opc, string asm, IndexMode im = IndexModeNone> - : InoP<oops, iops, AddrModeNone, Size4Bytes, im, BrFrm, NoItinerary, + : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary, opc, asm, "", [/* For disassembly only; pattern left blank */]> { let Inst{27-25} = 0b110; } @@ -3583,8 +3654,8 @@ class MovRCopro<string opc, bit direction, dag oops, dag iops, def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, (outs), - (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, i32imm:$opc2), + (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, @@ -3620,8 +3691,8 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops, def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, (outs), - (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, i32imm:$opc2), + (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, @@ -3635,7 +3706,7 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, class MovRRCopro<string opc, bit direction, list<dag> pattern = [/* For disassembly only */]> - : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{23-21} = 0b010; @@ -3661,7 +3732,7 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; class MovRRCopro2<string opc, bit direction, list<dag> pattern = [/* For disassembly only */]> - : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { let Inst{31-28} = 0b1111; @@ -3812,6 +3883,13 @@ def Int_eh_sjlj_dispatchsetup : // Non-Instruction Patterns // +// ARMv4 indirect branch using (MOVr PC, dst) +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in + def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst), + 4, IIC_Br, [(brind GPR:$dst)], + (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, + Requires<[IsARM, NoV4T]>; + // Large immediate handling. // 32-bit immediate using two piece so_imms or movw + movt. 
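// Aside on MOVPCRX above: plain ARMv4 has no BX, so an indirect branch is a
// write of the target register into pc via MOVr, and the pattern is fenced
// off with NoV4T so it is only selected there. Parse-only toy sketch of two
// predicate-disjoint lowerings of one operation (all names are stand-ins):
def outs; def ins;
def GPR;
def brind_; // stand-in for the brind SDNode
class ToyPredicate;
def IsARM_  : ToyPredicate;
def HasV4T_ : ToyPredicate;
def NoV4T_  : ToyPredicate;
class ToyInst<list<dag> pat, list<ToyPredicate> preds> {
  list<dag> Pattern = pat;
  list<ToyPredicate> Predicates = preds;
}
def ToyBX      : ToyInst<[(brind_ GPR:$dst)], [IsARM_, HasV4T_]>; // bx $dst
def ToyMOVPCRX : ToyInst<[(brind_ GPR:$dst)], [IsARM_, NoV4T_]>;  // mov pc, $dst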
@@ -3977,3 +4055,22 @@ include "ARMInstrVFP.td" include "ARMInstrNEON.td" +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +// Memory barriers +def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>; +def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>; +def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>; + +// System instructions +def : MnemonicAlias<"swi", "svc">; + +// Load / Store Multiple +def : MnemonicAlias<"ldmfd", "ldm">; +def : MnemonicAlias<"ldmia", "ldm">; +def : MnemonicAlias<"stmfd", "stmdb">; +def : MnemonicAlias<"stmia", "stm">; +def : MnemonicAlias<"stmea", "stm">; + diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 79d95d9b2683..0df62f456343 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -175,7 +175,7 @@ class VLDQQWBPseudo<InstrItinClass itin> (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; class VLDQQQQPseudo<InstrItinClass itin> - : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">; + : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">; class VLDQQQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, @@ -1387,7 +1387,7 @@ class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]> { + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ let Rm = 0b1111; } class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> @@ -3793,7 +3793,8 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VCNTiD, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; + [(set DPR:$Vd, + (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), @@ -3803,7 +3804,8 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VCNTiQ, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; + [(set QPR:$Vd, + (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), @@ -4212,17 +4214,12 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // Vector Move Operations. // VMOV : Vector Move (Register) +def : InstAlias<"vmov${p} $Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +def : InstAlias<"vmov${p} $Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; let neverHasSideEffects = 1 in { -def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm), - N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> { - let Vn{4-0} = Vm{4-0}; -} -def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm), - N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> { - let Vn{4-0} = Vm{4-0}; -} - // Pseudo vector move instructions for QQ and QQQQ registers. This should // be expanded after register allocation is completed. 
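// Aside: the InstAlias records above replace the old VMOVDneon/VMOVQ
// encodings; a NEON register move "vmov Dd, Dm" is literally
// "vorr Dd, Dm, Dm", so the alias repeats the source operand in the result
// dag. Parse-only toy of the same trick (the real InstAlias class lives in
// Target.td):
def DPR;    // stand-in register class
def VORRd_; // stand-in for the real VORRd record
class ToyInstAlias<string asm, dag result> {
  string AsmString = asm;
  dag ResultInst = result;
}
def : ToyInstAlias<"vmov $Vd, $Vm", (VORRd_ DPR:$Vd, DPR:$Vm, DPR:$Vm)>;
// The VMOVQQ pseudos below stay: no single vorr covers a QQ or QQQQ
// super-register, so those moves are split up after register allocation.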
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), @@ -4702,11 +4699,10 @@ def VEXTd32 : VEXTd<"vext", "32", v2i32> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } -def VEXTdf : VEXTd<"vext", "32", v2f32> { - let Inst{11-10} = index{1-0}; - let Inst{9-8} = 0b00; - -} +def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), + (v2f32 DPR:$Vm), + (i32 imm:$index))), + (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; def VEXTq8 : VEXTq<"vext", "8", v16i8> { let Inst{11-8} = index{3-0}; @@ -4719,10 +4715,10 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } -def VEXTqf : VEXTq<"vext", "32", v4f32> { - let Inst{11-10} = index{1-0}; - let Inst{9-8} = 0b00; -} +def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), + (v4f32 QPR:$Vm), + (i32 imm:$index))), + (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>; // VTRN : Vector Transpose diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 4777189934c0..bfe83eceb13f 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -26,17 +26,14 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); }]>; -/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. -def imm0_7 : ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 8; -}]>; def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; -def imm0_255 : ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 256; -}]>; +def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; } +def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { + let ParserMatchClass = imm0_255_asmoperand; +} def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; @@ -74,10 +71,12 @@ def t_adrlabel : Operand<i32> { // Scaled 4 immediate. def t_imm_s4 : Operand<i32> { let PrintMethod = "printThumbS4ImmOperand"; + let OperandType = "OPERAND_IMMEDIATE"; } // Define Thumb specific addressing modes. +let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand<OtherVT> { let EncoderMethod = "getThumbBRTargetOpValue"; } @@ -97,6 +96,7 @@ def t_bltarget : Operand<i32> { def t_blxtarget : Operand<i32> { let EncoderMethod = "getThumbBLXTargetOpValue"; } +} def MemModeRegThumbAsmOperand : AsmOperandClass { let Name = "MemModeRegThumb"; @@ -360,27 +360,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, // Control Flow Instructions. // -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", - [(ARMretflag)]>, - T1Special<{1,1,0,?}> { - // A6.2.3 & A8.6.25 - let Inst{6-3} = 0b1110; // Rm = lr - let Inst{2-0} = 0b000; - } - - // Alternative return instruction used by vararg functions. 
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm), - IIC_Br, "bx\t$Rm", - []>, - T1Special<{1,1,0,?}> { - // A6.2.3 & A8.6.25 - bits<4> Rm; - let Inst{6-3} = Rm; - let Inst{2-0} = 0b000; - } -} - // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>, @@ -390,31 +369,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { let Inst{6-3} = Rm; let Inst{2-0} = 0b000; } - - def tBRIND : TI<(outs), (ins GPR:$Rm), - IIC_Br, - "mov\tpc, $Rm", - [(brind GPR:$Rm)]>, - T1Special<{1,0,?,?}> { - // A8.6.97 - bits<4> Rm; - let Inst{7} = 1; // <Rd> = Inst{7:2-0} = pc - let Inst{6-3} = Rm; - let Inst{2-0} = 0b111; - } } -// FIXME: remove when we have a way to marking a MI with these properties. -let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, - hasExtraDefRegAllocReq = 1 in -def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), - IIC_iPop_Br, - "pop${p}\t$regs", []>, - T1Misc<{1,1,0,?,?,?,?}> { - // A8.6.121 - bits<16> regs; - let Inst{8} = regs{15}; // registers = P:'0000000':register_list - let Inst{7-0} = regs{7-0}; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br, + [(ARMretflag)], (tBX LR, pred:$p)>; + + // Alternative return instruction used by vararg functions. + def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p), + 2, IIC_Br, [], + (tBX GPR:$Rm, pred:$p)>; } // All calls clobber the non-callee saved registers. SP is marked as a use to @@ -464,7 +428,7 @@ let isCall = 1, // ARMv4T def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), - Size4Bytes, IIC_Br, + 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>; } @@ -516,7 +480,7 @@ let isCall = 1, // ARMv4T def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), - Size4Bytes, IIC_Br, + 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb, IsThumb1Only, IsDarwin]>; } @@ -534,12 +498,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { // Just a pseudo for a tBL instruction. Needed to let regalloc know about // the clobber of LR. let Defs = [LR] in - def tBfar : tPseudoInst<(outs), (ins t_bltarget:$target), - Size4Bytes, IIC_Br, []>; + def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target), + 4, IIC_Br, [], (tBL t_bltarget:$target)>; def tBR_JTr : tPseudoInst<(outs), (ins tGPR:$target, i32imm:$jt, i32imm:$id), - SizeSpecial, IIC_Br, + 0, IIC_Br, [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> { list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } @@ -583,6 +547,33 @@ let isBranch = 1, isTerminator = 1 in { } } +// Tail calls +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + // Darwin versions. + let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], + Uses = [SP] in { + // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls + // on Darwin), so it's in ARMInstrThumb2.td. + def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), + 4, IIC_Br, [], + (tBX GPR:$dst, (ops 14, zero_reg))>, + Requires<[IsThumb, IsDarwin]>; + } + // Non-Darwin versions (the difference is R9). 
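// Aside: in the expansion dags above and below, (ops 14, zero_reg) spells
// out the "always" predicate, ARM condition code 14 (AL) plus zero_reg for
// the absent CPSR source; the Darwin/non-Darwin split itself only changes
// whether R9, which has an ABI-specific role on Darwin, joins the clobber
// list. Parse-only toy of such an expansion dag (stand-in names):
def ops;      // operand-group dag operator (a real def in Target.td)
def zero_reg; // "no register" placeholder (also a real def in Target.td)
def GPR;
def tBX_;     // stand-in for the real tBX record
class ToyExpand<dag result> {
  dag ResultInst = result;
}
def ToyTAILJMPr : ToyExpand<(tBX_ GPR:$dst, (ops 14, zero_reg))>;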
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], + Uses = [SP] in { + def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops), + 4, IIC_Br, [], + (tB t_brtarget:$dst)>, + Requires<[IsThumb, IsNotDarwin]>; + def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), + 4, IIC_Br, [], + (tBX GPR:$dst, (ops 14, zero_reg))>, + Requires<[IsThumb, IsNotDarwin]>; + } +} + + // A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only // A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1111 then SEE SVC @@ -685,19 +676,6 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } -// Special instruction for restore. It cannot clobber condition register -// when it's expanded by eliminateCallFramePseudoInstr(). -let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in -// FIXME: Pseudo for tLDRspi -def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i, - "ldr", "\t$dst, $addr", []>, - T1LdStSP<{1,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} - // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1 in @@ -739,9 +717,9 @@ defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1, // A8.6.207 & A8.6.205 defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2, - t_addrmode_is2, AddrModeT1_2, - IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", - BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + t_addrmode_is2, AddrModeT1_2, + IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", + BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, @@ -754,19 +732,6 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, let Inst{7-0} = addr; } -let mayStore = 1, neverHasSideEffects = 1 in -// Special instruction for spill. It cannot clobber condition register when it's -// expanded by eliminateCallFramePseudoInstr(). -// FIXME: Pseudo for tSTRspi -def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i, - "str", "\t$src, $addr", []>, - T1LdStSP<{0,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} - //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -911,7 +876,8 @@ def tADC : // A8.6.2 // Add immediate def tADDi3 : // A8.6.4 T1 - T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), IIC_iALUi, + T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), + IIC_iALUi, "add", "\t$Rd, $Rm, $imm3", [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> { bits<3> imm3; @@ -1071,7 +1037,7 @@ def tLSRrr : // A8.6.91 // Move register let isMoveImm = 1 in -def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi, +def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi, "mov", "\t$Rd, $imm8", [(set tGPR:$Rd, imm0_255:$imm8)]>, T1General<{1,0,0,?,?}> { @@ -1082,18 +1048,18 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi, let Inst{7-0} = imm8; } -// TODO: A7-73: MOV(2) - mov setting flag. +// A7-73: MOV(2) - mov setting flag. let neverHasSideEffects = 1 in { -// FIXME: Make this predicable. 
-def tMOVr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<0b1000> { +def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, + 2, IIC_iMOVr, + "mov", "\t$Rd, $Rm", "", []>, + T1Special<{1,0,?,?}> { // A8.6.97 bits<4> Rd; bits<4> Rm; - // Bits {7-6} are encoded by the T1Special value. - let Inst{5-3} = Rm{2-0}; + let Inst{7} = Rd{3}; + let Inst{6-3} = Rm; let Inst{2-0} = Rd{2-0}; } let Defs = [CPSR] in @@ -1106,39 +1072,6 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, let Inst{5-3} = Rm; let Inst{2-0} = Rd; } - -// FIXME: Make these predicable. -def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<{1,0,0,?}> { - // A8.6.97 - bits<4> Rd; - bits<4> Rm; - // Bit {7} is encoded by the T1Special value. - let Inst{6-3} = Rm; - let Inst{2-0} = Rd{2-0}; -} -def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<{1,0,?,0}> { - // A8.6.97 - bits<4> Rd; - bits<4> Rm; - // Bit {6} is encoded by the T1Special value. - let Inst{7} = Rd{3}; - let Inst{5-3} = Rm{2-0}; - let Inst{2-0} = Rd{2-0}; -} -def tMOVgpr2gpr : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, - "mov\t$Rd, $Rm", []>, - T1Special<{1,0,?,?}> { - // A8.6.97 - bits<4> Rd; - bits<4> Rm; - let Inst{7} = Rd{3}; - let Inst{6-3} = Rm; - let Inst{2-0} = Rd{2-0}; -} } // neverHasSideEffects // Multiply register @@ -1175,31 +1108,16 @@ def tREV16 : // A8.6.135 T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "rev16", "\t$Rd, $Rm", - [(set tGPR:$Rd, - (or (and (srl tGPR:$Rm, (i32 8)), 0xFF), - (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00), - (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000), - (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>, + [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>, Requires<[IsThumb, IsThumb1Only, HasV6]>; def tREVSH : // A8.6.136 T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", - [(set tGPR:$Rd, - (sext_inreg - (or (srl tGPR:$Rm, (i32 8)), - (shl tGPR:$Rm, (i32 8))), i16))]>, + [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), - (shl tGPR:$Rm, (i32 8))), i16), - (tREVSH tGPR:$Rm)>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; - -def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; - // Rotate right register def tROR : // A8.6.139 T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), @@ -1294,31 +1212,6 @@ let usesCustomInserter = 1 in // Expanded after instruction selection. NoItinerary, [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; - -// 16-bit movcc in IT blocks for Thumb2. -let neverHasSideEffects = 1 in { -def tMOVCCr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iCMOVr, - "mov", "\t$Rdn, $Rm", []>, - T1Special<{1,0,?,?}> { - bits<4> Rdn; - bits<4> Rm; - let Inst{7} = Rdn{3}; - let Inst{6-3} = Rm; - let Inst{2-0} = Rdn{2-0}; -} - -let isMoveImm = 1 in -def tMOVCCi : T1pIt<(outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$Rm), IIC_iCMOVi, - "mov", "\t$Rdn, $Rm", []>, - T1General<{1,0,0,?,?}> { - bits<3> Rdn; - bits<8> Rm; - let Inst{10-8} = Rdn; - let Inst{7-0} = Rm; -} - -} // neverHasSideEffects - // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. 
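// Aside on the tMOVr rewrite above: the four old mov variants
// (tGPR/GPR operand combinations) collapse into one predicable record over
// the full GPR class, since MOV (register, encoding T1) already carries a
// 4-bit Rd split across the halfword (A8.6.97). Parse-only toy repeating the
// same bit bookkeeping:
class ToyMOVrEnc {
  bits<16> Inst;
  bits<4> Rd;
  bits<4> Rm;
  let Inst{15-8} = 0b01000110; // MOV (register), encoding T1
  let Inst{7}    = Rd{3};      // high bit of Rd lands at bit 7
  let Inst{6-3}  = Rm;
  let Inst{2-0}  = Rd{2-0};
}
def ToyMOVr : ToyMOVrEnc;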
@@ -1333,118 +1226,22 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), let neverHasSideEffects = 1, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), - Size2Bytes, IIC_iALUi, []>; + 2, IIC_iALUi, []>; def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - Size2Bytes, IIC_iALUi, []>; - -//===----------------------------------------------------------------------===// -// Move between coprocessor and ARM core register -- for disassembly only -// - -class tMovRCopro<string opc, bit direction, dag oops, dag iops, - list<dag> pattern> - : T1Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), - pattern> { - let Inst{27-24} = 0b1110; - let Inst{20} = direction; - let Inst{4} = 1; - - bits<4> Rt; - bits<4> cop; - bits<3> opc1; - bits<3> opc2; - bits<4> CRm; - bits<4> CRn; - - let Inst{15-12} = Rt; - let Inst{11-8} = cop; - let Inst{23-21} = opc1; - let Inst{7-5} = opc2; - let Inst{3-0} = CRm; - let Inst{19-16} = CRn; -} - -def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, - (outs), - (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, i32imm:$opc2), - [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; -def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), - (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - []>; - -def : Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), - (tMRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>, - Requires<[IsThumb, HasV6T2]>; - -class tMovRRCopro<string opc, bit direction, - list<dag> pattern = [/* For disassembly only */]> - : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { - let Inst{27-24} = 0b1100; - let Inst{23-21} = 0b010; - let Inst{20} = direction; - - bits<4> Rt; - bits<4> Rt2; - bits<4> cop; - bits<4> opc1; - bits<4> CRm; - - let Inst{15-12} = Rt; - let Inst{19-16} = Rt2; - let Inst{11-8} = cop; - let Inst{7-4} = opc1; - let Inst{3-0} = CRm; -} - -def tMCRR : tMovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, - imm:$CRm)]>; -def tMRRC : tMovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; - -//===----------------------------------------------------------------------===// -// Other Coprocessor Instructions. For disassembly only. -// -def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]> { - let Inst{27-24} = 0b1110; - - bits<4> opc1; - bits<4> CRn; - bits<4> CRd; - bits<4> cop; - bits<3> opc2; - bits<4> CRm; - - let Inst{3-0} = CRm; - let Inst{4} = 0; - let Inst{7-5} = opc2; - let Inst{11-8} = cop; - let Inst{15-12} = CRd; - let Inst{19-16} = CRn; - let Inst{23-20} = opc1; -} + 2, IIC_iALUi, []>; //===----------------------------------------------------------------------===// // TLS Instructions // // __aeabi_read_tp preserves the registers r1-r3. -let isCall = 1, Defs = [R0, LR], Uses = [SP] in -def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br, - "bl\t__aeabi_read_tp", - [(set R0, ARMthread_pointer)]> { - // Encoding is 0xf7fffffe. 
-  let Inst = 0xf7fffffe;
-}
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+                          [(set R0, ARMthread_pointer)]>;
 
 //===----------------------------------------------------------------------===//
 // SJLJ Exception handling intrinsics
@@ -1463,14 +1260,14 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
 let Defs = [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7, R12, CPSR ],
   hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
 def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
-                                  AddrModeNone, SizeSpecial, NoItinerary, "","",
+                                  AddrModeNone, 0, NoItinerary, "","",
                                   [(set R0, (ARMeh_sjlj_setjmp tGPR:$src,
                                              tGPR:$val))]>;
 
 // FIXME: Non-Darwin version(s)
 let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
     Defs = [ R7, LR, SP ] in
 def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
                               AddrModeNone, 0, IndexModeNone,
                               Pseudo, NoItinerary, "", "",
                               [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
                              Requires<[IsThumb, IsDarwin]>;
@@ -1583,3 +1380,18 @@ def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
                [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                            imm:$cp))]>,
                Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: remove when we have a way to mark a MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+    hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+                             2, IIC_iPop_Br, [],
+                             (tPOP pred:$p, reglist:$regs)>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+  def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+                             2, IIC_Br, [(brind GPR:$Rm)],
+                             (tMOVr PC, GPR:$Rm, pred:$p)>;
+}
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 598660c69fbe..c2c6cbcac0f5 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -44,9 +44,11 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
 // t2_so_imm - Match a 32-bit immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
 // immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
 def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
   return ARM_AM::getT2SOImmVal(Imm) != -1;
 }]> {
+  let ParserMatchClass = t2_so_imm_asmoperand;
   let EncoderMethod = "getT2SOImmOpValue";
 }
@@ -463,7 +465,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
 /// changed to modify CPSR.
 multiclass T2I_bin_irs<bits<4> opcod, string opc,
                InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
-               PatFrag opnode, bit Commutable = 0, string wide = ""> {
+               PatFrag opnode, string baseOpc, bit Commutable = 0,
+               string wide = ""> {
   // shifted imm
   def ri : T2sTwoRegImm<
            (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
@@ -495,14 +498,31 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
     let Inst{26-25} = 0b01;
     let Inst{24-21} = opcod;
   }
+  // Assembly aliases for optional destination operand when it's the same
+  // as the source operand.
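// Aside: the aliases announced above (their bodies follow) are why
// T2I_bin_irs now takes its own base record name as the baseOpc string: the
// multiclass refers back to the instructions it just created with
// !cast<Instruction>(!strconcat(baseOpc, ...)). Self-contained toy of the
// same trick (toy classes, not the real Instruction/InstAlias):
class ToyInst<string asm> {
  string AsmString = asm;
}
class ToyAlias<string asm, dag result> {
  string AsmString = asm;
  dag ResultInst = result;
}
multiclass toy_bin_irs<string opc, string baseOpc> {
  def rr : ToyInst<!strconcat(opc, "\t$Rd, $Rn, $Rm")>;
  // Two-operand spelling: $Rdn doubles as destination and first source, and
  // the alias finds its sibling record by the concatenated name.
  def : ToyAlias<!strconcat(opc, "\t$Rdn, $Rm"),
                 (!cast<ToyInst>(!strconcat(baseOpc, "rr")))>;
}
defm ToyAND : toy_bin_irs<"and", "ToyAND">; // creates ToyANDrr plus alias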
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + t2_so_imm:$imm, pred:$p, + cc_out:$s)>, + Requires<[IsThumb2]>; + def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), + (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + rGPR:$Rm, pred:$p, + cc_out:$s)>, + Requires<[IsThumb2]>; + def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), + (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, + t2_so_reg:$shift, pred:$p, + cc_out:$s)>, + Requires<[IsThumb2]>; } /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need -// the ".w" prefix to indicate that they are wide. +// the ".w" suffix to indicate that they are wide. multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> : - T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">; + PatFrag opnode, string baseOpc, bit Commutable = 0> : + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">; /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are /// reversed. The 'rr' form is only defined for the disassembler; for codegen @@ -696,18 +716,18 @@ let usesCustomInserter = 1 in { multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { // shifted imm def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), - Size4Bytes, IIC_iALUi, + 4, IIC_iALUi, [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>; // register def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - Size4Bytes, IIC_iALUr, + 4, IIC_iALUr, [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; } // shifted register def rs : t2PseudoInst< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - Size4Bytes, IIC_iALUsi, + 4, IIC_iALUsi, [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>; } } @@ -1018,7 +1038,8 @@ multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { // supported yet. 
multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> { def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, - opc, "\t$Rd, $Rm", []> { + opc, "\t$Rd, $Rm", []>, + Requires<[IsThumb2, HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -1028,7 +1049,8 @@ multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> { let Inst{5-4} = 0b00; // rotate } def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr, - opc, "\t$Rd, $Rm, ror $rot", []> { + opc, "\t$Rd, $Rm, ror $rot", []>, + Requires<[IsThumb2, HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -1084,7 +1106,7 @@ multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot), + def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot), IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -1142,93 +1164,13 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), - Size4Bytes, IIC_iALUi, []>; + 4, IIC_iALUi, []>; def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - Size4Bytes, IIC_iALUi, + 4, IIC_iALUi, []>; -// FIXME: None of these add/sub SP special instructions should be necessary -// at all for thumb2 since they use the same encodings as the generic -// add/sub instructions. In thumb1 we need them since they have dedicated -// encodings. At the least, they should be pseudo instructions. -// ADD r, sp, {so_imm|i12} -let isCodeGenOnly = 1 in { -def t2ADDrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), - IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = 0b1000; - let Inst{15} = 0; -} -def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), - IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25-20} = 0b100000; - let Inst{15} = 0; -} - -// ADD r, sp, so_reg -def t2ADDrSPs : T2sTwoRegShiftedReg< - (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = 0b1000; - let Inst{15} = 0; -} - -// SUB r, sp, {so_imm|i12} -def t2SUBrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), - IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = 0b1101; - let Inst{15} = 0; -} -def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), - IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11110; - let Inst{25-20} = 0b101010; - let Inst{15} = 0; -} - -// SUB r, sp, so_reg -def t2SUBrSPs : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm), - IIC_iALUsi, - "sub", "\t$Rd, $Rn, $imm", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = 0b1101; - let Inst{19-16} = 0b1101; // Rn = sp - let Inst{15} = 0; -} -} // end isCodeGenOnly = 1 - -// Signed and unsigned division on v7-M -def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, - "sdiv", "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb2]> { - let 
Inst{31-27} = 0b11111; - let Inst{26-21} = 0b011100; - let Inst{20} = 0b1; - let Inst{15-12} = 0b1111; - let Inst{7-4} = 0b1111; -} - -def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, - "udiv", "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-21} = 0b011101; - let Inst{20} = 0b1; - let Inst{15-12} = 0b1111; - let Inst{7-4} = 0b1111; -} - //===----------------------------------------------------------------------===// // Load / store Instructions. // @@ -1668,6 +1610,10 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, let Inst{15} = 0; } +def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, + pred:$p, cc_out:$s)>, + Requires<[IsThumb2]>; + let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", @@ -1788,8 +1734,10 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; -defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, + node:$RHS)>, 1>; +defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, + node:$RHS)>>; // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", @@ -1833,7 +1781,8 @@ def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm), // Select Bytes -- for disassembly only def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []> { + NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; let Inst{23} = 0b1; @@ -1849,7 +1798,8 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pat = [/* For disassembly only; pattern left blank */], dag iops = (ins rGPR:$Rn, rGPR:$Rm), string asm = "\t$Rd, $Rn, $Rm"> - : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat> { + : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0101; let Inst{22-20} = op22_20; @@ -1947,12 +1897,14 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []> { + NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{15-12} = 0b1111; } def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, - "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>; + "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsThumb2, HasThumb2DSP]>; // Signed/Unsigned saturate -- for disassembly only @@ -1985,7 +1937,8 @@ def t2SSAT: T2SatI< def t2SSAT16: T2SatI< (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; @@ -2005,10 +1958,11 @@ def t2USAT: T2SatI< let Inst{15} = 0; } -def 
t2USAT16: T2SatI< - (outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary, - "usat16", "\t$dst, $sat_imm, $Rn", - [/* For disassembly only; pattern left blank */]> { +def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), + NoItinerary, + "usat16", "\t$dst, $sat_imm, $Rn", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; let Inst{20} = 0; @@ -2084,17 +2038,18 @@ def t2MOVsra_flag : T2TwoRegShiftImm< defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + BinOpFrag<(and node:$LHS, (not node:$RHS))>, + "t2BIC">; class T2BitFI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -2194,7 +2149,8 @@ let Constraints = "$src = $Rd" in { defm t2ORN : T2I_bin_irs<0b0011, "orn", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">; + BinOpFrag<(or node:$LHS, (not node:$RHS))>, + "t2ORN", 0, "">; // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version let AddedComplexity = 1 in @@ -2277,7 +2233,8 @@ def t2UMLAL : T2MulLong<0b110, 0b0000, def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]>; } // neverHasSideEffects // Rounding variants of the below included for disassembly only @@ -2285,7 +2242,8 @@ def t2UMAAL : T2MulLong<0b110, 0b0110, // Most significant word multiply def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2294,7 +2252,8 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, } def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, - "smmulr", "\t$Rd, $Rn, $Rm", []> { + "smmulr", "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2305,7 +2264,8 @@ def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, def t2SMMLA : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]> { + [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2314,7 +2274,8 @@ def t2SMMLA : T2FourReg< def t2SMMLAR: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []> { + "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, + 
Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2324,7 +2285,8 @@ def t2SMMLAR: T2FourReg< def t2SMMLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]> { + [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2333,7 +2295,8 @@ def t2SMMLS: T2FourReg< def t2SMMLSR:T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, - "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []> { + "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2344,7 +2307,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16)))]> { + (sext_inreg rGPR:$Rm, i16)))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2356,7 +2320,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16))))]> { + (sra rGPR:$Rm, (i32 16))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2368,7 +2333,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16)))]> { + (sext_inreg rGPR:$Rm, i16)))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2380,7 +2346,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16))))]> { + (sra rGPR:$Rm, (i32 16))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2392,7 +2359,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16)))]> { + (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2404,7 +2372,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16)))]> { + (sra rGPR:$Rm, (i32 16))), (i32 16)))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2421,7 +2390,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", 
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), - (sext_inreg rGPR:$Rm, i16))))]> { + (sext_inreg rGPR:$Rm, i16))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2433,7 +2403,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), - (sra rGPR:$Rm, (i32 16)))))]> { + (sra rGPR:$Rm, (i32 16)))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2445,7 +2416,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), - (sext_inreg rGPR:$Rm, i16))))]> { + (sext_inreg rGPR:$Rm, i16))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2457,7 +2429,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), - (sra rGPR:$Rm, (i32 16)))))]> { + (sra rGPR:$Rm, (i32 16)))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2469,7 +2442,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16))))]> { + (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2481,7 +2455,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16))))]> { + (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2496,66 +2471,108 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs
rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasThumb2DSP]>; // Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD // These are for disassembly only. def t2SMUAD: T2ThreeReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []> { + IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{15-12} = 0b1111; } def t2SMUADX:T2ThreeReg_mac< 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []> { + IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{15-12} = 0b1111; } def t2SMUSD: T2ThreeReg_mac< 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []> { + IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{15-12} = 0b1111; } def t2SMUSDX:T2ThreeReg_mac< 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []> { + IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>, + Requires<[IsThumb2, HasThumb2DSP]> { let Inst{15-12} = 0b1111; } def t2SMLAD : T2ThreeReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad", - "\t$Rd, $Rn, $Rm, $Ra", []>; + "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLADX : T2FourReg_mac< 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx", - "\t$Rd, $Rn, $Rm, $Ra", []>; + "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd", - "\t$Rd, $Rn, $Rm, $Ra", []>; + "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx", - "\t$Rd, $Rn, $Rm, $Ra", []>; + "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald", - "\t$Ra, $Rd, $Rm, $Rn", []>; + "\t$Ra, $Rd, $Rm, $Rn", []>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx", - "\t$Ra, $Rd, $Rm, $Rn", []>; + "\t$Ra, $Rd, $Rm, $Rn", []>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld", - "\t$Ra, $Rd, $Rm, $Rn", []>; + "\t$Ra, $Rd, $Rm, $Rn", []>, + Requires<[IsThumb2, HasThumb2DSP]>; def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx", - "\t$Ra, $Rd, $Rm, $Rn", []>; + "\t$Ra, $Rd, $Rm, $Rn", []>, + Requires<[IsThumb2, HasThumb2DSP]>; + +//===----------------------------------------------------------------------===// +// Division Instructions.
+// Signed and unsigned division on v7-M +// +def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, + "sdiv", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, + Requires<[HasDivide, IsThumb2]> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011100; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} + +def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, + "udiv", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, + Requires<[HasDivide, IsThumb2]> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011101; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. @@ -2585,25 +2602,16 @@ def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rev16", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, - (or (and (srl rGPR:$Rm, (i32 8)), 0xFF), - (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00), - (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000), - (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>; + [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>; def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, - (sext_inreg - (or (srl rGPR:$Rm, (i32 8)), - (shl rGPR:$Rm, (i32 8))), i16))]>; + [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>; -def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), - (shl rGPR:$Rm, (i32 8))), i16), +def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)), + (and (srl rGPR:$Rm, (i32 8)), 0xFF)), (t2REVSH rGPR:$Rm)>; -def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>; - def t2PKHBT : T2ThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", @@ -2699,33 +2707,21 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -def t2MOVCCr : T2TwoReg< - (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm), IIC_iCMOVr, - "mov", ".w\t$Rd, $Rm", +def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, rGPR:$Rm, pred:$p), + 4, IIC_iCMOVr, [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd"> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = 0b0010; - let Inst{20} = 0; // The S bit. - let Inst{19-16} = 0b1111; // Rn - let Inst{14-12} = 0b000; - let Inst{7-4} = 0b0000; -} + RegConstraint<"$false = $Rd">; let isMoveImm = 1 in -def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), - IIC_iCMOVi, "mov", ".w\t$Rd, $imm", +def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, t2_so_imm:$imm, pred:$p), + 4, IIC_iCMOVi, [/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd"> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = 0b0010; - let Inst{20} = 0; // The S bit. - let Inst{19-16} = 0b1111; // Rn - let Inst{15} = 0; -} + RegConstraint<"$false = $Rd">; +// FIXME: Pseudo-ize these. For now, just mark codegen only. 
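The rev16/revsh pattern rewrite above replaces the hand-built and/or/shl/srl dag trees with bswap-based forms: byte-reversing a word and rotating it by 16 swaps the bytes within each halfword, and an arithmetic shift of the byte-reversed word by 16 yields the sign-extended byte-swapped low halfword. A minimal standalone check of that equivalence (a sketch; bswap32 and rotr32 are illustrative helpers built on the GCC/Clang __builtin_bswap32, not part of this patch):

#include <cassert>
#include <cstdint>

// Reference semantics for the rewritten selection patterns:
// rev16(x) == rotr(bswap(x), 16) and revsh(x) == sra(bswap(x), 16).
static uint32_t bswap32(uint32_t x) { return __builtin_bswap32(x); }
static uint32_t rotr32(uint32_t x, unsigned n) {
  return (x >> n) | (x << (32u - n));
}

int main() {
  uint32_t x = 0x11223344;
  // rev16 swaps the bytes within each halfword: 0x11223344 -> 0x22114433.
  assert(rotr32(bswap32(x), 16) == 0x22114433);
  // revsh byte-swaps the low halfword and sign-extends it: 0x3344 -> 0x4433;
  // bswap32(x) == 0x44332211, and an arithmetic >> 16 keeps bits 31-16.
  assert((uint32_t)((int32_t)bswap32(x) >> 16) == 0x00004433);
  return 0;
}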
+let isCodeGenOnly = 1 in { let isMoveImm = 1 in def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm), IIC_iCMOVi, @@ -2792,6 +2788,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; +} // isCodeGenOnly = 1 } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -2826,7 +2823,7 @@ def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "", let Inst{3-0} = 0b1111; } -class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern, bits<4> rt2 = 0b1111> : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> { @@ -2842,7 +2839,7 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{19-16} = addr; let Inst{15-12} = Rt; } -class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, +class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern, bits<4> rt2 = 0b1111> : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> { @@ -2861,16 +2858,15 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr", - "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr", - "", []>; -def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, - Size4Bytes, NoItinerary, - "ldrex", "\t$Rt, $addr", "", - []> { +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), + AddrModeNone, 4, NoItinerary, + "ldrexb", "\t$Rt, $addr", "", []>; +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), + AddrModeNone, 4, NoItinerary, + "ldrexh", "\t$Rt, $addr", "", []>; +def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), + AddrModeNone, 4, NoItinerary, + "ldrex", "\t$Rt, $addr", "", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000101; let Inst{11-8} = 0b1111; @@ -2884,7 +2880,7 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone let hasExtraDefRegAllocReq = 1 in def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_reg:$addr), - AddrModeNone, Size4Bytes, NoItinerary, + AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; @@ -2893,14 +2889,16 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), - AddrModeNone, Size4Bytes, NoItinerary, - "strexb", "\t$Rd, $Rt, $addr", "", []>; -def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), - AddrModeNone, Size4Bytes, NoItinerary, - "strexh", "\t$Rd, $Rt, $addr", "", []>; +def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), + (ins rGPR:$Rt, t2addrmode_reg:$addr), + AddrModeNone, 4, NoItinerary, + "strexb", "\t$Rd, $Rt, $addr", "", []>; +def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), + (ins rGPR:$Rt, t2addrmode_reg:$addr), 
+ AddrModeNone, 4, NoItinerary, + "strexh", "\t$Rd, $Rt, $addr", "", []>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), - AddrModeNone, Size4Bytes, NoItinerary, + AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", []> { let Inst{31-27} = 0b11101; @@ -2919,7 +2917,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr), - AddrModeNone, Size4Bytes, NoItinerary, + AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; @@ -2940,22 +2938,6 @@ def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex", } //===----------------------------------------------------------------------===// -// TLS Instructions -// - -// __aeabi_read_tp preserves the registers r1-r3. -let isCall = 1, - Defs = [R0, R12, LR, CPSR], Uses = [SP] in { - def t2TPsoft : T2XI<(outs), (ins), IIC_Br, - "bl\t__aeabi_read_tp", - [(set R0, ARMthread_pointer)]> { - let Inst{31-27} = 0b11110; - let Inst{15-14} = 0b11; - let Inst{12} = 1; - } -} - -//===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return // address and save #0 in R0 for the non-longjmp case. @@ -2973,7 +2955,7 @@ let Defs = QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), - AddrModeNone, SizeSpecial, NoItinerary, "", "", + AddrModeNone, 0, NoItinerary, "", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } @@ -2982,7 +2964,7 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), - AddrModeNone, SizeSpecial, NoItinerary, "", "", + AddrModeNone, 0, NoItinerary, "", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; } @@ -2993,28 +2975,14 @@ let Defs = // // FIXME: remove when we have a way to marking a MI with these properties. -// FIXME: $dst1 should be a def. But the extra ops must be in the end of the -// operand list. // FIXME: Should pc be an implicit operand like PICADD, etc? 
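The ldrex/strex definitions above mark the status result @earlyclobber so it can never be allocated to the same register as the data or address operands, which the architecture forbids. A sketch of the retry loop these instructions exist to support (GCC/Clang extended inline asm for ARMv7; atomic_add is a hypothetical helper, not part of this patch, and the "=&r" earlyclobber constraints mirror the @earlyclobber $Rd above):

#include <cstdint>

// Classic exclusive-monitor retry loop. strex writes 0 to the status
// register on success and 1 if exclusivity was lost, in which case the
// whole load/modify/store sequence must be retried.
static uint32_t atomic_add(volatile uint32_t *addr, uint32_t inc) {
  uint32_t result, status;
  do {
    asm volatile("ldrex %0, [%2]\n\t"
                 "add   %0, %0, %3\n\t"
                 "strex %1, %0, [%2]"
                 : "=&r"(result), "=&r"(status)
                 : "r"(addr), "r"(inc)
                 : "memory");
  } while (status != 0);
  return result;
}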
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, - reglist:$regs, variable_ops), - IIC_iLoad_mBr, - "ldmia${p}.w\t$Rn!, $regs", - "$Rn = $wb", []> { - bits<4> Rn; - bits<16> regs; - - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b00; - let Inst{24-23} = 0b01; // Increment After - let Inst{22} = 0; - let Inst{21} = 1; // Writeback - let Inst{20} = 1; - let Inst{19-16} = Rn; - let Inst{15-0} = regs; -} +def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + 4, IIC_iLoad_mBr, [], + (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>, + RegConstraint<"$Rn = $wb">; let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in @@ -3036,17 +3004,17 @@ def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br, let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id), - SizeSpecial, IIC_Br, + 0, IIC_Br, [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; // FIXME: Add a non-pc based case that can be predicated. def t2TBB_JT : t2PseudoInst<(outs), (ins GPR:$index, i32imm:$jt, i32imm:$id), - SizeSpecial, IIC_Br, []>; + 0, IIC_Br, []>; def t2TBH_JT : t2PseudoInst<(outs), (ins GPR:$index, i32imm:$jt, i32imm:$id), - SizeSpecial, IIC_Br, []>; + 0, IIC_Br, []>; def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, "tbb", "\t[$Rn, $Rm]", []> { @@ -3094,11 +3062,22 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let Inst{10-0} = target{11-1}; } +// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so +// it goes here. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + // Darwin version. + let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], + Uses = [SP] in + def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops), + 4, IIC_Br, [], + (t2B uncondbrtarget:$dst)>, + Requires<[IsThumb2, IsDarwin]>; +} // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), - AddrModeNone, Size2Bytes, IIC_iALUx, + AddrModeNone, 2, IIC_iALUx, "it$mask\t$cc", "", []> { // 16-bit instruction. 
let Inst{31-16} = 0x0000; @@ -3178,8 +3157,7 @@ def t2WFE : T2I_hint<0b00000010, "wfe", ".w">; def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; def t2SEV : T2I_hint<0b00000100, "sev", ".w">; -def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt", - [/* For disassembly only; pattern left blank */]> { +def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { let Inst{31-20} = 0xf3a; let Inst{15-14} = 0b10; let Inst{12} = 0; @@ -3347,12 +3325,13 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */, } //===----------------------------------------------------------------------===// -// Move between coprocessor and ARM core register -- for disassembly only +// Move between coprocessor and ARM core register // -class t2MovRCopro<string opc, bit direction, dag oops, dag iops, +class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, list<dag> pattern> - : T2Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + : T2Cop<Op, oops, iops, + !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -3373,22 +3352,10 @@ class t2MovRCopro<string opc, bit direction, dag oops, dag iops, let Inst{19-16} = CRn; } -def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */, - (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, i32imm:$opc2), - [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; -def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, - c_imm:$CRm, i32imm:$opc2), []>; - -def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, - imm:$CRm, imm:$opc2), - (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; - -class t2MovRRCopro<string opc, bit direction, - list<dag> pattern = [/* For disassembly only */]> - : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), +class t2MovRRCopro<bits<4> Op, string opc, bit direction, + list<dag> pattern = []> + : T2Cop<Op, (outs), + (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { let Inst{27-24} = 0b1100; let Inst{23-21} = 0b010; @@ -3407,19 +3374,77 @@ class t2MovRRCopro<string opc, bit direction, let Inst{3-0} = CRm; } -def t2MCRR2 : t2MovRRCopro<"mcrr2", - 0 /* from ARM core register to coprocessor */, +/* from ARM core register to coprocessor */ +def t2MCR : t2MovRCopro<0b1110, "mcr", 0, + (outs), + (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), + [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, + imm:$CRm, imm:$opc2)]>; +def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, + (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), + [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, + imm:$CRm, imm:$opc2)]>; + +/* from coprocessor to ARM core register */ +def t2MRC : t2MovRCopro<0b1110, "mrc", 1, + (outs GPR:$Rt), + (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + []>; + +def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2), []>; + +def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), + (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; + +def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), + 
(t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; + + +/* from ARM core register to coprocessor */ +def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, + [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, + imm:$CRm)]>; +def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, imm:$CRm)]>; -def t2MRRC2 : t2MovRRCopro<"mrrc2", - 1 /* from coprocessor to ARM core register */>; +/* from coprocessor to ARM core register */ +def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>; + +def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>; //===----------------------------------------------------------------------===// -// Other Coprocessor Instructions. For disassembly only. +// Other Coprocessor Instructions. // -def t2CDP2 : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), +def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), + "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, + imm:$CRm, imm:$opc2)]> { + let Inst{27-24} = 0b1110; + + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; +} + +def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 376bd9607e4b..f1f3cb9c2ecd 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -94,7 +94,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, let Inst{20} = L_bit; } def DIA_UPD : - AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, + variable_ops), IndexModeUpd, itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> { let Inst{24-23} = 0b01; // Increment After @@ -102,7 +103,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, let Inst{20} = L_bit; } def DDB_UPD : - AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, + variable_ops), IndexModeUpd, itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before @@ -124,7 +126,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, let D = VFPNeonDomain; } def SIA_UPD : - AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), + AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, + variable_ops), IndexModeUpd, itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> { let Inst{24-23} = 0b01; // Increment After @@ -136,7 +139,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, let D = VFPNeonDomain; } def SDB_UPD : - AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), + AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, + variable_ops), IndexModeUpd, itin_upd, !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before @@ -162,6 +166,15 @@ defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>; def : MnemonicAlias<"vldm", 
"vldmia">; def : MnemonicAlias<"vstm", "vstmia">; +def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>, + Requires<[HasVFP2]>; +def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>, + Requires<[HasVFP2]>; +def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>, + Requires<[HasVFP2]>; +def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>, + Requires<[HasVFP2]>; + // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores //===----------------------------------------------------------------------===// @@ -860,7 +873,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, } // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in' //===----------------------------------------------------------------------===// -// FP FMA Operations. +// FP Multiply-Accumulate Operations. // def VMLAD : ADbI<0b11100, 0b00, 0, 0, @@ -977,12 +990,12 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), let neverHasSideEffects = 1 in { def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), - Size4Bytes, IIC_fpUNA64, + 4, IIC_fpUNA64, [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, RegConstraint<"$Dn = $Dd">; def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), - Size4Bytes, IIC_fpUNA32, + 4, IIC_fpUNA32, [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, RegConstraint<"$Sn = $Sd">; } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index f4645f15a66f..c6efea1d7806 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -329,13 +329,9 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 - ? ARM::ADDri - : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri); + int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; if (Offset < 0) { - BaseOpc = !isThumb2 - ? ARM::SUBri - : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); + BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; Offset = - Offset; } int ImmedOffset = isThumb2 @@ -516,8 +512,6 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2SUBri && - MI->getOpcode() != ARM::t2SUBrSPi && - MI->getOpcode() != ARM::t2SUBrSPi12 && MI->getOpcode() != ARM::tSUBspi && MI->getOpcode() != ARM::SUBri) return false; @@ -541,8 +535,6 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2ADDri && - MI->getOpcode() != ARM::t2ADDrSPi && - MI->getOpcode() != ARM::t2ADDrSPi12 && MI->getOpcode() != ARM::tADDspi && MI->getOpcode() != ARM::ADDri) return false; @@ -1461,19 +1453,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; - const TargetInstrDesc &TID = I->getDesc(); - if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects()) + const MCInstrDesc &MCID = I->getDesc(); + if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && TID.mayStore()) + if (isLd && MCID.mayStore()) return false; if (!isLd) { - if (TID.mayLoad()) + if (MCID.mayLoad()) return false; // It's not safe to move the first 'str' down. 
// str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (TID.mayStore()) + if (MCID.mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { @@ -1672,14 +1664,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, Ops.pop_back(); Ops.pop_back(); - const TargetInstrDesc &TID = TII->get(NewOpc); - const TargetRegisterClass *TRC = TID.OpInfo[0].getRegClass(TRI); + const MCInstrDesc &MCID = TII->get(NewOpc); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); MRI->constrainRegClass(EvenReg, TRC); MRI->constrainRegClass(OddReg, TRC); // Form the pair instruction. if (isLd) { - MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID) + MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) .addReg(EvenReg, RegState::Define) .addReg(OddReg, RegState::Define) .addReg(BaseReg); @@ -1691,7 +1683,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MIB.addImm(Offset).addImm(Pred).addReg(PredReg); ++NumLDRDFormed; } else { - MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID) + MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) .addReg(EvenReg) .addReg(OddReg) .addReg(BaseReg); @@ -1742,8 +1734,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { while (MBBI != E) { for (; MBBI != E; ++MBBI) { MachineInstr *MI = MBBI; - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.isCall() || TID.isTerminator()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.isCall() || MCID.isTerminator()) { // Stop at barriers. ++MBBI; break; diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index c5f727d60642..39be3f0e39f8 100644 --- a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -21,8 +21,11 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); @@ -32,19 +35,30 @@ namespace { class ARMMCCodeEmitter : public MCCodeEmitter { ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - const TargetInstrInfo &TII; - const ARMSubtarget *Subtarget; - MCContext &Ctx; + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; public: - ARMMCCodeEmitter(TargetMachine &tm, MCContext &ctx) - : TM(tm), TII(*TM.getInstrInfo()), - Subtarget(&TM.getSubtarget<ARMSubtarget>()), Ctx(ctx) { + ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti) { } ~ARMMCCodeEmitter() {} + bool isThumb() const { + // FIXME: Can tablegen auto-generate this? 
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0; + } + bool isThumb2() const { + return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0; + } + bool isTargetDarwin() const { + Triple TT(STI.getTargetTriple()); + Triple::OSType OS = TT.getOS(); + return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS; + } + unsigned getMachineSoImmOpValue(unsigned SoImm) const; // getBinaryCodeForInstr - TableGen'erated function for getting the @@ -320,9 +334,10 @@ public: } // end anonymous namespace -MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM, +MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, MCContext &Ctx) { - return new ARMMCCodeEmitter(TM, Ctx); + return new ARMMCCodeEmitter(MCII, STI, Ctx); } /// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing /// instructions, and rewrite them to their /// Thumb2 mode. unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI, unsigned EncodedValue) const { - if (Subtarget->isThumb2()) { + if (isThumb2()) { // NEON Thumb2 data-processing encodings are very simple: bit 24 is moved // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are // set to 1111. @@ -349,7 +364,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI, /// Thumb2 mode. unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI, unsigned EncodedValue) const { - if (Subtarget->isThumb2()) { + if (isThumb2()) { EncodedValue &= 0xF0FFFFFF; EncodedValue |= 0x09000000; } @@ -362,7 +377,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI, /// Thumb2 mode. unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI, unsigned EncodedValue) const { - if (Subtarget->isThumb2()) { + if (isThumb2()) { EncodedValue &= 0x00FFFFFF; EncodedValue |= 0xEE000000; } @@ -374,7 +389,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI, /// them to their Thumb2 form if we are currently in Thumb2 mode. unsigned ARMMCCodeEmitter:: VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const { - if (Subtarget->isThumb2()) { + if (isThumb2()) { EncodedValue &= 0x0FFFFFFF; EncodedValue |= 0xE0000000; } @@ -515,7 +530,7 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { // FIXME: This really, really shouldn't use TargetMachine. We don't want // coupling between MC and TM anywhere we can help it. - if (Subtarget->isThumb2()) + if (isThumb2()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups); return getARMBranchTargetOpValue(MI, OpIdx, Fixups); @@ -624,7 +639,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind; - if (Subtarget->isThumb2()) + if (isThumb2()) Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); else Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); @@ -709,22 +724,22 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, switch (ARM16Expr->getKind()) { default: assert(0 && "Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: - if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E)) - Kind = MCFixupKind(Subtarget->isThumb2() + if (!isTargetDarwin() && EvaluateAsPCRel(E)) + Kind = MCFixupKind(isThumb2() ?
ARM::fixup_t2_movt_hi16_pcrel : ARM::fixup_arm_movt_hi16_pcrel); else - Kind = MCFixupKind(Subtarget->isThumb2() + Kind = MCFixupKind(isThumb2() ? ARM::fixup_t2_movt_hi16 : ARM::fixup_arm_movt_hi16); break; case ARMMCExpr::VK_ARM_LO16: - if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E)) - Kind = MCFixupKind(Subtarget->isThumb2() + if (!isTargetDarwin() && EvaluateAsPCRel(E)) + Kind = MCFixupKind(isThumb2() ? ARM::fixup_t2_movw_lo16_pcrel : ARM::fixup_arm_movw_lo16_pcrel); else - Kind = MCFixupKind(Subtarget->isThumb2() + Kind = MCFixupKind(isThumb2() ? ARM::fixup_t2_movw_lo16 : ARM::fixup_arm_movw_lo16); break; @@ -898,7 +913,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind; - if (Subtarget->isThumb2()) + if (isThumb2()) Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); else Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); @@ -1274,21 +1289,21 @@ void ARMMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { // Pseudo instructions don't get encoded. - const TargetInstrDesc &Desc = TII.get(MI.getOpcode()); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo) return; + int Size; - // Basic size info comes from the TSFlags field. - switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { - default: llvm_unreachable("Unexpected instruction size!"); - case ARMII::Size2Bytes: Size = 2; break; - case ARMII::Size4Bytes: Size = 4; break; - } + if (Desc.getSize() == 2 || Desc.getSize() == 4) + Size = Desc.getSize(); + else + llvm_unreachable("Unexpected instruction size!"); + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); // Thumb 32-bit wide instructions need to emit the high order halfword // first. 
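That halfword ordering is worth spelling out: a 32-bit Thumb2 encoding is stored as two little-endian halfwords with the high halfword first, not as one little-endian 32-bit word. A small sketch of the resulting byte stream (emitThumb2Word is a hypothetical standalone helper mirroring the EncodeInstruction behavior above):

#include <cstdint>
#include <cstdio>

// Emit a 32-bit Thumb2 encoding as two little-endian halfwords,
// high halfword first.
static void emitThumb2Word(uint32_t Binary, uint8_t Out[4]) {
  uint16_t Hi = (uint16_t)(Binary >> 16);
  uint16_t Lo = (uint16_t)(Binary & 0xffff);
  Out[0] = (uint8_t)(Hi & 0xff); Out[1] = (uint8_t)(Hi >> 8); // high halfword
  Out[2] = (uint8_t)(Lo & 0xff); Out[3] = (uint8_t)(Lo >> 8); // low halfword
}

int main() {
  uint8_t B[4];
  emitThumb2Word(0xF3AF8000, B); // nop.w
  printf("%02x %02x %02x %02x\n", B[0], B[1], B[2], B[3]); // af f3 00 80
  return 0;
}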
- if (Subtarget->isThumb() && Size == 4) { + if (isThumb() && Size == 4) { EmitConstant(Binary >> 16, 2, OS); EmitConstant(Binary & 0xffff, 2, OS); } else diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 59d60506fc0f..7411b599f0fa 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -23,43 +23,94 @@ using namespace llvm; -static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, - ARMAsmPrinter &Printer) { - MCContext &Ctx = Printer.OutContext; +MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, + const MCSymbol *Symbol) { const MCExpr *Expr; switch (MO.getTargetFlags()) { default: { - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx); + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + OutContext); switch (MO.getTargetFlags()) { default: assert(0 && "Unknown target flag on symbol operand"); case 0: break; case ARMII::MO_LO16: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx); - Expr = ARMMCExpr::CreateLower16(Expr, Ctx); + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + OutContext); + Expr = ARMMCExpr::CreateLower16(Expr, OutContext); break; case ARMII::MO_HI16: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx); - Expr = ARMMCExpr::CreateUpper16(Expr, Ctx); + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, + OutContext); + Expr = ARMMCExpr::CreateUpper16(Expr, OutContext); break; } break; } case ARMII::MO_PLT: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx); + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, + OutContext); break; } if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(MO.getOffset(), Ctx), - Ctx); + MCConstantExpr::Create(MO.getOffset(), + OutContext), + OutContext); return MCOperand::CreateExpr(Expr); } +bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, + MCOperand &MCOp) { + switch (MO.getType()) { + default: + assert(0 && "unknown operand type"); + return false; + case MachineOperand::MO_Register: + // Ignore all non-CPSR implicit register operands. 
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR) + return false; + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, + GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex())); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex())); + break; + case MachineOperand::MO_BlockAddress: + MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress())); + break; + case MachineOperand::MO_FPImmediate: { + APFloat Val = MO.getFPImm()->getValueAPF(); + bool ignored; + Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); + break; + } + } + return true; +} + void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); @@ -68,48 +119,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp; - switch (MO.getType()) { - default: - MI->dump(); - assert(0 && "unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all non-CPSR implicit register operands. - if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue; - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), AP.OutContext)); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, - AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); - break; - case MachineOperand::MO_JumpTableIndex: - MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); - break; - case MachineOperand::MO_ConstantPoolIndex: - MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); - break; - case MachineOperand::MO_BlockAddress: - MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); - break; - case MachineOperand::MO_FPImmediate: { - APFloat Val = MO.getFPImm()->getValueAPF(); - bool ignored; - Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); - MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); - break; - } - } - - OutMI.addOperand(MCOp); + if (AP.lowerOperand(MO, MCOp)) + OutMI.addOperand(MCOp); } } diff --git a/lib/Target/ARM/ARMMachObjectWriter.cpp b/lib/Target/ARM/ARMMachObjectWriter.cpp new file mode 100644 index 000000000000..a36e47da06d4 --- /dev/null +++ b/lib/Target/ARM/ARMMachObjectWriter.cpp @@ -0,0 +1,389 @@ +//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMFixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; +using namespace llvm::object; + +namespace { +class ARMMachObjectWriter : public MCMachObjectTargetWriter { + void RecordARMScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue); + void RecordARMMovwMovtRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue); + +public: + ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, + uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, + /*UseAggressiveSymbolFolding=*/true) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue); +}; +} + +static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, + unsigned &Log2Size) { + RelocType = unsigned(macho::RIT_Vanilla); + Log2Size = ~0U; + + switch (Kind) { + default: + return false; + + case FK_Data_1: + Log2Size = llvm::Log2_32(1); + return true; + case FK_Data_2: + Log2Size = llvm::Log2_32(2); + return true; + case FK_Data_4: + Log2Size = llvm::Log2_32(4); + return true; + case FK_Data_8: + Log2Size = llvm::Log2_32(8); + return true; + + // Handle 24-bit branch kinds. + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + RelocType = unsigned(macho::RIT_ARM_Branch24Bit); + // Report as 'long', even though that is not quite accurate. + Log2Size = llvm::Log2_32(4); + return true; + + // Handle Thumb branches. + case ARM::fixup_arm_thumb_br: + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + Log2Size = llvm::Log2_32(2); + return true; + + case ARM::fixup_t2_uncondbranch: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + Log2Size = llvm::Log2_32(4); + return true; + + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + RelocType = unsigned(macho::RIT_ARM_HalfDifference); + // Report as 'long', even though that is not quite accurate. + Log2Size = llvm::Log2_32(4); + return true; + + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + // Report as 'long', even though that is not quite accurate. 
+ Log2Size = llvm::Log2_32(4); + return true; + } +} + +void ARMMachObjectWriter:: +RecordARMMovwMovtRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_ARM_Half; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint32_t Value2 = 0; + uint64_t SecAddr = + Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + Type = macho::RIT_ARM_HalfDifference; + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field: + // + // These two r_type relocations always have a pair following them, and + // the r_length bits are used differently. The encoding of the r_length is as + // follows: + // low bit of r_length: + // 0 - :lower16: for movw instructions + // 1 - :upper16: for movt instructions + // high bit of r_length: + // 0 - arm instructions + // 1 - thumb instructions + // the other half of the relocated expression is in the following pair + // relocation entry, in the low 16 bits of the r_address field. + unsigned ThumbBit = 0; + unsigned MovtBit = 0; + switch ((unsigned)Fixup.getKind()) { + default: break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + MovtBit = 1; + break; + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + MovtBit = 1; + // Fallthrough + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + ThumbBit = 1; + break; + } + + + if (Type == macho::RIT_ARM_HalfDifference) { + uint32_t OtherHalf = MovtBit + ?
(FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); + + macho::RelocationEntry MRE; + MRE.Word0 = ((OtherHalf << 0) | + (macho::RIT_Pair << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Writer->addRelocation(Fragment->getParent(), MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + Type = macho::RIT_Difference; + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Writer->addRelocation(Fragment->getParent(), MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Log2Size; + unsigned RelocType = macho::RIT_Vanilla; + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { + report_fatal_error("unknown ARM fixup kind!"); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need a + // scattered relocation entry. Differences always require scattered + // relocations. 
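The Word0 packing used by these records follows the scattered relocation layout: the fixup address sits in bits 0-23, the type in bits 24-27, the two r_length bits in 28-29 (which ARM_RELOC_HALF repurposes as the movt and Thumb flags, per the comment above), the pcrel flag in bit 30, and the scattered flag in bit 31. A compact sketch of the two packings (halfWord0 and scatteredWord0 are illustrative names, and the RF_Scattered value is assumed from MachOFormat.h):

#include <cstdint>

static const uint32_t RF_Scattered = 0x80000000; // assumed macho::RF_Scattered

// Plain scattered relocation: r_length carries log2 of the fixup size.
static uint32_t scatteredWord0(uint32_t FixupOffset, unsigned Type,
                               unsigned Log2Size, unsigned IsPCRel) {
  return (FixupOffset << 0) | (Type << 24) | (Log2Size << 28) |
         (IsPCRel << 30) | RF_Scattered;
}

// ARM_RELOC_HALF / ARM_RELOC_HALF_SECTDIFF: the two r_length bits are
// reused -- low bit selects movw (:lower16:) vs. movt (:upper16:), high
// bit selects ARM vs. Thumb encoding.
static uint32_t halfWord0(uint32_t FixupOffset, unsigned Type,
                          unsigned MovtBit, unsigned ThumbBit,
                          unsigned IsPCRel) {
  return (FixupOffset << 0) | (Type << 24) | (MovtBit << 28) |
         (ThumbBit << 29) | (IsPCRel << 30) | RF_Scattered;
}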
+ if (Target.getSymB()) { + if (RelocType == macho::RIT_ARM_Half || + RelocType == macho::RIT_ARM_HalfDifference) + return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, FixedValue); + return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + } + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // FIXME: For other platforms, we need to use scattered relocations for + // internal relocations with offsets. If this is an internal relocation with + // an offset, it also needs a scattered relocation entry. + // + // Is this right for ARM? + uint32_t Offset = Target.getConstant(); + if (IsPCRel && RelocType == macho::RIT_Vanilla) + Offset += 1 << Log2Size; + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) + return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // FIXME! + report_fatal_error("FIXME: relocations to absolute targets " + "not yet implemented"); + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + + // Check whether we need an external or internal relocation. + if (Writer->doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += Writer->getSectionAddress(&SymSD); + } + if (IsPCRel) + FixedValue -= Writer->getSectionAddress(Fragment->getParent()); + + // The type is determined by the fixup kind. 
+ Type = RelocType; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + +MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype) { + return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit, + CPUType, + CPUSubtype), + OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 99418733c376..76eb496bde42 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -200,45 +200,16 @@ def FPEXC : ARMReg<8, "fpexc">; // r11 == Frame Pointer (arm-style backtraces) // r10 == Stack Limit // -def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, - R7, R8, R9, R10, R11, R12, - SP, LR, PC]> { - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned ARM_GPR_AO[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, ARM::R11 }; - - // For Thumb1 mode, we don't want to allocate hi regs at all, as we - // don't know how to spill them. If we make our prologue/epilogue code - // smarter at some point, we can go back to using the above allocation - // orders for the Thumb1 instructions that know how to use hi regs. - static const unsigned THUMB_GPR_AO[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; - - GPRClass::iterator - GPRClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.isThumb1Only()) - return THUMB_GPR_AO; - return ARM_GPR_AO; - } - - GPRClass::iterator - GPRClass::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.isThumb1Only()) - return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); - return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned)); - } +def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), + SP, LR, PC)> { + // Allocate LR as the first CSR since it is always saved anyway. + // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't + // know how to spill them. If we make our prologue/epilogue code smarter at + // some point, we can go back to using the above allocation orders for the + // Thumb1 instructions that know how to use hi regs. + let AltOrders = [(add LR, GPR), (trunc GPR, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } @@ -246,263 +217,98 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, // register range for operands, but have undefined behaviours when PC // or SP (R13 or R15) are used. The ARM ISA refers to these operands // via the BadReg() pseudo-code description. 
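The AltOrders/AltOrderSelect pairs introduced above replace the hand-written allocation_order_begin/end bodies; TableGen now emits the alternative order tables and an index-selection function itself. Roughly what the generated selection amounts to for GPR (a sketch with illustrative names; the real tables are emitted into ARMGenRegisterInfo.inc):

#include <vector>

// Order 0 is the class's natural order, order 1 models (add LR, GPR)
// with LR hoisted to the front, and order 2 models (trunc GPR, 8),
// i.e. only R0-R7 for Thumb1.
enum Reg { R0, R1, R2, R3, R4, R5, R6, R7,
           R8, R9, R10, R11, R12, SP, LR, PC };

static const std::vector<Reg> GPROrders[] = {
  {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, SP, LR, PC},
  {LR, R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, SP, PC},
  {R0, R1, R2, R3, R4, R5, R6, R7},
};

static const std::vector<Reg> &gprAllocationOrder(bool IsThumb1Only) {
  // Mirrors AltOrderSelect: "1 + isThumb1Only()" picks order 1 (LR first)
  // for ARM/Thumb2 and order 2 (low registers only) for Thumb1.
  return GPROrders[1 + (IsThumb1Only ? 1 : 0)];
}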
-def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, - R7, R8, R9, R10, R11, R12, LR]> { - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned ARM_rGPR_AO[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, - ARM::R11 }; - - // For Thumb1 mode, we don't want to allocate hi regs at all, as we - // don't know how to spill them. If we make our prologue/epilogue code - // smarter at some point, we can go back to using the above allocation - // orders for the Thumb1 instructions that know how to use hi regs. - static const unsigned THUMB_rGPR_AO[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; - - rGPRClass::iterator - rGPRClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.isThumb1Only()) - return THUMB_rGPR_AO; - return ARM_rGPR_AO; - } - - rGPRClass::iterator - rGPRClass::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - - if (Subtarget.isThumb1Only()) - return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned)); - return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned)); - } +def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { + let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } // Thumb registers are R0-R7 normally. Some instructions can still use // the general GPR register class above (MOV, e.g.) -def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {} +def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; + +// The high registers in thumb mode, R8-R15. +def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; // For tail calls, we can't use callee-saved registers, as they are restored // to the saved value before the tail call, which would clobber a call address. // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of // this class and the preceding one(!) This is what we want. -def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> { - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - // R9 is available. - static const unsigned ARM_GPR_R9_TC[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R9, ARM::R12 }; - // R9 is not available. - static const unsigned ARM_GPR_NOR9_TC[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12 }; - - // For Thumb1 mode, we don't want to allocate hi regs at all, as we - // don't know how to spill them. If we make our prologue/epilogue code - // smarter at some point, we can go back to using the above allocation - // orders for the Thumb1 instructions that know how to use hi regs. 
- static const unsigned THUMB_GPR_AO_TC[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - - tcGPRClass::iterator - tcGPRClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.isThumb1Only()) - return THUMB_GPR_AO_TC; - return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC; - } - - tcGPRClass::iterator - tcGPRClass::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - - if (Subtarget.isThumb1Only()) - return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); - - return Subtarget.isTargetDarwin() ? - ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) : - ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); - } +def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> { + let AltOrders = [(and tcGPR, tGPR)]; + let AltOrderSelect = [{ + return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); }]; } - // Scalar single precision floating point register class.. -def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, - S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, - S23, S24, S25, S26, S27, S28, S29, S30, S31]>; +def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>; // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations -def SPR_8 : RegisterClass<"ARM", [f32], 32, - [S0, S1, S2, S3, S4, S5, S6, S7, - S8, S9, S10, S11, S12, S13, S14, S15]>; +def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>; // Scalar double precision floating point / generic 64-bit vector register // class. // ARM requires only word alignment for double. It's more performant if it // is double-word alignment though. def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, - [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31]> { - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - // VFP2 / VFPv3-D16 - static const unsigned ARM_DPR_VFP2[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10, ARM::D11, - ARM::D12, ARM::D13, ARM::D14, ARM::D15 }; - // VFP3: D8-D15 are callee saved and should be allocated last. - // Save other low registers for use as DPR_VFP2 and DPR_8 classes. 
- static const unsigned ARM_DPR_VFP3[] = { - ARM::D16, ARM::D17, ARM::D18, ARM::D19, - ARM::D20, ARM::D21, ARM::D22, ARM::D23, - ARM::D24, ARM::D25, ARM::D26, ARM::D27, - ARM::D28, ARM::D29, ARM::D30, ARM::D31, - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10, ARM::D11, - ARM::D12, ARM::D13, ARM::D14, ARM::D15 }; - - DPRClass::iterator - DPRClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.hasVFP3() && !Subtarget.hasD16()) - return ARM_DPR_VFP3; - return ARM_DPR_VFP2; - } - - DPRClass::iterator - DPRClass::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.hasVFP3() && !Subtarget.hasD16()) - return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned)); - else - return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned)); - } - }]; + (sequence "D%u", 0, 31)> { + // Allocate non-VFP2 registers D16-D31 first. + let AltOrders = [(rotl DPR, 16)]; + let AltOrderSelect = [{ return 1; }]; } // Subset of DPR that are accessible with VFP2 (and so that also have // 32-bit SPR subregs). def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, - [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15]> { + (trunc DPR, 16)> { let SubRegClasses = [(SPR ssub_0, ssub_1)]; } // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, - [D0, D1, D2, D3, D4, D5, D6, D7]> { + (trunc DPR, 8)> { let SubRegClasses = [(SPR_8 ssub_0, ssub_1)]; } // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { + (sequence "Q%u", 0, 15)> { let SubRegClasses = [(DPR dsub_0, dsub_1)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - // Q4-Q7 are callee saved and should be allocated last. - // Save other low registers for use as QPR_VFP2 and QPR_8 classes. - static const unsigned ARM_QPR[] = { - ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11, - ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15, - ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, - ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7 }; - - QPRClass::iterator - QPRClass::allocation_order_begin(const MachineFunction &MF) const { - return ARM_QPR; - } - - QPRClass::iterator - QPRClass::allocation_order_end(const MachineFunction &MF) const { - return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned)); - } - }]; + // Allocate non-VFP2 aliases Q8-Q15 first. + let AltOrders = [(rotl QPR, 8)]; + let AltOrderSelect = [{ return 1; }]; } // Subset of QPR that have 32-bit SPR subregs. def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> { + 128, (trunc QPR, 8)> { let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), (DPR_VFP2 dsub_0, dsub_1)]; } // Subset of QPR that have DPR_8 and SPR_8 subregs. 
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, - [Q0, Q1, Q2, Q3]> { + 128, (trunc QPR, 4)> { let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3), (DPR_8 dsub_0, dsub_1)]; } // Pseudo 256-bit vector register class to model pairs of Q registers // (4 consecutive D registers). -def QQPR : RegisterClass<"ARM", [v4i64], - 256, - [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> { +def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), (QPR qsub_0, qsub_1)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - // QQ2-QQ3 are callee saved and should be allocated last. - // Save other low registers for use as QPR_VFP2 and QPR_8 classes. - static const unsigned ARM_QQPR[] = { - ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7, - ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 }; - - QQPRClass::iterator - QQPRClass::allocation_order_begin(const MachineFunction &MF) const { - return ARM_QQPR; - } - - QQPRClass::iterator - QQPRClass::allocation_order_end(const MachineFunction &MF) const { - return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned)); - } - }]; + // Allocate non-VFP2 aliases first. + let AltOrders = [(rotl QQPR, 4)]; + let AltOrderSelect = [{ return 1; }]; } // Subset of QQPR that have 32-bit SPR subregs. -def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], - 256, - [QQ0, QQ1, QQ2, QQ3]> { +def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> { let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3), (QPR_VFP2 qsub_0, qsub_1)]; @@ -511,35 +317,16 @@ def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], // Pseudo 512-bit vector register class to model 4 consecutive Q registers // (8 consecutive D registers). -def QQQQPR : RegisterClass<"ARM", [v8i64], - 256, - [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> { +def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, dsub_4, dsub_5, dsub_6, dsub_7), (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - // QQQQ1 is callee saved and should be allocated last. - // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes. - static const unsigned ARM_QQQQPR[] = { - ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 }; - - QQQQPRClass::iterator - QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const { - return ARM_QQQQPR; - } - - QQQQPRClass::iterator - QQQQPRClass::allocation_order_end(const MachineFunction &MF) const { - return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned)); - } - }]; + // Allocate non-VFP2 aliases first. + let AltOrders = [(rotl QQQQPR, 2)]; + let AltOrderSelect = [{ return 1; }]; } // Condition code registers. -def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]> { +def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { let isAllocatable = 0; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c6f266b07531..1cab9e44ce75 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -7,17 +7,21 @@ // //===----------------------------------------------------------------------===// // -// This file implements the ARM specific subclass of TargetSubtarget. 
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
-#include "ARMGenSubtarget.inc"
#include "ARMBaseRegisterInfo.h"
#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
using namespace llvm;
static cl::opt<bool>
@@ -31,17 +35,25 @@ static cl::opt<bool>
StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
-ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
- bool isT)
- : ARMArchVersion(V4)
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
- , ARMFPUType(None)
+ , HasV4TOps(false)
+ , HasV5TOps(false)
+ , HasV5TEOps(false)
+ , HasV6Ops(false)
+ , HasV6T2Ops(false)
+ , HasV7Ops(false)
+ , HasVFPv2(false)
+ , HasVFPv3(false)
+ , HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
- , IsThumb(isT)
- , ThumbMode(Thumb1)
+ , InThumbMode(false)
+ , HasThumb2(false)
, NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
@@ -56,94 +68,40 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, HasMPExtension(false)
, FPOnlySP(false)
, AllowsUnalignedMem(false)
+ , Thumb2DSP(false)
, stackAlignment(4)
- , CPUString("generic")
+ , CPUString(CPU)
, TargetTriple(TT)
, TargetABI(ARM_ABI_APCS) {
- // Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
-
 // Determine default and user specified characteristics
-
- // When no arch is specified either by CPU or by attributes, make the default
- // ARMv4T.
- const char *ARMArchFeature = "";
- if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
- ARMArchVersion = V4T;
- ARMArchFeature = ",+v4t";
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ // Insert the architecture feature derived from the target triple into the
+ // feature string. This is important for setting features that are implied
+ // based on the architecture version.
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS;
+ else
+ ArchFS = FS;
 }
+ ParseSubtargetFeatures(CPUString, ArchFS);
- // Set the boolean corresponding to the current target triple, or the default
- // if one cannot be determined, to true.
- unsigned Len = TT.length();
- unsigned Idx = 0;
+ // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify an
+ // ARM version or CPU and then remove this.
+ if (!HasV6T2Ops && hasThumb2()) + HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true; - if (Len >= 5 && TT.substr(0, 4) == "armv") - Idx = 4; - else if (Len >= 6 && TT.substr(0, 5) == "thumb") { - IsThumb = true; - if (Len >= 7 && TT[5] == 'v') - Idx = 6; - } - if (Idx) { - unsigned SubVer = TT[Idx]; - if (SubVer >= '7' && SubVer <= '9') { - ARMArchVersion = V7A; - ARMArchFeature = ",+v7a"; - if (Len >= Idx+2 && TT[Idx+1] == 'm') { - ARMArchVersion = V7M; - ARMArchFeature = ",+v7m"; - } - } else if (SubVer == '6') { - ARMArchVersion = V6; - ARMArchFeature = ",+v6"; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') { - ARMArchVersion = V6T2; - ARMArchFeature = ",+v6t2"; - } - } else if (SubVer == '5') { - ARMArchVersion = V5T; - ARMArchFeature = ",+v5t"; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') { - ARMArchVersion = V5TE; - ARMArchFeature = ",+v5te"; - } - } else if (SubVer == '4') { - if (Len >= Idx+2 && TT[Idx+1] == 't') { - ARMArchVersion = V4T; - ARMArchFeature = ",+v4t"; - } else { - ARMArchVersion = V4; - ARMArchFeature = ""; - } - } - } - - if (TT.find("eabi") != std::string::npos) - TargetABI = ARM_ABI_AAPCS; - - // Parse features string. If the first entry in FS (the CPU) is missing, - // insert the architecture feature derived from the target triple. This is - // important for setting features that are implied based on the architecture - // version. - std::string FSWithArch; - if (FS.empty()) - FSWithArch = std::string(ARMArchFeature); - else if (FS.find(',') == 0) - FSWithArch = std::string(ARMArchFeature) + FS; - else - FSWithArch = FS; - CPUString = ParseSubtargetFeatures(FSWithArch, CPUString); + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUString); // After parsing Itineraries, set ItinData.IssueWidth. computeIssueWidth(); - // Thumb2 implies at least V6T2. - if (ARMArchVersion >= V6T2) - ThumbMode = Thumb2; - else if (ThumbMode >= Thumb2) - ARMArchVersion = V6T2; + if (TT.find("eabi") != std::string::npos) + TargetABI = ARM_ABI_AAPCS; if (isAAPCS_ABI()) stackAlignment = 8; @@ -151,7 +109,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (!isTargetDarwin()) UseMovt = hasV6T2Ops(); else { - IsR9Reserved = ReserveR9 | (ARMArchVersion < V6); + IsR9Reserved = ReserveR9 | !HasV6Ops; UseMovt = DarwinUseMOVT && hasV6T2Ops(); } @@ -247,9 +205,9 @@ void ARMSubtarget::computeIssueWidth() { bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, + TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtarget::ANTIDEP_CRITICAL; + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); CriticalPathRCs.push_back(&ARM::GPRRegClass); return PostRAScheduler && OptLevel >= CodeGenOpt::Default; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 0271c873f191..c6508723a576 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -7,50 +7,49 @@ // //===----------------------------------------------------------------------===// // -// This file declares the ARM specific subclass of TargetSubtarget. +// This file declares the ARM specific subclass of TargetSubtargetInfo. 
//
//===----------------------------------------------------------------------===//
#ifndef ARMSUBTARGET_H
#define ARMSUBTARGET_H
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
namespace llvm {
class GlobalValue;
+class StringRef;
-class ARMSubtarget : public TargetSubtarget {
+class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
- enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
- };
-
 enum ARMProcFamilyEnum {
 Others, CortexA8, CortexA9
 };
- enum ARMFPEnum {
- None, VFPv2, VFPv3, NEON
- };
-
- enum ThumbTypeEnum {
- Thumb1,
- Thumb2
- };
-
- /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
- /// V6, V6T2, V7A, V7M.
- ARMArchEnum ARMArchVersion;
-
 /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
 ARMProcFamilyEnum ARMProcFamily;
- /// ARMFPUType - Floating Point Unit type.
- ARMFPEnum ARMFPUType;
+ /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+ /// Specify whether the target supports specific ARM ISA variants.
+ bool HasV4TOps;
+ bool HasV5TOps;
+ bool HasV5TEOps;
+ bool HasV6Ops;
+ bool HasV6T2Ops;
+ bool HasV7Ops;
+
+ /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
+ /// supported.
+ bool HasVFPv2;
+ bool HasVFPv3;
+ bool HasNEON;
 /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
 /// specified. Use the method useNEONForSinglePrecisionFP() to
@@ -68,11 +67,11 @@ protected:
 /// SlowFPBrcc - True if floating point compare + branch is slow.
 bool SlowFPBrcc;
- /// IsThumb - True if we are in thumb mode, false if in ARM mode.
- bool IsThumb;
+ /// InThumbMode - True if compiling for Thumb, false for ARM.
+ bool InThumbMode;
- /// ThumbMode - Indicates supported Thumb version.
- ThumbTypeEnum ThumbMode;
+ /// HasThumb2 - True if Thumb2 instructions are supported.
+ bool HasThumb2;
 /// NoARM - True if subtarget does not support ARM mode execution.
 bool NoARM;
@@ -128,6 +127,10 @@ protected:
 /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
 bool AllowsUnalignedMem;
+ /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
+ /// and such) instructions in Thumb2 code.
+ bool Thumb2DSP;
+
 /// stackAlignment - The minimum alignment known to hold of the stack frame on
 /// entry to the function and which must be maintained by every function.
 unsigned stackAlignment;
@@ -154,7 +157,8 @@ protected:
 /// This constructor initializes the data members to match that
 /// of the specified triple.
 ///
- ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb);
+ ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
 /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
 /// that still makes it profitable to inline the call.
@@ -165,28 +169,28 @@ protected:
 }
 /// ParseSubtargetFeatures - Parses features string setting specified
 /// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); void computeIssueWidth(); - bool hasV4TOps() const { return ARMArchVersion >= V4T; } - bool hasV5TOps() const { return ARMArchVersion >= V5T; } - bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } - bool hasV6Ops() const { return ARMArchVersion >= V6; } - bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } - bool hasV7Ops() const { return ARMArchVersion >= V7A; } + bool hasV4TOps() const { return HasV4TOps; } + bool hasV5TOps() const { return HasV5TOps; } + bool hasV5TEOps() const { return HasV5TEOps; } + bool hasV6Ops() const { return HasV6Ops; } + bool hasV6T2Ops() const { return HasV6T2Ops; } + bool hasV7Ops() const { return HasV7Ops; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } bool hasARMOps() const { return !NoARM; } - bool hasVFP2() const { return ARMFPUType >= VFPv2; } - bool hasVFP3() const { return ARMFPUType >= VFPv3; } - bool hasNEON() const { return ARMFPUType >= NEON; } + bool hasVFP2() const { return HasVFPv2; } + bool hasVFP3() const { return HasVFPv3; } + bool hasNEON() const { return HasNEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } + bool hasDivide() const { return HasHardwareDivide; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } @@ -197,6 +201,7 @@ protected: bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool hasMPExtension() const { return HasMPExtension; } + bool hasThumb2DSP() const { return Thumb2DSP; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } @@ -209,10 +214,10 @@ protected: bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } - bool isThumb() const { return IsThumb; } - bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); } - bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); } - bool hasThumb2() const { return ThumbMode >= Thumb2; } + bool isThumb() const { return InThumbMode; } + bool isThumb1Only() const { return InThumbMode && !HasThumb2; } + bool isThumb2() const { return InThumbMode && HasThumb2; } + bool hasThumb2() const { return HasThumb2; } bool isR9Reserved() const { return IsR9Reserved; } @@ -226,7 +231,7 @@ protected: /// enablePostRAScheduler - True at 'More' optimization. 
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
 RegClassVector& CriticalPathRCs) const;
 /// getInstrItins - Return the instruction itineraries based on subtarget
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 29aa4f7ad2ce..f0b176ad6981 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
-#include "ARMMCAsmInfo.h"
#include "ARMFrameLowering.h"
#include "ARM.h"
#include "llvm/PassManager.h"
@@ -22,15 +21,6 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return new ARMMCAsmInfoDarwin();
-
- return new ARMELFMCAsmInfo();
-}
-
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
 MCContext &Ctx, TargetAsmBackend &TAB,
@@ -56,10 +46,6 @@ extern "C" void LLVMInitializeARMTarget() {
 RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
 RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
- // Register the target asm info.
- RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
- RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
-
 // Register the MC Code Emitter
 TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
 TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
@@ -78,18 +64,23 @@ extern "C" void LLVMInitializeARMTarget() {
 ///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
 const std::string &TT,
- const std::string &FS,
- bool isThumb)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, isThumb),
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
 JITInfo(),
 InstrItins(Subtarget.getInstrItineraryData())
{
 DefRelocModel = getRelocationModel();
+
+ // Default to soft float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Soft;
}
ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
 const std::string &FS)
- : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
+ : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget),
 DataLayout(Subtarget.isAPCS_ABI() ?
 std::string("e-p:32:32-f64:32:64-i64:32:64-"
 "v128:32:128-v64:32:64-n32") :
@@ -105,8 +96,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
 const std::string &FS)
- : ARMBaseTargetMachine(T, TT, FS, true),
+ : ARMBaseTargetMachine(T, TT, CPU, FS),
 InstrInfo(Subtarget.hasThumb2() ?
((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index e0aa149c4cc2..bc3d46a50ea5 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -41,7 +41,7 @@ private: public: ARMBaseTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool isThumb); + const std::string &CPU, const std::string &FS); virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -70,7 +70,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMFrameLowering FrameLowering; public: ARMTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const ARMRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -109,7 +109,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { OwningPtr<ARMFrameLowering> FrameLowering; public: ThumbTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo virtual const ARMBaseRegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index 2428ce16d3d5..d9a5fa223b4b 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -87,8 +87,9 @@ public: : ARMBaseAsmLexer(T, MAI) { std::string tripleString("arm-unknown-unknown"); std::string featureString; + std::string CPU; OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, featureString)); + targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); InitRegisterMap(targetMachine->getRegisterInfo()); } }; @@ -99,8 +100,9 @@ public: : ARMBaseAsmLexer(T, MAI) { std::string tripleString("thumb-unknown-unknown"); std::string featureString; + std::string CPU; OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, featureString)); + targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); InitRegisterMap(targetMachine->getRegisterInfo()); } }; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4bc12c9c2b49..a4741270c7a5 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -20,14 +20,17 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmParser.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" + using namespace llvm; namespace { @@ -35,8 +38,8 @@ namespace { class ARMOperand; class ARMAsmParser : public TargetAsmParser { + MCSubtargetInfo &STI; MCAsmParser &Parser; - TargetMachine &TM; MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } @@ -47,7 +50,7 @@ class ARMAsmParser : public TargetAsmParser { int TryParseRegister(); virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool 
TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); - bool TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); + int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &, ARMII::AddrMode AddrMode); @@ -79,6 +82,18 @@ class ARMAsmParser : public TargetAsmParser { void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); + bool isThumb() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & ARM::ModeThumb) != 0; + } + bool isThumbOne() const { + return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0; + } + void SwitchMode() { + unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); + setAvailableFeatures(FB); + } + /// @name Auto-generated Match Functions /// { @@ -113,13 +128,13 @@ class ARMAsmParser : public TargetAsmParser { const SmallVectorImpl<MCParsedAsmOperand*> &); public: - ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) - : TargetAsmParser(T), Parser(_Parser), TM(_TM) { - MCAsmParserExtension::Initialize(_Parser); - // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures( - &TM.getSubtarget<ARMSubtarget>())); - } + ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : TargetAsmParser(), STI(_STI), Parser(_Parser) { + MCAsmParserExtension::Initialize(_Parser); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -146,6 +161,7 @@ class ARMOperand : public MCParsedAsmOperand { RegisterList, DPRRegisterList, SPRRegisterList, + ShiftedRegister, Shifter, Token } Kind; @@ -207,8 +223,14 @@ class ARMOperand : public MCParsedAsmOperand { struct { ARM_AM::ShiftOpc ShiftTy; - unsigned RegNum; + unsigned Imm; } Shift; + struct { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftReg; + unsigned ShiftImm; + } ShiftedReg; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -255,6 +277,9 @@ public: case Shifter: Shift = o.Shift; break; + case ShiftedRegister: + ShiftedReg = o.ShiftedReg; + break; } } @@ -350,6 +375,46 @@ public: bool isCondCode() const { return Kind == CondCode; } bool isCCOut() const { return Kind == CCOut; } bool isImm() const { return Kind == Immediate; } + bool isImm0_255() const { + if (Kind != Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 256; + } + bool isImm0_7() const { + if (Kind != Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 8; + } + bool isImm0_15() const { + if (Kind != Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 16; + } + bool isImm0_65535() const { + if (Kind != Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 65536; + } + bool isT2SOImm() const { + if (Kind != Immediate) + return false; + 
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(Value) != -1; + } bool isReg() const { return Kind == Register; } bool isRegList() const { return Kind == RegisterList; } bool isDPRRegList() const { return Kind == DPRRegisterList; } @@ -358,6 +423,7 @@ public: bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; } bool isMemory() const { return Kind == Memory; } bool isShifter() const { return Kind == Shifter; } + bool isShiftedReg() const { return Kind == ShiftedRegister; } bool isMemMode2() const { if (getMemAddrMode() != ARMII::AddrMode2) return false; @@ -488,6 +554,18 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addShiftedRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands!"); + assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!"); + assert((ShiftedReg.ShiftReg == 0 || + ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) && + "Invalid shifted register operand!"); + Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg)); + Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg)); + Inst.addOperand(MCOperand::CreateImm( + ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm))); + } + void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm( @@ -515,6 +593,31 @@ public: addExpr(Inst, getImm()); } + void addImm0_255Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImm0_7Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImm0_15Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addImm0_65535Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addT2SOImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); @@ -648,7 +751,7 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); } - virtual void dump(raw_ostream &OS) const; + virtual void print(raw_ostream &OS) const; static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { ARMOperand *Op = new ARMOperand(CondCode); @@ -699,6 +802,21 @@ public: return Op; } + static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, + unsigned SrcReg, + unsigned ShiftReg, + unsigned ShiftImm, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(ShiftedRegister); + Op->ShiftedReg.ShiftTy = ShTy; + Op->ShiftedReg.SrcReg = SrcReg; + Op->ShiftedReg.ShiftReg = ShiftReg; + Op->ShiftedReg.ShiftImm = ShiftImm; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy, SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(Shifter); @@ -802,7 +920,7 @@ public: } // end anonymous namespace. 
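The ShiftedRegister operand added above folds the shift kind, the immediate shift amount, and the optional shift register into a single so_reg encoding: addShiftedRegOperands() emits the source register, the shift register (0 when shifting by an immediate), and one immediate built with ARM_AM::getSORegOpc(). A minimal standalone sketch of that packing, with the enum and helpers paraphrased from ARMAddressingModes.h rather than quoted from it (treat the exact layout as illustrative):

#include <cassert>
#include <cstdio>

// Paraphrase of ARM_AM::ShiftOpc and the so_reg pack/unpack helpers: the
// low 3 bits of the packed immediate hold the shift kind, the remaining
// bits hold the shift amount.
enum ShiftOpc { no_shift = 0, asr, lsl, lsr, ror, rrx };

static unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
  return ShOp | (Imm << 3);
}
static ShiftOpc getSORegShOp(unsigned Opc) { return ShiftOpc(Opc & 7); }
static unsigned getSORegOffset(unsigned Opc) { return Opc >> 3; }

int main() {
  // "r0, lsl #5": an immediate shift, so the shift register stays 0 and the
  // amount rides in the immediate, matching the assert in
  // addShiftedRegOperands() (ShiftReg == 0 || shift amount == 0).
  unsigned Packed = getSORegOpc(lsl, 5);
  assert(getSORegShOp(Packed) == lsl && getSORegOffset(Packed) == 5);
  printf("packed so_reg immediate = %u\n", Packed);
  return 0;
}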
-void ARMOperand::dump(raw_ostream &OS) const { +void ARMOperand::print(raw_ostream &OS) const { switch (Kind) { case CondCode: OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; @@ -863,7 +981,15 @@ void ARMOperand::dump(raw_ostream &OS) const { OS << "<register " << getReg() << ">"; break; case Shifter: - OS << "<shifter " << getShiftOpcStr(Shift.ShiftTy) << ">"; + OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">"; + break; + case ShiftedRegister: + OS << "<so_reg" + << ShiftedReg.SrcReg + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm)) + << ", " << ShiftedReg.ShiftReg << ", " + << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) + << ">"; break; case RegisterList: case DPRRegisterList: @@ -927,11 +1053,12 @@ int ARMAsmParser::TryParseRegister() { return RegNum; } -/// Try to parse a register name. The token must be an Identifier when called, -/// and if it is a register name the token is eaten and the register number is -/// returned. Otherwise return -1. -/// -bool ARMAsmParser::TryParseShiftRegister( +// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0. +// If a recoverable error occurs, return 1. If an irrecoverable error +// occurs, return -1. An irrecoverable error is one where tokens have been +// consumed in the process of trying to parse the shifter (i.e., when it is +// indeed a shifter operand, but malformed). +int ARMAsmParser::TryParseShiftRegister( SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); @@ -948,18 +1075,69 @@ bool ARMAsmParser::TryParseShiftRegister( .Default(ARM_AM::no_shift); if (ShiftTy == ARM_AM::no_shift) - return true; - - Parser.Lex(); // Eat shift-type operand; - int RegNum = TryParseRegister(); - if (RegNum == -1) - return Error(Parser.getTok().getLoc(), "register expected"); + return 1; + + Parser.Lex(); // Eat the operator. + + // The source register for the shift has already been added to the + // operand list, so we need to pop it off and combine it into the shifted + // register operand instead. + OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val()); + if (!PrevOp->isReg()) + return Error(PrevOp->getStartLoc(), "shift must be of a register"); + int SrcReg = PrevOp->getReg(); + int64_t Imm = 0; + int ShiftReg = 0; + if (ShiftTy == ARM_AM::rrx) { + // RRX Doesn't have an explicit shift amount. The encoder expects + // the shift register to be the same as the source register. Seems odd, + // but OK. + ShiftReg = SrcReg; + } else { + // Figure out if this is shifted by a constant or a register (for non-RRX). + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat hash. + SMLoc ImmLoc = Parser.getTok().getLoc(); + const MCExpr *ShiftExpr = 0; + if (getParser().ParseExpression(ShiftExpr)) { + Error(ImmLoc, "invalid immediate shift value"); + return -1; + } + // The expression must be evaluatable as an immediate. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr); + if (!CE) { + Error(ImmLoc, "invalid immediate shift value"); + return -1; + } + // Range check the immediate. 
+ // lsl, ror: 0 <= imm <= 31 + // lsr, asr: 0 <= imm <= 32 + Imm = CE->getValue(); + if (Imm < 0 || + ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) || + ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) { + Error(ImmLoc, "immediate shift value out of range"); + return -1; + } + } else if (Parser.getTok().is(AsmToken::Identifier)) { + ShiftReg = TryParseRegister(); + SMLoc L = Parser.getTok().getLoc(); + if (ShiftReg == -1) { + Error (L, "expected immediate or register in shift operand"); + return -1; + } + } else { + Error (Parser.getTok().getLoc(), + "expected immediate or register in shift operand"); + return -1; + } + } - Operands.push_back(ARMOperand::CreateReg(RegNum,S, Parser.getTok().getLoc())); - Operands.push_back(ARMOperand::CreateShifter(ShiftTy, + Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, + ShiftReg, Imm, S, Parser.getTok().getLoc())); - return false; + return 0; } @@ -1162,10 +1340,14 @@ tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size())) .Case("sy", ARM_MB::SY) .Case("st", ARM_MB::ST) + .Case("sh", ARM_MB::ISH) .Case("ish", ARM_MB::ISH) + .Case("shst", ARM_MB::ISHST) .Case("ishst", ARM_MB::ISHST) .Case("nsh", ARM_MB::NSH) + .Case("un", ARM_MB::NSH) .Case("nshst", ARM_MB::NSHST) + .Case("unst", ARM_MB::NSHST) .Case("osh", ARM_MB::OSH) .Case("oshst", ARM_MB::OSHST) .Default(~0U); @@ -1604,15 +1786,18 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, default: Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; - case AsmToken::Identifier: + case AsmToken::Identifier: { if (!TryParseRegisterWithWriteBack(Operands)) return false; - if (!TryParseShiftRegister(Operands)) + int Res = TryParseShiftRegister(Operands); + if (Res == 0) // success return false; - + else if (Res == -1) // irrecoverable error + return true; // Fall though for the Identifier case that is not a register or a // special name. + } case AsmToken::Integer: // things like 1f and 2b as a branch targets case AsmToken::Dot: { // . as a branch target // This was not a register so parse other operands that start with an @@ -1761,30 +1946,35 @@ static StringRef SplitMnemonic(StringRef Mnemonic, Mnemonic == "vcle" || (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || - Mnemonic == "vqdmlal")) + Mnemonic == "vqdmlal" || Mnemonic == "bics")) return Mnemonic; - // First, split out any predication code. - unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2)) - .Case("eq", ARMCC::EQ) - .Case("ne", ARMCC::NE) - .Case("hs", ARMCC::HS) - .Case("lo", ARMCC::LO) - .Case("mi", ARMCC::MI) - .Case("pl", ARMCC::PL) - .Case("vs", ARMCC::VS) - .Case("vc", ARMCC::VC) - .Case("hi", ARMCC::HI) - .Case("ls", ARMCC::LS) - .Case("ge", ARMCC::GE) - .Case("lt", ARMCC::LT) - .Case("gt", ARMCC::GT) - .Case("le", ARMCC::LE) - .Case("al", ARMCC::AL) - .Default(~0U); - if (CC != ~0U) { - Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2); - PredicationCode = CC; + // First, split out any predication code. Ignore mnemonics we know aren't + // predicated but do have a carry-set and so weren't caught above. 
+ if (Mnemonic != "adcs") { + unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2)) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("cs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("cc", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); + if (CC != ~0U) { + Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2); + PredicationCode = CC; + } } // Next, determine if we have a carry setting bit. We explicitly ignore all @@ -1824,8 +2014,6 @@ static StringRef SplitMnemonic(StringRef Mnemonic, void ARMAsmParser:: GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { - bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb(); - if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" || @@ -1834,7 +2022,7 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" || Mnemonic == "eor" || Mnemonic == "smlal" || - (Mnemonic == "mov" && !isThumb)) { + (Mnemonic == "mov" && !isThumbOne())) { CanAcceptCarrySet = true; } else { CanAcceptCarrySet = false; @@ -1851,10 +2039,9 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, CanAcceptPredicationCode = true; } - if (isThumb) + if (isThumb()) if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || - Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp" || - Mnemonic == "mov") + Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") CanAcceptPredicationCode = false; } @@ -1884,20 +2071,22 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, bool CanAcceptCarrySet, CanAcceptPredicationCode; GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode); + // If we had a carry-set on an instruction that can't do that, issue an + // error. + if (!CanAcceptCarrySet && CarrySetting) { + Parser.EatToEndOfStatement(); + return Error(NameLoc, "instruction '" + Head + + "' can not set flags, but 's' suffix specified"); + } + // Add the carry setting operand, if necessary. // // FIXME: It would be awesome if we could somehow invent a location such that // match errors on this operand would print a nice diagnostic about how the // 's' character in the mnemonic resulted in a CCOut operand. - if (CanAcceptCarrySet) { + if (CanAcceptCarrySet) Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0, NameLoc)); - } else { - // This mnemonic can't ever accept a carry set, but the user wrote one (or - // misspelled another mnemonic). - - // FIXME: Issue a nice error. - } // Add the predication code operand, if necessary. if (CanAcceptPredicationCode) { @@ -1988,7 +2177,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, // that updates the condition codes if it ends in 's'. So see if the // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut // operand with a value of CPSR. - else if(MatchResult == Match_MnemonicFail) { + else if (MatchResult == Match_MnemonicFail) { // Get the instruction mnemonic, which is the first token. 
StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken(); if (Mnemonic.substr(Mnemonic.size()-1) == "s") { @@ -2174,20 +2363,15 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { return Error(Parser.getTok().getLoc(), "unexpected token in directive"); Parser.Lex(); - // FIXME: We need to be able switch subtargets at this point so that - // MatchInstructionImpl() will work when it gets the AvailableFeatures which - // includes Feature_IsThumb or not to match the right instructions. This is - // blocked on the FIXME in llvm-mc.cpp when creating the TargetMachine. - if (Val == 16){ - assert(TM.getSubtarget<ARMSubtarget>().isThumb() && - "switching between arm/thumb not yet suppported via .code 16)"); + if (Val == 16) { + if (!isThumb()) + SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); - } - else{ - assert(!TM.getSubtarget<ARMSubtarget>().isThumb() && - "switching between thumb/arm not yet suppported via .code 32)"); + } else { + if (isThumb()) + SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); - } + } return false; } diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index d3b8b54e76b8..21608d0b62fd 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -1,18 +1,16 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) -tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(ARMGenRegisterNames.inc -gen-register-enums) -tablegen(ARMGenRegisterInfo.inc -gen-register-desc) -tablegen(ARMGenInstrNames.inc -gen-instr-enums) -tablegen(ARMGenInstrInfo.inc -gen-instr-desc) +tablegen(ARMGenRegisterInfo.inc -gen-register-info) +tablegen(ARMGenInstrInfo.inc -gen-instr-info) tablegen(ARMGenCodeEmitter.inc -gen-emitter) tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(ARMGenAsmWriter.inc -gen-asm-writer) tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher) tablegen(ARMGenDAGISel.inc -gen-dag-isel) tablegen(ARMGenFastISel.inc -gen-fast-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) -tablegen(ARMGenSubtarget.inc -gen-subtarget) +tablegen(ARMGenSubtargetInfo.inc -gen-subtarget) tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) tablegen(ARMGenDecoderTables.inc -gen-arm-decoder) @@ -34,10 +32,10 @@ add_llvm_target(ARMCodeGen ARMISelLowering.cpp ARMInstrInfo.cpp ARMJITInfo.cpp + ARMMachObjectWriter.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp ARMLoadStoreOptimizer.cpp - ARMMCAsmInfo.cpp ARMMCInstLower.cpp ARMRegisterInfo.cpp ARMSelectionDAGInfo.cpp @@ -67,3 +65,4 @@ add_subdirectory(TargetInfo) add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index 271ca8c72f08..d89c80a9d457 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -24,8 +24,8 @@ //#define DEBUG(X) do { X; } while (0) /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const -/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s -/// describing the operand info for each ARMInsts[i]. +/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the +/// operand info for each ARMInsts[i]. 
/// /// Together with an instruction's encoding format, we can take advantage of the /// NumOperands and the OpInfo fields of the target instruction description in @@ -46,10 +46,10 @@ /// dag DefaultOps = (ops (i32 14), (i32 zero_reg)); /// } /// -/// which is manifested by the TargetOperandInfo[] of: +/// which is manifested by the MCOperandInfo[] of: /// -/// { 0, 0|(1<<TOI::Predicate), 0 }, -/// { ARM::CCRRegClassID, 0|(1<<TOI::Predicate), 0 } +/// { 0, 0|(1<<MCOI::Predicate), 0 }, +/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 } /// /// So the first predicate MCOperand corresponds to the immediate part of the /// ARM condition field (Inst{31-28}), and the second predicate MCOperand @@ -66,12 +66,14 @@ /// dag DefaultOps = (ops (i32 zero_reg)); /// } /// -/// which is manifested by the one TargetOperandInfo of: +/// which is manifested by the one MCOperandInfo of: /// -/// { ARM::CCRRegClassID, 0|(1<<TOI::OptionalDef), 0 } +/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 } /// -/// And this maps to one MCOperand with the regsiter kind of ARM::CPSR. -#include "ARMGenInstrInfo.inc" + +namespace llvm { +extern MCInstrDesc ARMInsts[]; +} using namespace llvm; @@ -588,9 +590,9 @@ static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) { static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + unsigned short NumDefs = MCID.getNumDefs(); + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -739,9 +741,9 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, if (PW) { MI.addOperand(MCOperand::CreateReg(0)); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - const TargetInstrDesc &TID = ARMInsts[Opcode]; + const MCInstrDesc &MCID = ARMInsts[Opcode]; unsigned IndexMode = - (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; + (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2, ARM_AM::no_shift, IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); @@ -802,7 +804,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (CoprocessorOpcode(Opcode)) return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; // MRS and MRSsys take one GPR reg Rd. 
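The doc comment rewritten at the top of ARMDisassemblerCore.cpp above describes how the tblgen'd MCOperandInfo rows mark predicate and optional-def operands with flag bits. A self-contained mock of that flag test (the struct layout is simplified and the register-class id below is made up; the real definitions live in llvm/MC/MCInstrDesc.h):

#include <cstdio>

namespace MCOI {
  enum OperandFlags { LookupPtrRegClass = 0, Predicate, OptionalDef };
}

// Simplified stand-in for llvm::MCOperandInfo: a register-class id plus
// flag bits, as in the { RegClass, Flags, Constraints } rows quoted above.
struct MCOperandInfo {
  short RegClass;
  unsigned short Flags;
  bool isPredicate() const { return Flags & (1 << MCOI::Predicate); }
  bool isOptionalDef() const { return Flags & (1 << MCOI::OptionalDef); }
};

int main() {
  // The two 'pred' operands: an immediate condition code (RegClass 0) and
  // a CCR register; both are tagged as predicate operands.
  const MCOperandInfo PredOps[2] = {
    { 0, 0 | (1 << MCOI::Predicate) },
    { /* stand-in for ARM::CCRRegClassID */ 9, 0 | (1 << MCOI::Predicate) },
  };
  for (unsigned i = 0; i != 2; ++i)
    printf("op %u: regclass %d, predicate=%d, optional-def=%d\n", i,
           PredOps[i].RegClass, PredOps[i].isPredicate(),
           PredOps[i].isOptionalDef());
  return 0;
}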
@@ -901,7 +903,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; @@ -976,10 +978,10 @@ static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) { static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); - bool isUnary = isUnaryDP(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + unsigned short NumDefs = MCID.getNumDefs(); + bool isUnary = isUnaryDP(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1041,7 +1043,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // If this is a two-address operand, skip it, e.g., MOVCCr operand 1. - if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) { + if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; } @@ -1089,10 +1091,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); - bool isUnary = isUnaryDP(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + unsigned short NumDefs = MCID.getNumDefs(); + bool isUnary = isUnaryDP(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1118,7 +1120,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // If this is a two-address operand, skip it, e.g., MOVCCs operand 1. - if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) { + if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) { MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; } @@ -1244,17 +1246,17 @@ static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBac static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + bool isPrePost = isPrePostLdSt(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && TID.getNumDefs() > 0) || - (isStore && (TID.getNumDefs() == 0 || isPrePost))) + assert(((!isStore && MCID.getNumDefs() > 0) || + (isStore && (MCID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. 
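Several hunks in this file repeat the same idiom for pulling the index mode out of MCID.TSFlags with a mask and a shift. A toy version of that bitfield extraction; the shift position and mode values here are placeholders, not the real ARMII constants from the backend headers:

#include <cstdint>
#include <cstdio>

// Placeholder field position; the real values are ARMII::IndexModeShift
// and ARMII::IndexModeMask in the ARM backend.
enum { IndexModeShift = 7, IndexModeMask = 3u << IndexModeShift };
enum { IndexModeNone = 0, IndexModePre = 1, IndexModePost = 2 };

static unsigned getIndexMode(uint64_t TSFlags) {
  return (TSFlags & IndexModeMask) >> IndexModeShift;
}

int main() {
  // Encode "pre-indexed" into the flags word, then extract it back out.
  uint64_t TSFlags = uint64_t(IndexModePre) << IndexModeShift;
  if (getIndexMode(TSFlags) == IndexModePre)
    printf("pre-indexed load/store\n");
  return 0;
}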
@@ -1291,7 +1293,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) + assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); @@ -1308,7 +1310,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; unsigned IndexMode = - (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; + (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getIBit(insn) == 0) { // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2). // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already @@ -1379,17 +1381,17 @@ static bool HasDualReg(unsigned Opcode) { static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - bool isPrePost = isPrePostLdSt(TID.TSFlags); - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + bool isPrePost = isPrePostLdSt(MCID.TSFlags); + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && TID.getNumDefs() > 0) || - (isStore && (TID.getNumDefs() == 0 || isPrePost))) + assert(((!isStore && MCID.getNumDefs() > 0) || + (isStore && (MCID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. @@ -1433,7 +1435,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) + assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) && "Offset mode or tied_to operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); @@ -1451,7 +1453,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? 
ARM_AM::add : ARM_AM::sub; unsigned IndexMode = - (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; + (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getAM3IBit(insn) == 1) { MI.addOperand(MCOperand::CreateReg(0)); @@ -1539,7 +1541,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; @@ -1591,7 +1593,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1653,8 +1655,8 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (decodeRd(insn) == 15 || decodeRm(insn) == 15) return false; - const TargetInstrDesc &TID = ARMInsts[Opcode]; - NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + const MCInstrDesc &MCID = ARMInsts[Opcode]; + NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands // Disassemble register def. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -1696,7 +1698,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (decodeRd(insn) == 15 || decodeRm(insn) == 15) return false; - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1802,7 +1804,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1842,8 +1844,8 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1858,7 +1860,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Skip tied_to operand constraint. 
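The index-mode lookups above all use one idiom: target facts are packed into the TSFlags word of the instruction description and recovered with a mask and a shift. A self-contained sketch of that decode; the bit layout below is invented for illustration and is not ARM's actual ARMII encoding.

#include <cstdint>
#include <cstdio>

// Hypothetical packed layout: bits 0-3 addressing mode, bits 4-5 index mode.
enum : uint64_t {
  AddrModeMask  = 0xFULL << 0,
  IndexModeMask = 0x3ULL << 4,
};
enum { IndexModeShift = 4 };
enum IndexMode { IndexModeNone = 0, IndexModePre = 1, IndexModePost = 2 };

static unsigned getIndexMode(uint64_t TSFlags) {
  // Same shape as (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift.
  return unsigned((TSFlags & IndexModeMask) >> IndexModeShift);
}

int main() {
  uint64_t TSFlags = (uint64_t(IndexModePre) << IndexModeShift) | 0x3; // pre-indexed, addrmode 3
  std::printf("addr mode %llu, index mode %u\n",
              (unsigned long long)(TSFlags & AddrModeMask), getIndexMode(TSFlags));
}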
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { assert(NumOps >= 4 && "Expect >=4 operands"); MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -1886,8 +1888,8 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 @@ -1903,7 +1905,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, getRegisterEnum(B, RegClassID, decodeVFPRd(insn, SP)))); - assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && + assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 && "Tied to operand expected"); MI.addOperand(MI.getOperand(0)); @@ -1961,7 +1963,7 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2011,7 +2013,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2136,7 +2138,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2402,8 +2404,8 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, unsigned alignment, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; // At least one DPR register plus addressing mode #6. 
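These routines lean heavily on a slice(insn, msb, lsb) helper to pull bitfields out of the 32-bit instruction word, e.g. slice(insn, 8, 8) above to pick the single/double-precision bit. A standalone version matching the inclusive-range convention used here:

#include <cstdint>
#include <cassert>
#include <cstdio>

// Extract insn[msb:lsb], both bounds inclusive, msb >= lsb.
static uint32_t slice(uint32_t insn, unsigned msb, unsigned lsb) {
  assert(msb < 32 && msb >= lsb && "bad bit range");
  unsigned width = msb - lsb + 1;
  uint32_t mask = width == 32 ? 0xFFFFFFFFu : ((1u << width) - 1);
  return (insn >> lsb) & mask;
}

int main() {
  uint32_t insn = 0xE59F1004; // an arbitrary ARM instruction word
  std::printf("cond  = 0x%X\n", slice(insn, 31, 28)); // 0xE = AL
  std::printf("imm12 = 0x%X\n", slice(insn, 11, 0));  // 0x004
}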
assert(NumOps >= 3 && "Expect >= 3 operands"); @@ -2507,7 +2509,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { - assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 && + assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 && "Tied to operand expected"); MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -2757,8 +2759,8 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -2848,8 +2850,8 @@ enum N2VFlag { static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opc]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opc]; + const MCOperandInfo *OpInfo = MCID.OpInfo; assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -2878,7 +2880,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, ++OpIdx; // VPADAL... - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -2892,7 +2894,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, // VZIP and others have two TIED_TO reg operands. int Idx; while (OpIdx < NumOps && - (Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // Add TIED_TO operand. MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -2945,8 +2947,8 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; assert(NumOps >= 3 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -2964,7 +2966,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, decodeNEONRd(insn)))); ++OpIdx; - if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -3044,8 +3046,8 @@ enum N3VFlag { static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs. assert(NumOps >= 3 && @@ -3076,7 +3078,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // VABA, VABAL, VBSLd, VBSLq, ... 
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) { // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); ++OpIdx; @@ -3091,11 +3093,6 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, : decodeNEONRm(insn)))); ++OpIdx; - // Special case handling for VMOVDneon and VMOVQ because they are marked as - // N3RegFrm. - if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ) - return true; - // Dm = Inst{5:3-0} => NEON Rm // or // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise @@ -3163,8 +3160,8 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; assert(NumOps >= 3 && @@ -3192,7 +3189,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Process tied_to operand constraint. int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { MI.addOperand(MI.getOperand(Idx)); ++OpIdx; } @@ -3221,11 +3218,11 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; - assert(TID.getNumDefs() == 1 && NumOps >= 3 && + assert(MCID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && OpInfo[2].RegClass < 0 && @@ -3255,14 +3252,14 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; if (!OpInfo) return false; - assert(TID.getNumDefs() == 1 && NumOps >= 3 && + assert(MCID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && - TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && + MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 && OpInfo[2].RegClass == ARM::GPRRegClassID && OpInfo[3].RegClass < 0 && "Expect >= 3 operands with one dst operand"); @@ -3294,7 +3291,7 @@ static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || @@ -3379,7 +3376,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - if 
(Opcode == ARM::DMB || Opcode == ARM::DSB) { + if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) { // Inst{3-0} encodes the memory barrier option for the variants. unsigned opt = slice(insn, 3, 0); switch (opt) { @@ -3604,11 +3601,11 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, assert(NumOpsRemaining > 0 && "Invalid argument"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned Idx = MI.getNumOperands(); // First, we check whether this instr specifies the PredicateOperand through - // a pair of TargetOperandInfos with isPredicate() property. + // a pair of MCOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && OpInfo[Idx].RegClass < 0 && @@ -3636,13 +3633,13 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, assert(NumOpsRemaining > 0 && "Invalid argument"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; const std::string &Name = ARMInsts[Opcode].Name; unsigned Idx = MI.getNumOperands(); uint64_t TSFlags = ARMInsts[Opcode].TSFlags; // First, we check whether this instr specifies the PredicateOperand through - // a pair of TargetOperandInfos with isPredicate() property. + // a pair of MCOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && OpInfo[Idx].RegClass < 0 && diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 9639c8a4b1bd..834c6f65295d 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -350,7 +350,7 @@ static inline unsigned decodeRotate(uint32_t insn) { static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -425,8 +425,8 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -454,7 +454,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID && "Thumb reg operand expected"); int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // The reg operand is tied to the first reg operand. 
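The hunk above extends the DMB/DSB special case to ISB; all three barriers keep their option in Inst{3-0}. The option values themselves are elided from the hunk, so the table below is filled in from the ARMv7 architecture manual (unlisted encodings are reserved); a compact standalone decoder:

#include <cstdint>
#include <cstdio>

// ARMv7 memory barrier option field, Inst{3-0}.
static const char *barrierOption(uint32_t opt) {
  switch (opt & 0xF) {
  case 0xF: return "sy";
  case 0xE: return "st";
  case 0xB: return "ish";
  case 0xA: return "ishst";
  case 0x7: return "nsh";
  case 0x6: return "nshst";
  case 0x3: return "osh";
  case 0x2: return "oshst";
  default:  return 0; // reserved encoding
  }
}

int main() {
  uint32_t insn = 0xF57FF05F; // dmb sy
  const char *opt = barrierOption(insn & 0xF);
  std::printf("dmb %s\n", opt ? opt : "<reserved>");
}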
MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -511,8 +511,8 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -530,7 +530,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && "More operands expected"); int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // The reg operand is tied to the first reg operand. MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -554,7 +554,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -602,7 +602,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 2 && @@ -630,8 +630,8 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; assert(NumOps >= 2 @@ -680,7 +680,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) && "Unexpected opcode"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 3 && @@ -708,7 +708,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -733,7 +733,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 3 && @@ -810,7 +810,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; // Predicate operands are handled elsewhere. 
if (NumOps == 2 && @@ -958,7 +958,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::tTRAP) return true; - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps == 3 && OpInfo[0].RegClass < 0 && @@ -989,7 +989,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected"); @@ -1226,7 +1226,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; unsigned &OpIdx = NumOpsAdded; @@ -1316,7 +1316,7 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; assert(NumOps >= 4 @@ -1423,8 +1423,8 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) { static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; // Special case handling. @@ -1467,7 +1467,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (ThreeReg) { int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); ++OpIdx; @@ -1521,8 +1521,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1550,7 +1550,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, return false; } int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // The reg operand is tied to the first reg operand. 
MI.addOperand(MI.getOperand(Idx)); } else { @@ -1590,8 +1590,8 @@ static inline bool Thumb2SaturateOpcode(unsigned Opcode) { /// o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + const MCInstrDesc &MCID = ARMInsts[Opcode]; + NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands // Disassemble the register def. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, @@ -1635,8 +1635,8 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1659,7 +1659,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (TwoReg) { assert(NumOps >= 3 && "Expect >= 3 operands"); int Idx; - if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); } else { @@ -1907,8 +1907,8 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, // t2PLDs: Rn Rm imm2=Inst{5-4} // Same pattern applies for t2PLDW* and t2PLI*. - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2073,8 +2073,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, // See, for example, A6.3.7 Load word: Table A6-18 Load word. if (Load && Rn == 15) return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2085,7 +2085,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, "Expect >= 3 operands and first two as reg operands"); bool ThreeReg = (OpInfo[2].RegClass > 0); - bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1; + bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1; bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td // Build the register operands, followed by the immediate. 
@@ -2160,8 +2160,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetInstrDesc &TID = ARMInsts[Opcode]; - const TargetOperandInfo *OpInfo = TID.OpInfo; + const MCInstrDesc &MCID = ARMInsts[Opcode]; + const MCOperandInfo *OpInfo = MCID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -2214,7 +2214,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::rGPRRegClassID && @@ -2259,7 +2259,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::rGPRRegClassID && diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 8ae87f81cc1e..78d3e477975c 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -126,38 +126,6 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream, - const MCAsmInfo *MAI) { - // Break it up into two parts that make up a shifter immediate. - V = ARM_AM::getSOImmVal(V); - assert(V != -1 && "Not a valid so_imm value!"); - - unsigned Imm = ARM_AM::getSOImmValImm(V); - unsigned Rot = ARM_AM::getSOImmValRot(V); - - // Print low-level immediate formation info, per - // A5.2.3: Data-processing (immediate), and - // A5.2.4: Modified immediate constants in ARM instructions - if (Rot) { - O << "#" << Imm << ", #" << Rot; - // Pretty printed version. - if (CommentStream) - *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n"; - } else { - O << "#" << Imm; - } -} - - -/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit -/// immediate in bits 0-7. -void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - assert(MO.isImm() && "Not a valid so_imm value!"); - printSOImm(O, MO.getImm(), CommentStream, &MAI); -} - // so_reg is a 4-operand unit corresponding to register forms of the A5.1 // "Addressing Mode 1 - Data-processing operands" forms. This includes: // REG 0 0 - e.g. R5 @@ -174,6 +142,8 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, // Print the shift opc. 
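The deleted printSOImm is a reminder of how ARM data-processing immediates work: an 8-bit value rotated right, printed as "#imm, #rot" with the materialized constant as an assembly comment. A standalone worked example of the decode direction, with a local rotr32 standing in for ARM_AM::rotr32:

#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t v, unsigned amt) {
  amt &= 31;
  return amt ? (v >> amt) | (v << (32 - amt)) : v;
}

int main() {
  // "#255, #8": rotate 0x000000FF right by 8 -> 0xFF000000.
  unsigned Imm = 255, Rot = 8;
  uint32_t val = rotr32(Imm, Rot);
  std::printf("#%u, #%u  ; 0x%08X (%d)\n", Imm, Rot, val, (int)val);
  // Prints "#255, #8  ; 0xFF000000 (-16777216)", matching the pretty-printed
  // comment the removed code emitted via (int)ARM_AM::rotr32(Imm, Rot).
}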
ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc == ARM_AM::rrx) + return; if (MO2.getReg()) { O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index bde0eb9199a9..d5f238bb8a61 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -19,11 +19,10 @@ namespace llvm { class MCOperand; -class TargetMachine; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); @@ -39,8 +38,6 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 53b4c95d3801..53b4c95d3801 100644 --- a/lib/Target/ARM/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp diff --git a/lib/Target/ARM/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 90f7822ea580..90f7822ea580 100644 --- a/lib/Target/ARM/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp new file mode 100644 index 000000000000..f8fcf2b8aff1 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -0,0 +1,144 @@ +//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides ARM specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "ARMMCTargetDesc.h" +#include "ARMMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_REGINFO_MC_DESC +#include "ARMGenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "ARMGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "ARMGenSubtargetInfo.inc" + +using namespace llvm; + +std::string ARM_MC::ParseARMTriple(StringRef TT) { + // Set the boolean corresponding to the current target triple, or the default + // if one cannot be determined, to true. + unsigned Len = TT.size(); + unsigned Idx = 0; + + // FIXME: Enahnce Triple helper class to extract ARM version. 
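The printSORegOperand fix above adds an early return because rrx (rotate right with extend) is the one shifter operand that takes no shift amount. A small standalone formatter showing the intended output shapes; the operand layout is simplified and only the printing logic is modeled:

#include <cstdio>

enum ShiftOpcSketch { no_shift, lsl, lsr, asr, ror, rrx };

static const char *shiftName(ShiftOpcSketch S) {
  switch (S) {
  case lsl: return "lsl"; case lsr: return "lsr"; case asr: return "asr";
  case ror: return "ror"; case rrx: return "rrx"; default: return "";
  }
}

static void printSOReg(const char *Reg, ShiftOpcSketch S, unsigned Amt) {
  std::printf("%s", Reg);
  if (S == no_shift) { std::printf("\n"); return; }
  std::printf(", %s", shiftName(S));
  if (S == rrx) { std::printf("\n"); return; } // rrx: no amount, like the early return above
  std::printf(" #%u\n", Amt);
}

int main() {
  printSOReg("r0", lsl, 2); // prints: r0, lsl #2
  printSOReg("r0", rrx, 0); // prints: r0, rrx
}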
+ bool isThumb = false; + if (Len >= 5 && TT.substr(0, 4) == "armv") + Idx = 4; + else if (Len >= 6 && TT.substr(0, 5) == "thumb") { + isThumb = true; + if (Len >= 7 && TT[5] == 'v') + Idx = 6; + } + + std::string ARMArchFeature; + if (Idx) { + unsigned SubVer = TT[Idx]; + if (SubVer >= '7' && SubVer <= '9') { + if (Len >= Idx+2 && TT[Idx+1] == 'm') { + // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv + ARMArchFeature = "+v7,+noarm,+db,+hwdiv"; + } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') { + // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, + // FeatureT2XtPk + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk"; + } else + // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2 + ARMArchFeature = "+v7,+neon,+db,+t2dsp"; + } else if (SubVer == '6') { + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') + ARMArchFeature = "+v6t2"; + else + ARMArchFeature = "+v6"; + } else if (SubVer == '5') { + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') + ARMArchFeature = "+v5te"; + else + ARMArchFeature = "+v5t"; + } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't') + ARMArchFeature = "+v4t"; + } + + if (isThumb) { + if (ARMArchFeature.empty()) + ARMArchFeature = "+thumb-mode"; + else + ARMArchFeature += ",+thumb-mode"; + } + + return ARMArchFeature; +} + +MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + std::string ArchFS = ARM_MC::ParseARMTriple(TT); + if (!FS.empty()) { + if (!ArchFS.empty()) + ArchFS = ArchFS + "," + FS.str(); + else + ArchFS = FS; + } + + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitARMMCSubtargetInfo(X, TT, CPU, ArchFS); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeARMMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget, + ARM_MC::createARMMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget, + ARM_MC::createARMMCSubtargetInfo); +} + +static MCInstrInfo *createARMMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitARMMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeARMMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo); +} + +static MCRegisterInfo *createARMMCRegisterInfo() { + MCRegisterInfo *X = new MCRegisterInfo(); + InitARMMCRegisterInfo(X); + return X; +} + +extern "C" void LLVMInitializeARMMCRegInfo() { + TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo); +} + +static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) + return new ARMMCAsmInfoDarwin(); + + return new ARMELFMCAsmInfo(); +} + +extern "C" void LLVMInitializeARMMCAsmInfo() { + // Register the target asm info. + RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo); + RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h new file mode 100644 index 000000000000..74701e3516dc --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -0,0 +1,52 @@ +//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
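ParseARMTriple above turns a triple's architecture prefix into a subtarget feature string before the MCSubtargetInfo is built. A trimmed standalone rendition covering only the plain v7/v6/thumb cases shown, so the mapping is easy to try; the real function also handles v4t/v5 and the v7m/v7em variants:

#include <cstdio>
#include <string>

// Simplified: map "armv7..." / "thumbv..." prefixes to feature strings,
// mirroring the structure of ARM_MC::ParseARMTriple above.
static std::string parseARMTripleSketch(const std::string &TT) {
  size_t Idx = 0;
  bool isThumb = false;
  if (TT.compare(0, 4, "armv") == 0)
    Idx = 4;
  else if (TT.compare(0, 5, "thumb") == 0) {
    isThumb = true;
    if (TT.size() > 5 && TT[5] == 'v')
      Idx = 6;
  }

  std::string Features;
  if (Idx && Idx < TT.size()) {
    char SubVer = TT[Idx];
    if (SubVer == '7')
      Features = "+v7,+neon,+db,+t2dsp"; // the v7a default above
    else if (SubVer == '6')
      Features = "+v6";
  }
  if (isThumb)
    Features += Features.empty() ? "+thumb-mode" : ",+thumb-mode";
  return Features;
}

int main() {
  std::printf("%s\n", parseARMTripleSketch("armv7-none-eabi").c_str());   // +v7,+neon,+db,+t2dsp
  std::printf("%s\n", parseARMTripleSketch("thumbv6-none-eabi").c_str()); // +v6,+thumb-mode
}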
+// +//===----------------------------------------------------------------------===// +// +// This file provides ARM specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMMCTARGETDESC_H +#define ARMMCTARGETDESC_H + +#include <string> + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheARMTarget, TheThumbTarget; + +namespace ARM_MC { + std::string ParseARMTriple(StringRef TT); + + /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance. + /// This is exposed so Asm parser, etc. do not need to go through + /// TargetRegistry. + MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +} // End llvm namespace + +// Defines symbolic names for ARM registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "ARMGenRegisterInfo.inc" + +// Defines symbolic names for the ARM instructions. +// +#define GET_INSTRINFO_ENUM +#include "ARMGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "ARMGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..68daf42c9191 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMARMDesc + ARMMCTargetDesc.cpp + ARMMCAsmInfo.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM/MCTargetDesc/Makefile b/lib/Target/ARM/MCTargetDesc/Makefile new file mode 100644 index 000000000000..448ed9df2bff --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/ARM/TargetDesc/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f6d024232eae..2df00538b39f 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -137,11 +137,11 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. 
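The new ARMMCTargetDesc.h illustrates the TableGen .inc convention: one generated file carries several sections (register enums, instruction enums, subtarget enums), and a consumer opts into a section by defining its GET_* guard before the #include. A one-file simulation of that layout, with the "generated" region inlined so the sketch compiles on its own; all names here are made up:

#include <cstdio>

#define GET_FOO_ENUM      // what a consumer defines before the #include

// ---- contents that would live in a generated FooGen.inc ----
#ifdef GET_FOO_ENUM
enum FooOpcode { FOO_ADD, FOO_SUB, FOO_MUL };
#endif

#ifdef GET_FOO_MC_DESC    // not requested here, so this section drops out
extern const int FooInsts[];
#endif
// ---- end FooGen.inc contents ----

int main() {
  std::printf("FOO_MUL = %d\n", (int)FOO_MUL); // 2: only the enum section materialized
}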
- const TargetInstrDesc &TID = MI->getDesc(); - unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (TID.mayStore()) + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Domain = MCID.TSFlags & ARMII::DomainMask; + if (MCID.mayStore()) return false; - unsigned Opcode = TID.getOpcode(); + unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) return false; if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) @@ -218,18 +218,18 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); unsigned PredReg = MI->getOperand(++NextOp).getReg(); - const TargetInstrDesc &TID1 = TII->get(MulOpc); - const TargetInstrDesc &TID2 = TII->get(AddSubOpc); - unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI)); + const MCInstrDesc &MCID1 = TII->get(MulOpc); + const MCInstrDesc &MCID2 = TII->get(AddSubOpc); + unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg) + MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) .addReg(Src2Reg, getKillRegState(Src2Kill)); if (HasLane) MIB.addImm(LaneImm); MIB.addImm(Pred).addReg(PredReg); - MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2) + MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); if (NegAcc) { @@ -273,15 +273,15 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { continue; } - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.isBarrier()) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.isBarrier()) { clearStack(); Skip = 0; ++MII; continue; } - unsigned Domain = TID.TSFlags & ARMII::DomainMask; + unsigned Domain = MCID.TSFlags & ARMII::DomainMask; if (Domain == ARMII::DomainGeneral) { if (++Skip == 2) // Assume dual issues of non-VFP / NEON instructions. @@ -291,7 +291,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { unsigned MulOpc, AddSubOpc; bool NegAcc, HasLane; - if (!TII->isFpMLxInstruction(TID.getOpcode(), + if (!TII->isFpMLxInstruction(MCID.getOpcode(), MulOpc, AddSubOpc, NegAcc, HasLane) || !FindMLxHazard(MI)) pushStack(MI); diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 65a6494986fe..eb8c60354476 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -12,14 +12,14 @@ LIBRARYNAME = LLVMARMCodeGen TARGET = ARM # Make sure that tblgen is run, first thing. 
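ExpandFPMLxInstruction above splits a fused multiply-accumulate into a multiply into a fresh virtual register followed by the add or subtract, sidestepping the VFP/NEON pipeline hazard that FindMLxHazard detects. A mock-IR sketch of just the rewrite step; the instruction records are simplified, and the real pass also carries predicates, kill flags, lane immediates and the negated-accumulator forms:

#include <cstdio>
#include <vector>

struct Instr { const char *Op; int Dst, Src1, Src2; };

// Rewrite dst = vmla(acc, a, b) as tmp = vmul(a, b); dst = vadd(acc, tmp).
static std::vector<Instr> expandMLx(int Dst, int Acc, int A, int B, int TmpReg) {
  std::vector<Instr> Out;
  Out.push_back(Instr{ "vmul", TmpReg, A, B });
  Out.push_back(Instr{ "vadd", Dst, Acc, TmpReg });
  return Out;
}

int main() {
  // d0 = vmla(d0, d1, d2), with d16 as the scratch register.
  std::vector<Instr> Seq = expandMLx(0, 0, 1, 2, 16);
  for (size_t i = 0; i < Seq.size(); ++i)
    std::printf("%s d%d, d%d, d%d\n", Seq[i].Op, Seq[i].Dst, Seq[i].Src1, Seq[i].Src2);
}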
-BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ - ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ - ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ - ARMGenDAGISel.inc ARMGenSubtarget.inc \ +BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ + ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ + ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ ARMGenDecoderTables.inc ARMGenEDInfo.inc \ - ARMGenFastISel.inc ARMGenMCCodeEmitter.inc + ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ + ARMGenMCPseudoLowering.inc -DIRS = InstPrinter AsmParser Disassembler TargetInfo +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 965665c2821a..c85d1e99705a 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -77,7 +77,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { } if (inNEONDomain(Domain, isA8)) { - // Convert VMOVD to VMOVDneon + // Convert VMOVD to VORRd unsigned DestReg = MI->getOperand(0).getReg(); DEBUG({errs() << "vmov convert: "; MI->dump();}); @@ -88,7 +88,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { // - The imp-defs / imp-uses are superregs only, we don't care about // them. AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg)); + TII->get(ARM::VORRd), DestReg) + .addReg(SrcReg).addReg(SrcReg)); MBB.erase(MI); MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 8ba9a27e95c8..2f6842e8cb60 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -681,3 +681,21 @@ is compiled and optimized to: str r1, [r0] //===---------------------------------------------------------------------===// + +Improve codegen for select's: +if (x != 0) x = 1 +if (x == 1) x = 1 + +ARM codegen used to look like this: + mov r1, r0 + cmp r1, #1 + mov r0, #0 + moveq r0, #1 + +The naive lowering select between two different values. It should recognize the +test is equality test so it's more a conditional move rather than a select: + cmp r0, #1 + movne r0, #0 + +Currently this is a ARM specific dag combine. We probably should make it into a +target-neutral one. diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index dee3d278203f..c258870e48a5 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -136,8 +136,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlags(MachineInstr::FrameSetup); - if (NumBytes > 7) - // If offset is > 7 then sp cannot be adjusted in a single instruction, + if (NumBytes > 508) + // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } @@ -160,7 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // will be allocated after this, so we can still use the base pointer // to reference locals. 
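The NEONMoveFix change above rewrites a D-register move as VORRd with the source register supplied twice, which is a copy because x | x == x, and keeping the copy in the NEON domain avoids a cross-domain stall. The identity, checked over a 64-bit lane in plain C++:

#include <cstdint>
#include <cstdio>

int main() {
  // "vorr d0, d1, d1" computes d1 | d1 bitwise, i.e. a copy of d1 into d0.
  uint64_t d1 = 0x0123456789ABCDEFull;
  uint64_t d0 = d1 | d1;
  std::printf("copy ok: %d\n", (int)(d0 == d1));
}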
if (RegInfo->hasBasePointer(MF)) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) + .addReg(ARM::SP)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already @@ -177,7 +178,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - if (MI->getOpcode() == ARM::tRestore && + if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) return true; @@ -239,11 +240,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(ARM::R4); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(ARM::R4)); } else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(FramePtr); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), + ARM::SP) + .addReg(FramePtr)); } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && @@ -270,8 +273,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) + .addReg(ARM::R3, RegState::Kill)); // erase the old tBX_RET instruction MBB.erase(MBBI); } diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 3fbb43340c3f..218311d78d30 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -13,7 +13,6 @@ #include "Thumb1InstrInfo.h" #include "ARM.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -37,18 +36,8 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - bool tDest = ARM::tGPRRegClass.contains(DestReg); - bool tSrc = ARM::tGPRRegClass.contains(SrcReg); - unsigned Opc = ARM::tMOVgpr2gpr; - if (tDest && tSrc) - Opc = ARM::tMOVr; - else if (tSrc) - Opc = ARM::tMOVtgpr2gpr; - else if (tDest) - Opc = ARM::tMOVgpr2tgpr; - - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && "Thumb1 can only copy GPR registers"); } @@ -76,7 +65,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } @@ -105,7 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, 
get(ARM::tLDRspi), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 6bf565068e4a..4eb0b6c93e1d 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -239,13 +239,13 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, unsigned Chunk = (1 << 3) - 1; unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; Bytes -= ThisVal; - const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); + const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags)); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg, RegState::Kill)) .setMIFlags(MIFlags); } BaseReg = DestReg; @@ -291,8 +291,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, } if (ExtraOpc) { - const TargetInstrDesc &TID = TII.get(ExtraOpc); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) + const MCInstrDesc &MCID = TII.get(ExtraOpc); + AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) .addReg(DestReg, RegState::Kill) .addImm(((unsigned)NumBytes) & 3) .setMIFlags(MIFlags)); @@ -360,8 +360,8 @@ static void emitThumbConstant(MachineBasicBlock &MBB, if (Imm > 0) emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); if (isSub) { - const TargetInstrDesc &TID = TII.get(ARM::tRSB); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) + const MCInstrDesc &MCID = TII.get(ARM::tRSB); + AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) .addReg(DestReg, RegState::Kill)); } } @@ -377,11 +377,9 @@ static void removeOperands(MachineInstr &MI, unsigned i) { static unsigned convertToNonSPOpcode(unsigned Opcode) { switch (Opcode) { case ARM::tLDRspi: - case ARM::tRestore: // FIXME: Should this opcode be here? return ARM::tLDRi; case ARM::tSTRspi: - case ARM::tSpill: // FIXME: Should this opcode be here? return ARM::tSTRi; } @@ -396,7 +394,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); if (Opcode == ARM::tADDrSPi) { @@ -419,13 +417,12 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, unsigned PredReg; if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); + MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset and remaining explicit predicate operands. - do MI.RemoveOperand(FrameRegIdx+1); - while (MI.getNumOperands() > FrameRegIdx+1 && - (!MI.getOperand(FrameRegIdx+1).isReg() || - !MI.getOperand(FrameRegIdx+1).isImm())); + // Remove offset and add predicate operands. 
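emitThumbRegPlusImmediate above materializes a large register-plus-immediate by looping, peeling off as much as one tADDi3/tSUBi3 can carry per step. A standalone sketch of that chunking loop; the real code also uses the 8-bit add form, register copies, and the extra-opcode fixup shown above:

#include <cstdio>

// Split |Bytes| into 3-bit chunks, as one tADDi3/tSUBi3 per chunk would.
static void emitRegPlusImm(int DestReg, int BaseReg, int Bytes) {
  bool isSub = Bytes < 0;
  unsigned Left = isSub ? -Bytes : Bytes;
  const unsigned Chunk = (1u << 3) - 1; // tADDi3/tSUBi3 immediate range: 0..7
  while (Left) {
    unsigned ThisVal = Left > Chunk ? Chunk : Left;
    Left -= ThisVal;
    std::printf("%s r%d, r%d, #%u\n", isSub ? "subs" : "adds",
                DestReg, BaseReg, ThisVal);
    BaseReg = DestReg; // subsequent chunks build on the partial sum
  }
}

int main() {
  emitRegPlusImm(0, 7, 19); // adds r0, r7, #7 ; adds r0, r0, #7 ; adds r0, r0, #5
}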
+ MI.RemoveOperand(FrameRegIdx+1); + MachineInstrBuilder MIB(&MI); + AddDefaultPred(MIB); return true; } @@ -524,7 +521,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // If this is a thumb spill / restore, we will be using a constpool load to // materialize the offset. - if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) { + if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { ImmOp.ChangeToImmediate(0); } else { // Otherwise, it didn't fit. Pull in what we can to simplify the immed. @@ -567,8 +564,9 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // the function, the offset will be negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. DebugLoc DL; - BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). - addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); + AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(Reg, RegState::Kill)); // The UseMI is where we would like to restore the register. If there's // interference with R12 before then, however, we'll need to restore it @@ -591,8 +589,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, } } // Restore the register from R12 - BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). - addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); + AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)). + addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill)); return true; } @@ -653,7 +651,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(Offset && "This code isn't needed if offset already handled!"); unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); @@ -664,7 +662,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Use the destination register to materialize sp + offset. 
unsigned TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; - if (Opcode == ARM::tRestore) { + if (Opcode == ARM::tLDRspi) { if (FrameReg == ARM::SP) emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg, Offset, false, TII, *this); @@ -687,7 +685,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); bool UseRR = false; - if (Opcode == ARM::tSpill) { + if (Opcode == ARM::tSTRspi) { if (FrameReg == ARM::SP) emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg, Offset, false, TII, *this); diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 45e693744b80..360ec009e201 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -98,9 +98,6 @@ static bool isCopy(MachineInstr *MI) { case ARM::MOVr: case ARM::MOVr_TC: case ARM::tMOVr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2gpr: case ARM::t2MOVr: return true; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index d169dbb7f197..51b56aaeb008 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -15,7 +15,6 @@ #include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMAddressingModes.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -113,18 +112,8 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); - bool tDest = ARM::tGPRRegClass.contains(DestReg); - bool tSrc = ARM::tGPRRegClass.contains(SrcReg); - unsigned Opc = ARM::tMOVgpr2gpr; - if (tDest && tSrc) - Opc = ARM::tMOVr; - else if (tSrc) - Opc = ARM::tMOVtgpr2gpr; - else if (tDest) - Opc = ARM::tMOVgpr2tgpr; - - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); } void Thumb2InstrInfo:: @@ -232,8 +221,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = 0; if (DestReg == ARM::SP && BaseReg != ARM::SP) { // mov sp, rn. Note t2MOVr cannot be used. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg) - .addReg(BaseReg).setMIFlags(MIFlags); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg) + .addReg(BaseReg).setMIFlags(MIFlags)); BaseReg = ARM::SP; continue; } @@ -252,7 +241,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, } // sub rd, sp, so_imm - Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi; + Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { NumBytes = 0; } else { @@ -396,7 +385,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; @@ -410,25 +399,24 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned PredReg; if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { // Turn it into a move. 
- MI.setDesc(TII.get(ARM::tMOVgpr2gpr)); + MI.setDesc(TII.get(ARM::tMOVr)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset and remaining explicit predicate operands. do MI.RemoveOperand(FrameRegIdx+1); - while (MI.getNumOperands() > FrameRegIdx+1 && - (!MI.getOperand(FrameRegIdx+1).isReg() || - !MI.getOperand(FrameRegIdx+1).isImm())); + while (MI.getNumOperands() > FrameRegIdx+1); + MachineInstrBuilder MIB(&MI); + AddDefaultPred(MIB); return true; } - bool isSP = FrameReg == ARM::SP; bool HasCCOut = Opcode != ARM::t2ADDri12; if (Offset < 0) { Offset = -Offset; isSub = true; - MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri)); + MI.setDesc(TII.get(ARM::t2SUBri)); } else { - MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri)); + MI.setDesc(TII.get(ARM::t2ADDri)); } // Common case: small offset, fits into instruction. @@ -444,9 +432,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Another common case: imm12. if (Offset < 4096 && (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) { - unsigned NewOpc = isSP - ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12) - : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12); + unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); @@ -579,8 +565,7 @@ void Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI, const TargetRegisterInfo &TRI) const { - if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr || - SrcMI->getOperand(1).isKill()) + if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill()) return; unsigned PredReg = 0; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index ce2e9663fb74..c741a6e8a5b7 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -57,10 +57,8 @@ namespace { static const ReduceEntry ReduceTable[] = { // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 }, { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, - // Note: immediate scale is 4. - { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0,1 }, { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, @@ -84,9 +82,7 @@ namespace { { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, // FIXME: Do we need the 16-bit 'S' variant? 
-  { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0,0 },
-  { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0,0 },
-  { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0,0 },
+  { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,    0,   0,  1,0, 0,0 },
   { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0 },
   { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0 },
   { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0 },
@@ -189,8 +185,8 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
   }
 }
 
-static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
-  for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs)
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+  for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
     if (*Regs == ARM::CPSR)
       return true;
   return false;
@@ -291,7 +287,7 @@ static bool VerifyLowRegs(MachineInstr *MI) {
                  Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
                  Opc == ARM::t2LDMDB_UPD);
   bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
-  bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
+  bool isSPOk = isPCOk || isLROk;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg() || MO.isImplicit())
@@ -481,14 +477,54 @@ bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, MachineInstr *CPSRDef) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == ARM::t2ADDri) {
+    // If the source register is SP, try to reduce to tADDrSPi, otherwise
+    // it's a normal reduce.
+    if (MI->getOperand(1).getReg() != ARM::SP) {
+      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+        return true;
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+    }
+    // Try to reduce to tADDrSPi.
+    unsigned Imm = MI->getOperand(2).getImm();
+    // The immediate must be in range, the destination register must be a low
+    // reg, the predicate must be "always", and the condition flags must not
+    // be set.
+    if (Imm & 3 || Imm > 1020)
+      return false;
+    if (!isARMLowRegister(MI->getOperand(0).getReg()))
+      return false;
+    if (MI->getOperand(3).getImm() != ARMCC::AL)
+      return false;
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (MCID.hasOptionalDef() &&
+        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+      return false;
+
+    MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+                                      TII->get(ARM::tADDrSPi))
+      .addOperand(MI->getOperand(0))
+      .addOperand(MI->getOperand(1))
+      .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+
+    // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags()); + + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); + + MBB.erase(MI); + ++NumNarrows; + return true; + } + if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayLoad() || TID.mayStore()) + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.mayLoad() || MCID.mayStore()) return ReduceLoadStore(MBB, MI, Entry); - unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; case ARM::t2ADDSri: @@ -531,13 +567,6 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } - case ARM::t2ADDrSPi: { - static const ReduceEntry NarrowEntry = - { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 }; - if (MI->getOperand(0).getReg() == ARM::SP) - return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef); - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); - } } return false; } @@ -576,23 +605,23 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, } // Check if it's possible / necessary to transfer the predicate. - const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2); + const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { - if (!NewTID.isPredicable()) + if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { - SkipPred = !NewTID.isPredicable(); + SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.hasOptionalDef()) { - unsigned NumOps = TID.getNumOperands(); + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.hasOptionalDef()) { + unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; @@ -602,15 +631,15 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. - if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(CPSRDef, MI)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); - if (NewTID.hasOptionalDef()) { + if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else @@ -618,11 +647,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, } // Transfer the rest of operands. - unsigned NumOps = TID.getNumOperands(); + unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { - if (i < NumOps && TID.OpInfo[i].isOptionalDef()) + if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; - if (SkipPred && TID.OpInfo[i].isPredicate()) + if (SkipPred && MCID.OpInfo[i].isPredicate()) continue; MIB.addOperand(MI->getOperand(i)); } @@ -645,47 +674,44 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, return false; unsigned Limit = ~0U; - unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 
4 : 1; if (Entry.Imm1Limit) - Limit = ((1 << Entry.Imm1Limit) - 1) * Scale; + Limit = (1 << Entry.Imm1Limit) - 1; - const TargetInstrDesc &TID = MI->getDesc(); - for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { - if (TID.OpInfo[i].isPredicate()) + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { + if (MCID.OpInfo[i].isPredicate()) continue; const MachineOperand &MO = MI->getOperand(i); if (MO.isReg()) { unsigned Reg = MO.getReg(); if (!Reg || Reg == ARM::CPSR) continue; - if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP) - continue; if (Entry.LowRegs1 && !isARMLowRegister(Reg)) return false; } else if (MO.isImm() && - !TID.OpInfo[i].isPredicate()) { - if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0) + !MCID.OpInfo[i].isPredicate()) { + if (((unsigned)MO.getImm()) > Limit) return false; } } // Check if it's possible / necessary to transfer the predicate. - const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1); + const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool SkipPred = false; if (Pred != ARMCC::AL) { - if (!NewTID.isPredicable()) + if (!NewMCID.isPredicable()) // Can't transfer predicate, fail. return false; } else { - SkipPred = !NewTID.isPredicable(); + SkipPred = !NewMCID.isPredicable(); } bool HasCC = false; bool CCDead = false; - if (TID.hasOptionalDef()) { - unsigned NumOps = TID.getNumOperands(); + if (MCID.hasOptionalDef()) { + unsigned NumOps = MCID.getNumOperands(); HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); if (HasCC && MI->getOperand(NumOps-1).isDead()) CCDead = true; @@ -695,15 +721,15 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. - if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && canAddPseudoFlagDep(CPSRDef, MI)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); - if (NewTID.hasOptionalDef()) { + if (NewMCID.hasOptionalDef()) { if (HasCC) AddDefaultT1CC(MIB, CCDead); else @@ -711,29 +737,25 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, } // Transfer the rest of operands. - unsigned NumOps = TID.getNumOperands(); + unsigned NumOps = MCID.getNumOperands(); for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { - if (i < NumOps && TID.OpInfo[i].isOptionalDef()) + if (i < NumOps && MCID.OpInfo[i].isOptionalDef()) continue; - if ((TID.getOpcode() == ARM::t2RSBSri || - TID.getOpcode() == ARM::t2RSBri) && i == 2) + if ((MCID.getOpcode() == ARM::t2RSBSri || + MCID.getOpcode() == ARM::t2RSBri) && i == 2) // Skip the zero immediate operand, it's now implicit. continue; - bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate()); + bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate()); if (SkipPred && isPred) continue; const MachineOperand &MO = MI->getOperand(i); - if (Scale > 1 && !isPred && MO.isImm()) - MIB.addImm(MO.getImm() / Scale); - else { - if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) - // Skip implicit def of CPSR. Either it's modeled as an optional - // def now or it's already an implicit def on the new instruction. 
- continue; - MIB.addOperand(MO); - } + if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) + // Skip implicit def of CPSR. Either it's modeled as an optional + // def now or it's already an implicit def on the new instruction. + continue; + MIB.addOperand(MO); } - if (!TID.isPredicable() && NewTID.isPredicable()) + if (!MCID.isPredicable() && NewMCID.isPredicable()) AddDefaultPred(MIB); // Transfer MI flags. diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index 2c359dade29b..6ffaf45f4ed1 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -15,6 +15,7 @@ #ifndef TARGET_ALPHA_H #define TARGET_ALPHA_H +#include "MCTargetDesc/AlphaMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -37,17 +38,6 @@ namespace llvm { FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm); FunctionPass *createAlphaBranchSelectionPass(); - extern Target TheAlphaTarget; - } // end namespace llvm; -// Defines symbolic names for Alpha registers. This defines a mapping from -// register name to register number. -// -#include "AlphaGenRegisterNames.inc" - -// Defines symbolic names for the Alpha instructions. -// -#include "AlphaGenInstrNames.inc" - #endif diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 0875cfd1c3c5..de003fb4c65e 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -122,6 +122,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Promote); setOperationAction(ISD::BITCAST, MVT::f32, Promote); @@ -824,41 +827,24 @@ AlphaTargetLowering::getSingleConstraintMatchWeight( return weight; } -std::vector<unsigned> AlphaTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. +std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const +{ if (Constraint.size() == 1) { switch (Constraint[0]) { - default: break; // Unknown constriant letter - case 'f': - return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 , - Alpha::F3 , Alpha::F4 , Alpha::F5 , - Alpha::F6 , Alpha::F7 , Alpha::F8 , - Alpha::F9 , Alpha::F10, Alpha::F11, - Alpha::F12, Alpha::F13, Alpha::F14, - Alpha::F15, Alpha::F16, Alpha::F17, - Alpha::F18, Alpha::F19, Alpha::F20, - Alpha::F21, Alpha::F22, Alpha::F23, - Alpha::F24, Alpha::F25, Alpha::F26, - Alpha::F27, Alpha::F28, Alpha::F29, - Alpha::F30, Alpha::F31, 0); case 'r': - return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 , - Alpha::R3 , Alpha::R4 , Alpha::R5 , - Alpha::R6 , Alpha::R7 , Alpha::R8 , - Alpha::R9 , Alpha::R10, Alpha::R11, - Alpha::R12, Alpha::R13, Alpha::R14, - Alpha::R15, Alpha::R16, Alpha::R17, - Alpha::R18, Alpha::R19, Alpha::R20, - Alpha::R21, Alpha::R22, Alpha::R23, - Alpha::R24, Alpha::R25, Alpha::R26, - Alpha::R27, Alpha::R28, Alpha::R29, - Alpha::R30, Alpha::R31, 0); + return std::make_pair(0U, Alpha::GPRCRegisterClass); + case 'f': + return VT == MVT::f64 ? 
std::make_pair(0U, Alpha::F8RCRegisterClass) : + std::make_pair(0U, Alpha::F4RCRegisterClass); } } - - return std::vector<unsigned>(); + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } + //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index d38c3145b19f..13383f4430f9 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -94,9 +94,9 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 5a2f5610fdb4..4dcec8f31750 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -14,17 +14,21 @@ #include "Alpha.h" #include "AlphaInstrInfo.h" #include "AlphaMachineFunctionInfo.h" -#include "AlphaGenInstrInfo.inc" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_CTOR +#include "AlphaGenInstrInfo.inc" using namespace llvm; AlphaInstrInfo::AlphaInstrInfo() - : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts)), - RI(*this) { } + : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP), + RI(*this) { +} unsigned diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index ee6077a4a01a..337a85cdf22d 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -17,9 +17,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "AlphaRegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "AlphaGenInstrInfo.inc" + namespace llvm { -class AlphaInstrInfo : public TargetInstrInfoImpl { +class AlphaInstrInfo : public AlphaGenInstrInfo { const AlphaRegisterInfo RI; public: AlphaInstrInfo(); diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index d6c3809960aa..df8f157266e1 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -33,10 +33,14 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include <cstdlib> + +#define GET_REGINFO_TARGET_DESC +#include "AlphaGenRegisterInfo.inc" + using namespace llvm; AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii) - : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP), + : AlphaGenRegisterInfo(), TII(tii) { } @@ -204,10 +208,8 @@ int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const { return -1; } -#include "AlphaGenRegisterInfo.inc" - std::string AlphaRegisterInfo::getPrettyName(unsigned reg) { - std::string s(RegisterDescriptors[reg].Name); + std::string s(AlphaRegDesc[reg].Name); return s; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index ffe6cf19e210..1072bf73f199 
100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -15,7 +15,9 @@ #define ALPHAREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "AlphaGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "AlphaGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td index d644f05f91ae..32120d750413 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.td +++ b/lib/Target/Alpha/AlphaRegisterInfo.td @@ -110,10 +110,10 @@ def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>; // $28 is undefined after any and all calls /// Register classes -def GPRC : RegisterClass<"Alpha", [i64], 64, +def GPRC : RegisterClass<"Alpha", [i64], 64, (add // Volatile - [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22, - R23, R24, R25, R28, + R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22, + R23, R24, R25, R28, //Special meaning, but volatile R27, //procedure address R26, //return address @@ -121,18 +121,13 @@ def GPRC : RegisterClass<"Alpha", [i64], 64, // Non-volatile R9, R10, R11, R12, R13, R14, // Don't allocate 15, 30, 31 - R15, R30, R31 ]>; //zero + R15, R30, R31)>; //zero -def F4RC : RegisterClass<"Alpha", [f32], 64, [F0, F1, +def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, // Saved: F2, F3, F4, F5, F6, F7, F8, F9, - F31 ]>; //zero + F31)>; //zero -def F8RC : RegisterClass<"Alpha", [f64], 64, [F0, F1, - F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, - F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, - // Saved: - F2, F3, F4, F5, F6, F7, F8, F9, - F31 ]>; //zero +def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>; diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp index bda7104ab926..624a5e2ebd09 100644 --- a/lib/Target/Alpha/AlphaSubtarget.cpp +++ b/lib/Target/Alpha/AlphaSubtarget.cpp @@ -7,19 +7,30 @@ // //===----------------------------------------------------------------------===// // -// This file implements the Alpha specific subclass of TargetSubtarget. +// This file implements the Alpha specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "AlphaSubtarget.h" #include "Alpha.h" -#include "AlphaGenSubtarget.inc" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "AlphaGenSubtargetInfo.inc" + using namespace llvm; -AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &FS) - : HasCT(false) { - std::string CPU = "generic"; +AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS) + : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) { + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(CPUName, FS); + + // Initialize scheduling itinerary for the specified CPU. 
+  InstrItins = getInstrItineraryForCPU(CPUName);
 }
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
index f0eb93c6cba2..70b311683f8b 100644
--- a/lib/Target/Alpha/AlphaSubtarget.h
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -7,21 +7,24 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the Alpha specific subclass of TargetSubtarget.
+// This file declares the Alpha specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef ALPHASUBTARGET_H
 #define ALPHASUBTARGET_H
 
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetSubtarget.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "AlphaGenSubtargetInfo.inc"
+
 namespace llvm {
+class StringRef;
 
-class AlphaSubtarget : public TargetSubtarget {
+class AlphaSubtarget : public AlphaGenSubtargetInfo {
 protected:
 
   bool HasCT;
@@ -32,12 +35,12 @@ public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  AlphaSubtarget(const std::string &TT, const std::string &FS);
+  AlphaSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   bool hasCT() const { return HasCT; }
 };
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index b53533b8ebcb..3b65d41be892 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Alpha.h"
-#include "AlphaMCAsmInfo.h"
 #include "AlphaTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/FormattedStream.h"
@@ -21,15 +20,15 @@ using namespace llvm;
 
 extern "C" void LLVMInitializeAlphaTarget() {
   // Register the target.
RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget); - RegisterAsmInfo<AlphaMCAsmInfo> Y(TheAlphaTarget); } AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : LLVMTargetMachine(T, TT), + : LLVMTargetMachine(T, TT, CPU, FS), DataLayout("e-f128:128:128-n64"), FrameLowering(Subtarget), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), TLInfo(*this), TSInfo(*this) { setRelocationModel(Reloc::PIC_); diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 26238fbbc431..cf00e5875d34 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -37,7 +37,7 @@ class AlphaTargetMachine : public LLVMTargetMachine { public: AlphaTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index 454262ad631d..a6027bbf0b2a 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -1,14 +1,11 @@ set(LLVM_TARGET_DEFINITIONS Alpha.td) -tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(AlphaGenRegisterNames.inc -gen-register-enums) -tablegen(AlphaGenRegisterInfo.inc -gen-register-desc) -tablegen(AlphaGenInstrNames.inc -gen-instr-enums) -tablegen(AlphaGenInstrInfo.inc -gen-instr-desc) +tablegen(AlphaGenRegisterInfo.inc -gen-register-info) +tablegen(AlphaGenInstrInfo.inc -gen-instr-info) tablegen(AlphaGenAsmWriter.inc -gen-asm-writer) tablegen(AlphaGenDAGISel.inc -gen-dag-isel) tablegen(AlphaGenCallingConv.inc -gen-callingconv) -tablegen(AlphaGenSubtarget.inc -gen-subtarget) +tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget) add_llvm_target(AlphaCodeGen AlphaAsmPrinter.cpp @@ -18,7 +15,6 @@ add_llvm_target(AlphaCodeGen AlphaISelLowering.cpp AlphaFrameLowering.cpp AlphaLLRP.cpp - AlphaMCAsmInfo.cpp AlphaRegisterInfo.cpp AlphaSubtarget.cpp AlphaTargetMachine.cpp @@ -26,3 +22,4 @@ add_llvm_target(AlphaCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp index a35e8846e072..a35e8846e072 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp +++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h index 837844bd29a9..837844bd29a9 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.h +++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp new file mode 100644 index 000000000000..562052b6df67 --- /dev/null +++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp @@ -0,0 +1,57 @@ +//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Alpha specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "AlphaMCTargetDesc.h" +#include "AlphaMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "AlphaGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AlphaGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "AlphaGenRegisterInfo.inc" + +using namespace llvm; + + +static MCInstrInfo *createAlphaMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAlphaMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeAlphaMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo); +} + +static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitAlphaMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeAlphaMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget, + createAlphaMCSubtargetInfo); +} + +extern "C" void LLVMInitializeAlphaMCAsmInfo() { + RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget); +} diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h new file mode 100644 index 000000000000..b0619e6cb011 --- /dev/null +++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h @@ -0,0 +1,40 @@ +//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Alpha specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHAMCTARGETDESC_H +#define ALPHAMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheAlphaTarget; + +} // End llvm namespace + +// Defines symbolic names for Alpha registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "AlphaGenRegisterInfo.inc" + +// Defines symbolic names for the Alpha instructions. +// +#define GET_INSTRINFO_ENUM +#include "AlphaGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AlphaGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..ad0dd26aafb1 --- /dev/null +++ b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMAlphaDesc + AlphaMCTargetDesc.cpp + AlphaMCAsmInfo.cpp + ) diff --git a/lib/Target/Alpha/MCTargetDesc/Makefile b/lib/Target/Alpha/MCTargetDesc/Makefile new file mode 100644 index 000000000000..d55175fa69dc --- /dev/null +++ b/lib/Target/Alpha/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Alpha/TargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMAlphaDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile index 9564be680e51..f48847a0627d 100644 --- a/lib/Target/Alpha/Makefile +++ b/lib/Target/Alpha/Makefile @@ -12,12 +12,10 @@ LIBRARYNAME = LLVMAlphaCodeGen TARGET = Alpha # Make sure that tblgen is run, first thing. -BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ - AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \ - AlphaGenInstrInfo.inc \ +BUILT_SOURCES = AlphaGenRegisterInfo.inc AlphaGenInstrInfo.inc \ AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \ - AlphaGenCallingConv.inc AlphaGenSubtarget.inc + AlphaGenCallingConv.inc AlphaGenSubtargetInfo.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h index ec1fa8689ded..a00ff4cc3275 100644 --- a/lib/Target/Blackfin/Blackfin.h +++ b/lib/Target/Blackfin/Blackfin.h @@ -15,6 +15,7 @@ #ifndef TARGET_BLACKFIN_H #define TARGET_BLACKFIN_H +#include "MCTargetDesc/BlackfinMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -24,15 +25,7 @@ namespace llvm { FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel); - extern Target TheBlackfinTarget; } // end namespace llvm -// Defines symbolic names for Blackfin registers. This defines a mapping from -// register name to register number. -#include "BlackfinGenRegisterNames.inc" - -// Defines symbolic names for the Blackfin instructions. -#include "BlackfinGenInstrNames.inc" - #endif diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 42659aed5d71..215ca43ea338 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -146,21 +146,21 @@ void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) { NI != DAG.allnodes_end(); ++NI) { if (NI->use_empty() || !NI->isMachineOpcode()) continue; - const TargetInstrDesc &DefTID = TII.get(NI->getMachineOpcode()); + const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode()); for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) { if (!UI->isMachineOpcode()) continue; - if (UI.getUse().getResNo() >= DefTID.getNumDefs()) + if (UI.getUse().getResNo() >= DefMCID.getNumDefs()) continue; const TargetRegisterClass *DefRC = - DefTID.OpInfo[UI.getUse().getResNo()].getRegClass(TRI); + TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI); - const TargetInstrDesc &UseTID = TII.get(UI->getMachineOpcode()); - if (UseTID.getNumDefs()+UI.getOperandNo() >= UseTID.getNumOperands()) + const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode()); + if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands()) continue; const TargetRegisterClass *UseRC = - UseTID.OpInfo[UseTID.getNumDefs()+UI.getOperandNo()].getRegClass(TRI); + TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI); if (!DefRC || !UseRC) continue; // We cannot copy CC <-> !(CC/D) diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 588d9bded87d..d5728324de87 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -621,39 +621,21 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { 
case 'w': return Pair(0U, ALLRegisterClass); case 'Z': return Pair(P3, PRegisterClass); case 'Y': return Pair(P1, PRegisterClass); + case 'z': return Pair(0U, zConsRegisterClass); + case 'D': return Pair(0U, DConsRegisterClass); + case 'W': return Pair(0U, WConsRegisterClass); + case 'c': return Pair(0U, cConsRegisterClass); + case 't': return Pair(0U, tConsRegisterClass); + case 'u': return Pair(0U, uConsRegisterClass); + case 'k': return Pair(0U, kConsRegisterClass); + case 'y': return Pair(0U, yConsRegisterClass); } // Not implemented: q0-q7, qA. Use {R2} etc instead. - // Constraints z, D, W, c, t, u, k, and y use non-existing classes, defer to - // getRegClassForInlineAsmConstraint() return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -std::vector<unsigned> BlackfinTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { - using namespace BF; - - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - case 'z': return make_vector<unsigned>(P0, P1, P2, 0); - case 'D': return make_vector<unsigned>(R0, R2, R4, R6, 0); - case 'W': return make_vector<unsigned>(R1, R3, R5, R7, 0); - case 'c': return make_vector<unsigned>(I0, I1, I2, I3, - B0, B1, B2, B3, - L0, L1, L2, L3, 0); - case 't': return make_vector<unsigned>(LT0, LT1, 0); - case 'u': return make_vector<unsigned>(LB0, LB1, 0); - case 'k': return make_vector<unsigned>(LC0, LC1, 0); - case 'y': return make_vector<unsigned>(RETS, RETN, RETI, RETX, RETE, - ASTAT, SEQSTAT, USP, 0); - } - - return std::vector<unsigned>(); -} - bool BlackfinTargetLowering:: isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Blackfin target isn't yet aware of offsets. diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 9a54557ad526..b65775b9285d 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -48,9 +48,6 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; const char *getTargetNodeName(unsigned Opcode) const; diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index 598cf2a68c6b..d190ae7984b2 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -14,17 +14,20 @@ #include "BlackfinInstrInfo.h" #include "BlackfinSubtarget.h" #include "Blackfin.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_CTOR #include "BlackfinGenInstrInfo.inc" using namespace llvm; BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST) - : TargetInstrInfoImpl(BlackfinInsts, array_lengthof(BlackfinInsts)), + : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP), RI(ST, *this), Subtarget(ST) {} diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index fdc1029da588..d22ddf0d7313 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ 
b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -17,9 +17,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "BlackfinRegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "BlackfinGenInstrInfo.inc" + namespace llvm { - class BlackfinInstrInfo : public TargetInstrInfoImpl { + class BlackfinInstrInfo : public BlackfinGenInstrInfo { const BlackfinRegisterInfo RI; const BlackfinSubtarget& Subtarget; public: diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index 34a8d3809ea2..ae8ee9e2a1a2 100644 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -83,7 +83,7 @@ bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const { static const FunctionType *getType(LLVMContext &Context, unsigned id) { const Type *ResultTy = NULL; - std::vector<const Type*> ArgTys; + std::vector<Type*> ArgTys; bool IsVarArg = false; #define GET_INTRINSIC_GENERATOR diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 6ca460ef803e..3a7c104ee055 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -29,13 +29,15 @@ #include "llvm/Type.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" + +#define GET_REGINFO_TARGET_DESC +#include "BlackfinGenRegisterInfo.inc" + using namespace llvm; BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii) - : BlackfinGenRegisterInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP), - Subtarget(st), - TII(tii) {} + : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {} const unsigned* BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { @@ -356,6 +358,3 @@ int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, llvm_unreachable("What is the dwarf register number"); return -1; } - -#include "BlackfinGenRegisterInfo.inc" - diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 375d277216c2..86f45c17c625 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -16,7 +16,9 @@ #define BLACKFINREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "BlackfinGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "BlackfinGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td index d8fd302b513e..1c42205eb780 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -195,108 +195,83 @@ def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>; def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>; // Register classes. 
-def D16 : RegisterClass<"BF", [i16], 16, - [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, - R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L]>; +def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>; -def D16L : RegisterClass<"BF", [i16], 16, - [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L]>; +def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>; -def D16H : RegisterClass<"BF", [i16], 16, - [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H]>; - -def P16 : RegisterClass<"BF", [i16], 16, - [P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L, - P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>; +def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>; def P16L : RegisterClass<"BF", [i16], 16, - [P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>; + (add (sequence "P%uL", 0, 5), SPL, FPL)>; def P16H : RegisterClass<"BF", [i16], 16, - [P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>; + (add (sequence "P%uH", 0, 5), SPH, FPH)>; + +def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>; -def DP16 : RegisterClass<"BF", [i16], 16, - [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, - R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L, - P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L, - P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>; +def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>; -def DP16L : RegisterClass<"BF", [i16], 16, - [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L, - P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>; +def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>; -def DP16H : RegisterClass<"BF", [i16], 16, - [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H, - P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>; +def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>; def GR16 : RegisterClass<"BF", [i16], 16, - [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, - R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L, - P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L, - P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL, + (add DP16, I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L, M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L, B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L, - L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>; + L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>; -def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { +def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> { let SubRegClasses = [(D16L lo16), (D16H hi16)]; } -def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> { +def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> { let SubRegClasses = [(P16L lo16), (P16H hi16)]; } -def I : RegisterClass<"BF", [i32], 32, [I0, I1, I2, I3]>; -def M : RegisterClass<"BF", [i32], 32, [M0, M1, M2, M3]>; -def B : RegisterClass<"BF", [i32], 32, [B0, B1, B2, B3]>; -def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>; - -def DP : RegisterClass<"BF", [i32], 32, - [R0, R1, R2, R3, R4, R5, R6, R7, - P0, P1, P2, P3, P4, P5, FP, SP]> { +def DP : RegisterClass<"BF", [i32], 32, (add D, P)> { let SubRegClasses = [(DP16L lo16), (DP16H hi16)]; } -def GR : RegisterClass<"BF", [i32], 32, - [R0, R1, R2, R3, R4, R5, R6, R7, - P0, P1, P2, P3, P4, P5, - I0, I1, I2, I3, M0, M1, M2, M3, - B0, B1, B2, B3, L0, L1, L2, L3, - FP, SP]>; +def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>; +def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>; +def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>; +def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>; + +def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>; def ALL : RegisterClass<"BF", [i32], 32, - [R0, R1, R2, R3, R4, R5, R6, R7, - P0, P1, P2, P3, P4, P5, - I0, I1, I2, I3, M0, M1, M2, M3, - B0, 
B1, B2, B3, L0, L1, L2, L3, - FP, SP, + (add GR, A0X, A0W, A1X, A1W, ASTAT, RETS, LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2, - USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT]>; + USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>; -def PI : RegisterClass<"BF", [i32], 32, - [P0, P1, P2, P3, P4, P5, I0, I1, I2, I3, FP, SP]>; +def PI : RegisterClass<"BF", [i32], 32, (add P, I)>; // We are going to pretend that CC and !CC are 32-bit registers, even though // they only can hold 1 bit. let CopyCost = -1, Size = 8 in { -def JustCC : RegisterClass<"BF", [i32], 8, [CC]>; -def NotCC : RegisterClass<"BF", [i32], 8, [NCC]>; -def AnyCC : RegisterClass<"BF", [i32], 8, [CC, NCC]> { - let MethodProtos = [{ - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - AnyCCClass::iterator - AnyCCClass::allocation_order_end(const MachineFunction &MF) const { - return allocation_order_begin(MF)+1; - } - }]; -} +def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>; +def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>; +def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>; def StatBit : RegisterClass<"BF", [i1], 8, - [AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]>; + (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>; } // Should be i40, but that isn't defined. It is not a legal type yet anyway. -def Accu : RegisterClass<"BF", [i64], 64, [A0, A1]>; +def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>; + +// Register classes to match inline asm constraints. +def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>; +def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>; +def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>; +def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3, + B0, B1, B2, B3, + L0, L1, L2, L3)>; +def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>; +def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>; +def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>; +def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX, + RETE, ASTAT, SEQSTAT, + USP)>; diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp index e104c5245a9e..ec919cdf0b90 100644 --- a/lib/Target/Blackfin/BlackfinSubtarget.cpp +++ b/lib/Target/Blackfin/BlackfinSubtarget.cpp @@ -7,18 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file implements the blackfin specific subclass of TargetSubtarget. +// This file implements the blackfin specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "BlackfinSubtarget.h" -#include "BlackfinGenSubtarget.inc" +#include "Blackfin.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "BlackfinGenSubtargetInfo.inc" using namespace llvm; BlackfinSubtarget::BlackfinSubtarget(const std::string &TT, + const std::string &CPU, const std::string &FS) - : sdram(false), + : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false), icplb(false), wa_mi_shift(false), wa_csync(false), @@ -30,7 +36,9 @@ BlackfinSubtarget::BlackfinSubtarget(const std::string &TT, wa_killed_mmr(false), wa_rets(false) { - std::string CPU = "generic"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; // Parse features string. 
- ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(CPUName, FS); } diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h index d667fe26519b..1a01a81116d6 100644 --- a/lib/Target/Blackfin/BlackfinSubtarget.h +++ b/lib/Target/Blackfin/BlackfinSubtarget.h @@ -7,19 +7,23 @@ // //===----------------------------------------------------------------------===// // -// This file declares the BLACKFIN specific subclass of TargetSubtarget. +// This file declares the BLACKFIN specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef BLACKFIN_SUBTARGET_H #define BLACKFIN_SUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "BlackfinGenSubtargetInfo.inc" + namespace llvm { +class StringRef; - class BlackfinSubtarget : public TargetSubtarget { + class BlackfinSubtarget : public BlackfinGenSubtargetInfo { bool sdram; bool icplb; bool wa_mi_shift; @@ -32,12 +36,12 @@ namespace llvm { bool wa_killed_mmr; bool wa_rets; public: - BlackfinSubtarget(const std::string &TT, const std::string &FS); + BlackfinSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; } // end namespace llvm diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp index e11920f568a2..a1c9f1c05e0d 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -12,7 +12,6 @@ #include "BlackfinTargetMachine.h" #include "Blackfin.h" -#include "BlackfinMCAsmInfo.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" @@ -20,16 +19,15 @@ using namespace llvm; extern "C" void LLVMInitializeBlackfinTarget() { RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget); - RegisterAsmInfo<BlackfinMCAsmInfo> Y(TheBlackfinTarget); - } BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : LLVMTargetMachine(T, TT), + : LLVMTargetMachine(T, TT, CPU, FS), DataLayout("e-p:32:32-i64:32-f64:32-n32"), - Subtarget(TT, FS), + Subtarget(TT, CPU, FS), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index 29b2b177fc3c..bd7dc84f04ae 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -36,7 +36,7 @@ namespace llvm { BlackfinIntrinsicInfo IntrinsicInfo; public: BlackfinTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt index a47299ff1611..d3f33a987e69 100644 --- a/lib/Target/Blackfin/CMakeLists.txt +++ b/lib/Target/Blackfin/CMakeLists.txt @@ -1,13 +1,10 @@ set(LLVM_TARGET_DEFINITIONS Blackfin.td) -tablegen(BlackfinGenRegisterInfo.h.inc 
-gen-register-desc-header) -tablegen(BlackfinGenRegisterNames.inc -gen-register-enums) -tablegen(BlackfinGenRegisterInfo.inc -gen-register-desc) -tablegen(BlackfinGenInstrNames.inc -gen-instr-enums) -tablegen(BlackfinGenInstrInfo.inc -gen-instr-desc) +tablegen(BlackfinGenRegisterInfo.inc -gen-register-info) +tablegen(BlackfinGenInstrInfo.inc -gen-instr-info) tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer) tablegen(BlackfinGenDAGISel.inc -gen-dag-isel) -tablegen(BlackfinGenSubtarget.inc -gen-subtarget) +tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget) tablegen(BlackfinGenCallingConv.inc -gen-callingconv) tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic) @@ -18,7 +15,6 @@ add_llvm_target(BlackfinCodeGen BlackfinISelDAGToDAG.cpp BlackfinISelLowering.cpp BlackfinFrameLowering.cpp - BlackfinMCAsmInfo.cpp BlackfinRegisterInfo.cpp BlackfinSubtarget.cpp BlackfinTargetMachine.cpp @@ -26,3 +22,4 @@ add_llvm_target(BlackfinCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp index 5b9d4a29794e..5b9d4a29794e 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp +++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h index c372aa247e04..c372aa247e04 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h +++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp new file mode 100644 index 000000000000..0fa1471ae3e7 --- /dev/null +++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp @@ -0,0 +1,60 @@ +//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Blackfin specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "BlackfinMCTargetDesc.h" +#include "BlackfinMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "BlackfinGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "BlackfinGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "BlackfinGenRegisterInfo.inc" + +using namespace llvm; + + +static MCInstrInfo *createBlackfinMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitBlackfinMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeBlackfinMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget, + createBlackfinMCInstrInfo); +} + + +static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitBlackfinMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget, + createBlackfinMCSubtargetInfo); +} + +extern "C" void LLVMInitializeBlackfinMCAsmInfo() { + RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget); +} diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h new file mode 100644 index 000000000000..5bffe94fc582 --- /dev/null +++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Blackfin specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef BLACKFINMCTARGETDESC_H +#define BLACKFINMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheBlackfinTarget; + +} // End llvm namespace + +// Defines symbolic names for Blackfin registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "BlackfinGenRegisterInfo.inc" + +// Defines symbolic names for the Blackfin instructions. +#define GET_INSTRINFO_ENUM +#include "BlackfinGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "BlackfinGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..8cd924f9236f --- /dev/null +++ b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMBlackfinDesc + BlackfinMCTargetDesc.cpp + BlackfinMCAsmInfo.cpp + ) diff --git a/lib/Target/Blackfin/MCTargetDesc/Makefile b/lib/Target/Blackfin/MCTargetDesc/Makefile new file mode 100644 index 000000000000..6b26101f4473 --- /dev/null +++ b/lib/Target/Blackfin/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Blackfin/TargetDesc/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMBlackfinDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile index 5eb8e9a992b9..756ac6bcd8a0 100644 --- a/lib/Target/Blackfin/Makefile +++ b/lib/Target/Blackfin/Makefile @@ -12,13 +12,12 @@ LIBRARYNAME = LLVMBlackfinCodeGen TARGET = Blackfin # Make sure that tblgen is run, first thing. -BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \ - BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \ - BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \ - BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \ +BUILT_SOURCES = BlackfinGenRegisterInfo.inc BlackfinGenInstrInfo.inc \ + BlackfinGenAsmWriter.inc \ + BlackfinGenDAGISel.inc BlackfinGenSubtargetInfo.inc \ BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index fde2e29e80c6..415beb1dd1cd 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -20,7 +20,6 @@ #include "llvm/Instructions.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include "llvm/InlineAsm.h" @@ -37,6 +36,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetRegistry.h" @@ -61,6 +62,12 @@ extern "C" void LLVMInitializeCBackendTarget() { RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget); } +extern "C" void LLVMInitializeCBackendMCAsmInfo() {} + +extern "C" void LLVMInitializeCBackendMCInstrInfo() {} + +extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {} + namespace { class CBEMCAsmInfo : public MCAsmInfo { public: @@ -69,29 +76,6 @@ namespace { PrivateGlobalPrefix = ""; } }; - /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for - /// any unnamed structure types that are used by the program, and merges - /// external functions with the same name. - /// - class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass { - public: - static char ID; - CBackendNameAllUsedStructsAndMergeFunctions() - : ModulePass(ID) { - initializeFindUsedTypesPass(*PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<FindUsedTypes>(); - } - - virtual const char *getPassName() const { - return "C backend type canonicalizer"; - } - - virtual bool runOnModule(Module &M); - }; - - char CBackendNameAllUsedStructsAndMergeFunctions::ID = 0; /// CWriter - This class is the main chunk of code that converts an LLVM /// module to a C translation unit. 
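Note on the consumer side of these new hooks: the empty LLVMInitializeCBackend*MC* stubs above register no factories, so the registry simply hands back NULL for those targets. A minimal sketch (not part of this patch) of how a tool pulls the registered MC objects back out, assuming the matching Target accessors this revision uses elsewhere (createMCAsmInfo, createMCInstrInfo, createMCSubtargetInfo):

#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetRegistry.h"
#include <string>
using namespace llvm;

// Hypothetical helper: check that a triple's MC-layer hooks are registered.
// Each create* call returns NULL when no factory was registered for the
// target, which is exactly what the empty stubs above arrange.
static bool hasMCSupport(const std::string &TripleName) {
  std::string Err;
  const Target *T = TargetRegistry::lookupTarget(TripleName, Err);
  if (!T) return false;                        // unknown triple
  const MCAsmInfo *MAI = T->createMCAsmInfo(TripleName);
  const MCInstrInfo *MII = T->createMCInstrInfo();
  const MCSubtargetInfo *STI =
      T->createMCSubtargetInfo(TripleName, /*CPU=*/"", /*Features=*/"");
  bool OK = MAI && MII && STI;
  delete MAI; delete MII; delete STI;          // caller owns the objects
  return OK;
}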
@@ -104,7 +88,7 @@ namespace { const MCAsmInfo* TAsm; MCContext *TCtx; const TargetData* TD; - std::map<const Type *, std::string> TypeNames; + std::map<const ConstantFP *, unsigned> FPConstantMap; std::set<Function*> intrinsicPrototypesAlreadyGenerated; std::set<const Argument*> ByValParams; @@ -113,6 +97,10 @@ namespace { DenseMap<const Value*, unsigned> AnonValueNumbers; unsigned NextAnonValueNumber; + /// UnnamedStructIDs - This contains a unique ID for each struct that is + /// either anonymous or has no name. + DenseMap<const StructType*, unsigned> UnnamedStructIDs; + public: static char ID; explicit CWriter(formatted_raw_ostream &o) @@ -158,9 +146,9 @@ namespace { delete TCtx; delete TAsm; FPConstantMap.clear(); - TypeNames.clear(); ByValParams.clear(); intrinsicPrototypesAlreadyGenerated.clear(); + UnnamedStructIDs.clear(); return false; } @@ -177,6 +165,8 @@ namespace { const AttrListPtr &PAL, const PointerType *Ty); + std::string getStructName(const StructType *ST); + /// writeOperandDeref - Print the result of dereferencing the specified /// operand with '*'. This is equivalent to printing '*' then using /// writeOperand, but avoids excess syntax in some cases. @@ -205,9 +195,12 @@ namespace { std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c); void lowerIntrinsics(Function &F); + /// Prints the definition of the intrinsic function F. Supports the + /// intrinsics which need to be explicitly defined in the CBackend. + void printIntrinsicDefinition(const Function &F, raw_ostream &Out); - void printModuleTypes(const TypeSymbolTable &ST); - void printContainedStructs(const Type *Ty, std::set<const Type *> &); + void printModuleTypes(); + void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &); void printFloatingPointConstants(Function &F); void printFloatingPointConstants(const Constant *C); void printFunctionSignature(const Function *F, bool Prototype); @@ -278,7 +271,7 @@ namespace { return AI; } - // isInlineAsm - Check if the instruction is a call to an inline asm chunk + // isInlineAsm - Check if the instruction is a call to an inline asm chunk. static bool isInlineAsm(const Instruction& I) { if (const CallInst *CI = dyn_cast<CallInst>(&I)) return isa<InlineAsm>(CI->getCalledValue()); @@ -351,6 +344,7 @@ namespace { char CWriter::ID = 0; + static std::string CBEMangle(const std::string &S) { std::string Result; @@ -366,90 +360,14 @@ static std::string CBEMangle(const std::string &S) { return Result; } - -/// This method inserts names for any unnamed structure types that are used by -/// the program, and removes names from structure types that are not used by the -/// program. -/// -bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { - // Get a set of types that are used by the program... - SetVector<const Type *> UT = getAnalysis<FindUsedTypes>().getTypes(); - - // Loop over the module symbol table, removing types from UT that are - // already named, and removing names for types that are not used. - // - TypeSymbolTable &TST = M.getTypeSymbolTable(); - for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end(); - TI != TE; ) { - TypeSymbolTable::iterator I = TI++; - - // If this isn't a struct or array type, remove it from our set of types - // to name. This simplifies emission later. - if (!I->second->isStructTy() && !I->second->isOpaqueTy() && - !I->second->isArrayTy()) { - TST.remove(I); - } else { - // If this is not used, remove it from the symbol table. 
- if (!UT.count(I->second)) - TST.remove(I); - else - UT.remove(I->second); // Only keep one name for this type. - } - } - - // UT now contains types that are not named. Loop over it, naming - // structure types. - // - bool Changed = false; - unsigned RenameCounter = 0; - for (SetVector<const Type *>::const_iterator I = UT.begin(), E = UT.end(); - I != E; ++I) - if ((*I)->isStructTy() || (*I)->isArrayTy()) { - while (M.addTypeName("unnamed"+utostr(RenameCounter), *I)) - ++RenameCounter; - Changed = true; - } - - - // Loop over all external functions and globals. If we have two with - // identical names, merge them. - // FIXME: This code should disappear when we don't allow values with the same - // names when they have different types! - std::map<std::string, GlobalValue*> ExtSymbols; - for (Module::iterator I = M.begin(), E = M.end(); I != E;) { - Function *GV = I++; - if (GV->isDeclaration() && GV->hasName()) { - std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X - = ExtSymbols.insert(std::make_pair(GV->getName(), GV)); - if (!X.second) { - // Found a conflict, replace this global with the previous one. - GlobalValue *OldGV = X.first->second; - GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType())); - GV->eraseFromParent(); - Changed = true; - } - } - } - // Do the same for globals. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E;) { - GlobalVariable *GV = I++; - if (GV->isDeclaration() && GV->hasName()) { - std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X - = ExtSymbols.insert(std::make_pair(GV->getName(), GV)); - if (!X.second) { - // Found a conflict, replace this global with the previous one. - GlobalValue *OldGV = X.first->second; - GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType())); - GV->eraseFromParent(); - Changed = true; - } - } - } - - return Changed; +std::string CWriter::getStructName(const StructType *ST) { + if (!ST->isAnonymous() && !ST->getName().empty()) + return CBEMangle("l_"+ST->getName().str()); + + return "l_unnamed_" + utostr(UnnamedStructIDs[ST]); } + /// printStructReturnPointerFunctionType - This is like printType for a struct /// return type, except, instead of printing the type as void (*)(Struct*, ...) /// print it as "Struct (*)(...)", for struct return functions. @@ -463,7 +381,7 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, bool PrintedType = false; FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); - const Type *RetTy = cast<PointerType>(I->get())->getElementType(); + const Type *RetTy = cast<PointerType>(*I)->getElementType(); unsigned Idx = 1; for (++I, ++Idx; I != E; ++I, ++Idx) { if (PrintedType) @@ -551,12 +469,6 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, return Out; } - // Check to see if the type is named. - if (!IgnoreName || Ty->isOpaqueTy()) { - std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); - if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; - } - switch (Ty->getTypeID()) { case Type::FunctionTyID: { const FunctionType *FTy = cast<FunctionType>(Ty); @@ -591,6 +503,11 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, } case Type::StructTyID: { const StructType *STy = cast<StructType>(Ty); + + // Check to see if the type is named. 
+ if (!IgnoreName) + return Out << getStructName(STy) << ' ' << NameSoFar; + Out << NameSoFar + " {\n"; unsigned Idx = 0; for (StructType::element_iterator I = STy->element_begin(), @@ -631,12 +548,6 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, return Out << "; }"; } - case Type::OpaqueTyID: { - std::string TyName = "struct opaque_" + itostr(OpaqueCounter++); - assert(TypeNames.find(Ty) == TypeNames.end()); - TypeNames[Ty] = TyName; - return Out << TyName << ' ' << NameSoFar; - } default: llvm_unreachable("Unhandled case in getTypeProps!"); } @@ -660,7 +571,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { if (isString) { Out << '\"'; - // Keep track of whether the last number was a hexadecimal escape + // Keep track of whether the last number was a hexadecimal escape. bool LastWasHex = false; // Do not include the last character, which we know is null @@ -1751,7 +1662,7 @@ bool CWriter::doInitialization(Module &M) { std::string E; if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) - TAsm = Match->createAsmInfo(Triple); + TAsm = Match->createMCAsmInfo(Triple); #endif TAsm = new CBEMCAsmInfo(); TCtx = new MCContext(*TAsm, NULL); @@ -1777,6 +1688,7 @@ bool CWriter::doInitialization(Module &M) { Out << "/* Provide Declarations */\n"; Out << "#include <stdarg.h>\n"; // Varargs support Out << "#include <setjmp.h>\n"; // Unwind support + Out << "#include <limits.h>\n"; // With overflow intrinsics support. generateCompilerSpecificCode(Out, TD); // Provide a definition for `bool' if not compiling with a C++ compiler. @@ -1820,8 +1732,8 @@ bool CWriter::doInitialization(Module &M) { << "/* End Module asm statements */\n"; } - // Loop over the symbol table, emitting all named constants... - printModuleTypes(M.getTypeSymbolTable()); + // Loop over the symbol table, emitting all named constants. + printModuleTypes(); // Global variable declarations... if (!M.global_empty()) { @@ -1855,29 +1767,46 @@ bool CWriter::doInitialization(Module &M) { Out << "float fmodf(float, float);\n"; Out << "long double fmodl(long double, long double);\n"; + // Store the intrinsics which will be declared/defined below. + SmallVector<const Function*, 8> intrinsicsToDefine; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { // Don't print declarations for intrinsic functions. - if (!I->isIntrinsic() && I->getName() != "setjmp" && - I->getName() != "longjmp" && I->getName() != "_setjmp") { - if (I->hasExternalWeakLinkage()) - Out << "extern "; - printFunctionSignature(I, true); - if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - if (StaticCtors.count(I)) - Out << " __ATTRIBUTE_CTOR__"; - if (StaticDtors.count(I)) - Out << " __ATTRIBUTE_DTOR__"; - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - - if (I->hasName() && I->getName()[0] == 1) - Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; + // Store the used intrinsics, which need to be explicitly defined. 
+ if (I->isIntrinsic()) { + switch (I->getIntrinsicID()) { + default: + break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + intrinsicsToDefine.push_back(I); + break; + } + continue; + } + + if (I->getName() == "setjmp" || + I->getName() == "longjmp" || I->getName() == "_setjmp") + continue; + + if (I->hasExternalWeakLinkage()) + Out << "extern "; + printFunctionSignature(I, true); + if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) + Out << " __ATTRIBUTE_WEAK__"; + if (I->hasExternalWeakLinkage()) + Out << " __EXTERNAL_WEAK__"; + if (StaticCtors.count(I)) + Out << " __ATTRIBUTE_CTOR__"; + if (StaticDtors.count(I)) + Out << " __ATTRIBUTE_DTOR__"; + if (I->hasHiddenVisibility()) + Out << " __HIDDEN__"; + + if (I->hasName() && I->getName()[0] == 1) + Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; - Out << ";\n"; - } + Out << ";\n"; } // Output the global variable declarations @@ -2012,6 +1941,14 @@ bool CWriter::doInitialization(Module &M) { Out << "return X <= Y ; }\n"; Out << "static inline int llvm_fcmp_oge(double X, double Y) { "; Out << "return X >= Y ; }\n"; + + // Emit definitions of the intrinsics. + for (SmallVector<const Function*, 8>::const_iterator + I = intrinsicsToDefine.begin(), + E = intrinsicsToDefine.end(); I != E; ++I) { + printIntrinsicDefinition(**I, Out); + } + return false; } @@ -2085,11 +2022,10 @@ void CWriter::printFloatingPointConstants(const Constant *C) { } - /// printSymbolTable - Run through symbol table looking for type names. If a /// type name is found, emit its declaration... /// -void CWriter::printModuleTypes(const TypeSymbolTable &TST) { +void CWriter::printModuleTypes() { Out << "/* Helper union for bitcasts */\n"; Out << "typedef union {\n"; Out << " unsigned int Int32;\n"; @@ -2098,46 +2034,42 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { Out << " double Double;\n"; Out << "} llvmBitCastUnion;\n"; - // We are only interested in the type plane of the symbol table. - TypeSymbolTable::const_iterator I = TST.begin(); - TypeSymbolTable::const_iterator End = TST.end(); + // Get all of the struct types used in the module. + std::vector<StructType*> StructTypes; + TheModule->findUsedStructTypes(StructTypes); - // If there are no type names, exit early. - if (I == End) return; + if (StructTypes.empty()) return; - // Print out forward declarations for structure types before anything else! Out << "/* Structure forward decls */\n"; - for (; I != End; ++I) { - std::string Name = "struct " + CBEMangle("l_"+I->first); - Out << Name << ";\n"; - TypeNames.insert(std::make_pair(I->second, Name)); - } - Out << '\n'; + unsigned NextTypeID = 0; + + // If any of them are missing names, add a unique ID to UnnamedStructIDs. + // Print out forward declarations for structure types. + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *ST = StructTypes[i]; - // Now we can print out typedefs. Above, we guaranteed that this can only be - // for struct or opaque types. - Out << "/* Typedefs */\n"; - for (I = TST.begin(); I != End; ++I) { - std::string Name = CBEMangle("l_"+I->first); - Out << "typedef "; - printType(Out, I->second, false, Name); - Out << ";\n"; + if (ST->isAnonymous() || ST->getName().empty()) + UnnamedStructIDs[ST] = NextTypeID++; + + std::string Name = getStructName(ST); + + Out << "typedef struct " << Name << ' ' << Name << ";\n"; } Out << '\n'; - // Keep track of which structures have been printed so far... 
- std::set<const Type *> StructPrinted; + // Keep track of which structures have been printed so far. + SmallPtrSet<const Type *, 16> StructPrinted; // Loop over all structures then push them into the stack so they are // printed in the correct order. // Out << "/* Structure contents */\n"; - for (I = TST.begin(); I != End; ++I) - if (I->second->isStructTy() || I->second->isArrayTy()) + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) + if (StructTypes[i]->isStructTy()) // Only print out used types! - printContainedStructs(I->second, StructPrinted); + printContainedStructs(StructTypes[i], StructPrinted); } // Push the struct onto the stack and recursively push all structs @@ -2146,7 +2078,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // TODO: Make this work properly with vector types // void CWriter::printContainedStructs(const Type *Ty, - std::set<const Type*> &StructPrinted) { + SmallPtrSet<const Type *, 16> &StructPrinted) { // Don't walk through pointers. if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) return; @@ -2156,14 +2088,13 @@ void CWriter::printContainedStructs(const Type *Ty, E = Ty->subtype_end(); I != E; ++I) printContainedStructs(*I, StructPrinted); - if (Ty->isStructTy() || Ty->isArrayTy()) { + if (const StructType *ST = dyn_cast<StructType>(Ty)) { // Check to see if we have already printed this struct. - if (StructPrinted.insert(Ty).second) { - // Print structure type out. - std::string Name = TypeNames[Ty]; - printType(Out, Ty, false, Name, true); - Out << ";\n\n"; - } + if (!StructPrinted.insert(Ty)) return; + + // Print structure type out. + printType(Out, ST, false, getStructName(ST), true); + Out << ";\n\n"; } } @@ -2786,6 +2717,103 @@ void CWriter::visitSelectInst(SelectInst &I) { Out << "))"; } +// Returns the macro name or value of the max or min of an integer type +// (as defined in limits.h). +static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax, + raw_ostream &Out) { + const char* type; + const char* sprefix = ""; + + unsigned NumBits = Ty.getBitWidth(); + if (NumBits <= 8) { + type = "CHAR"; + sprefix = "S"; + } else if (NumBits <= 16) { + type = "SHRT"; + } else if (NumBits <= 32) { + type = "INT"; + } else if (NumBits <= 64) { + type = "LLONG"; + } else { + llvm_unreachable("Bit widths > 64 not implemented yet"); + } + + if (isSigned) + Out << sprefix << type << (isMax ? "_MAX" : "_MIN"); + else + Out << "U" << type << (isMax ? 
"_MAX" : "0"); +} + +#ifndef NDEBUG +static bool isSupportedIntegerSize(const IntegerType &T) { + return T.getBitWidth() == 8 || T.getBitWidth() == 16 || + T.getBitWidth() == 32 || T.getBitWidth() == 64; +} +#endif + +void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) { + const FunctionType *funT = F.getFunctionType(); + const Type *retT = F.getReturnType(); + const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1)); + + assert(isSupportedIntegerSize(*elemT) && + "CBackend does not support arbitrary size integers."); + assert(cast<StructType>(retT)->getElementType(0) == elemT && + elemT == funT->getParamType(0) && funT->getNumParams() == 2); + + switch (F.getIntrinsicID()) { + default: + llvm_unreachable("Unsupported Intrinsic."); + case Intrinsic::uadd_with_overflow: + // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) { + // Rty r; + // r.field0 = a + b; + // r.field1 = (r.field0 < a); + // return r; + // } + Out << "static inline "; + printType(Out, retT); + Out << GetValueName(&F); + Out << "("; + printSimpleType(Out, elemT, false); + Out << "a,"; + printSimpleType(Out, elemT, false); + Out << "b) {\n "; + printType(Out, retT); + Out << "r;\n"; + Out << " r.field0 = a + b;\n"; + Out << " r.field1 = (r.field0 < a);\n"; + Out << " return r;\n}\n"; + break; + + case Intrinsic::sadd_with_overflow: + // static inline Rty sadd_ixx(ixx a, ixx b) { + // Rty r; + // r.field1 = (b > 0 && a > XX_MAX - b) || + // (b < 0 && a < XX_MIN - b); + // r.field0 = r.field1 ? 0 : a + b; + // return r; + // } + Out << "static "; + printType(Out, retT); + Out << GetValueName(&F); + Out << "("; + printSimpleType(Out, elemT, true); + Out << "a,"; + printSimpleType(Out, elemT, true); + Out << "b) {\n "; + printType(Out, retT); + Out << "r;\n"; + Out << " r.field1 = (b > 0 && a > "; + printLimitValue(*elemT, true, true, Out); + Out << " - b) || (b < 0 && a < "; + printLimitValue(*elemT, true, false, Out); + Out << " - b);\n"; + Out << " r.field0 = r.field1 ? 
0 : a + b;\n"; + Out << " return r;\n}\n"; + break; + } +} void CWriter::lowerIntrinsics(Function &F) { // This is used to keep track of intrinsics that get generated to a lowered @@ -2816,6 +2844,8 @@ void CWriter::lowerIntrinsics(Function &F) { case Intrinsic::x86_sse2_cmp_sd: case Intrinsic::x86_sse2_cmp_pd: case Intrinsic::ppc_altivec_lvsl: + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: // We directly implement these intrinsics break; default: @@ -3109,6 +3139,14 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, writeOperand(I.getArgOperand(0)); Out << ")"; return true; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + Out << GetValueName(I.getCalledFunction()) << "("; + writeOperand(I.getArgOperand(0)); + Out << ", "; + writeOperand(I.getArgOperand(1)); + Out << ")"; + return true; } } @@ -3127,7 +3165,7 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { std::string E; if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) - TargetAsm = Match->createAsmInfo(Triple); + TargetAsm = Match->createMCAsmInfo(Triple); else return c.Codes[0]; @@ -3520,7 +3558,8 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) { for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); i != e; ++i) { const Type *IndexedTy = - ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1); + ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), + ArrayRef<unsigned>(b, i+1)); if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else @@ -3541,7 +3580,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); i != e; ++i) { const Type *IndexedTy = - ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1); + ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), + ArrayRef<unsigned>(b, i+1)); if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else @@ -3565,7 +3605,6 @@ bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(createGCLoweringPass()); PM.add(createLowerInvokePass()); PM.add(createCFGSimplificationPass()); // clean up after lower invoke. 
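To make the printIntrinsicDefinition output above concrete: for llvm.sadd.with.overflow.i32 the backend emits roughly the C below (the struct and function names are illustrative stand-ins for what getStructName and GetValueName actually produce). This is also why the patch adds the <limits.h> include: the overflow test needs INT_MAX/INT_MIN, and checking before adding keeps the generated C free of signed-overflow undefined behavior.

/* Illustrative CBackend output for llvm.sadd.with.overflow.i32: field0 holds
   the sum, field1 the overflow flag. Names are placeholders. */
#include <limits.h>
typedef struct l_unnamed_0 {
  unsigned int field0;
  unsigned char field1;
} l_unnamed_0;

static l_unnamed_0 llvm_sadd_with_overflow_i32(int a, int b) {
  l_unnamed_0 r;
  r.field1 = (b > 0 && a > INT_MAX - b) || (b < 0 && a < INT_MIN - b);
  r.field0 = r.field1 ? 0 : a + b;
  return r;
}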
- PM.add(new CBackendNameAllUsedStructsAndMergeFunctions()); PM.add(new CWriter(o)); PM.add(createGCInfoDeleter()); return false; diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index 6fed1959ff63..e64216be0bdc 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -20,8 +20,9 @@ namespace llvm { struct CTargetMachine : public TargetMachine { - CTargetMachine(const Target &T, const std::string &TT, const std::string &FS) - : TargetMachine(T) {} + CTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) + : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 09b48ce632f2..f982316fc087 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -1,6 +1,5 @@ add_llvm_library(LLVMTarget Mangler.cpp - SubtargetFeature.cpp Target.cpp TargetAsmInfo.cpp TargetAsmLexer.cpp @@ -13,7 +12,7 @@ add_llvm_library(LLVMTarget TargetLoweringObjectFile.cpp TargetMachine.cpp TargetRegisterInfo.cpp - TargetSubtarget.cpp + TargetSubtargetInfo.cpp ) set(LLVM_ENUM_ASM_PRINTERS "") diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index a2a2ef1aa9af..0b94e0cf1193 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -1,14 +1,11 @@ set(LLVM_TARGET_DEFINITIONS SPU.td) -tablegen(SPUGenInstrNames.inc -gen-instr-enums) -tablegen(SPUGenRegisterNames.inc -gen-register-enums) tablegen(SPUGenAsmWriter.inc -gen-asm-writer) tablegen(SPUGenCodeEmitter.inc -gen-emitter) -tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SPUGenRegisterInfo.inc -gen-register-desc) -tablegen(SPUGenInstrInfo.inc -gen-instr-desc) +tablegen(SPUGenRegisterInfo.inc -gen-register-info) +tablegen(SPUGenInstrInfo.inc -gen-instr-info) tablegen(SPUGenDAGISel.inc -gen-dag-isel) -tablegen(SPUGenSubtarget.inc -gen-subtarget) +tablegen(SPUGenSubtargetInfo.inc -gen-subtarget) tablegen(SPUGenCallingConv.inc -gen-callingconv) add_llvm_target(CellSPUCodeGen @@ -18,7 +15,6 @@ add_llvm_target(CellSPUCodeGen SPUISelDAGToDAG.cpp SPUISelLowering.cpp SPUFrameLowering.cpp - SPUMCAsmInfo.cpp SPURegisterInfo.cpp SPUSubtarget.cpp SPUTargetMachine.cpp @@ -27,3 +23,4 @@ add_llvm_target(CellSPUCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..85fb258eac2c --- /dev/null +++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMCellSPUDesc + SPUMCTargetDesc.cpp + SPUMCAsmInfo.cpp + ) diff --git a/lib/Target/CellSPU/MCTargetDesc/Makefile b/lib/Target/CellSPU/MCTargetDesc/Makefile new file mode 100644 index 000000000000..10d9a42239ad --- /dev/null +++ b/lib/Target/CellSPU/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMCellSPUDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp index 99aaeb006a0b..8c1176a9d028 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp @@ -15,6 +15,8 @@ using namespace llvm; SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { + IsLittleEndian = false; + ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h index 7f850d347f56..7f850d347f56 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.h +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp new file mode 100644 index 000000000000..26c5a4bc7b33 --- /dev/null +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp @@ -0,0 +1,56 @@ +//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Cell SPU specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "SPUMCTargetDesc.h" +#include "SPUMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "SPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SPUGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SPUGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createSPUMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSPUMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeCellSPUMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); +} + +static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitSPUMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, + createSPUMCSubtargetInfo); +} + +extern "C" void LLVMInitializeCellSPUMCAsmInfo() { + RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget); +} diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h new file mode 100644 index 000000000000..c5c037d4de44 --- /dev/null +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h @@ -0,0 +1,40 @@ +//===-- SPUMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Alpha specific target descriptions. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp index 99aaeb006a0b..8c1176a9d028 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp @@ -15,6 +15,8 @@ using namespace llvm; SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { + IsLittleEndian = false; + ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h index 7f850d347f56..7f850d347f56 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.h +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp new file mode 100644 index 000000000000..26c5a4bc7b33 --- /dev/null +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp @@ -0,0 +1,56 @@ +//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Cell SPU specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "SPUMCTargetDesc.h" +#include "SPUMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "SPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SPUGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SPUGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createSPUMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSPUMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeCellSPUMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); +} + +static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitSPUMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, + createSPUMCSubtargetInfo); +} + +extern "C" void LLVMInitializeCellSPUMCAsmInfo() { + RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget); +} diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h new file mode 100644 index 000000000000..c5c037d4de44 --- /dev/null +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h @@ -0,0 +1,40 @@ +//===-- SPUMCTargetDesc.h - Cell SPU Target Descriptions ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Cell SPU specific target descriptions.
+// +//===----------------------------------------------------------------------===// + +#ifndef SPUMCTARGETDESC_H +#define SPUMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheCellSPUTarget; + +} // End llvm namespace + +// Define symbolic names for Cell registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "SPUGenRegisterInfo.inc" + +// Defines symbolic names for the SPU instructions. +// +#define GET_INSTRINFO_ENUM +#include "SPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SPUGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile index 77c66be9e857..d7a8247f5702 100644 --- a/lib/Target/CellSPU/Makefile +++ b/lib/Target/CellSPU/Makefile @@ -10,12 +10,11 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCellSPUCodeGen TARGET = SPU -BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ +BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \ SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ - SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \ - SPUGenInstrInfo.inc SPUGenDAGISel.inc \ - SPUGenSubtarget.inc SPUGenCallingConv.inc + SPUGenDAGISel.inc \ + SPUGenSubtargetInfo.inc SPUGenCallingConv.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index 72f84300b2c3..b51fbc7a5197 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -15,6 +15,7 @@ #ifndef LLVM_TARGET_IBMCELLSPU_H #define LLVM_TARGET_IBMCELLSPU_H +#include "MCTargetDesc/SPUMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -25,11 +26,6 @@ namespace llvm { FunctionPass *createSPUISelDag(SPUTargetMachine &TM); FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm); - extern Target TheCellSPUTarget; } -// Defines symbolic names for the SPU instructions. 
-// -#include "SPUGenInstrNames.inc" - #endif /* LLVM_TARGET_IBMCELLSPU_H */ diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp index 432f4a1b59e2..a3e7e73ae30a 100644 --- a/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -13,7 +13,6 @@ #include "SPU.h" #include "SPUFrameLowering.h" -#include "SPURegisterNames.h" #include "SPUInstrBuilder.h" #include "SPUInstrInfo.h" #include "llvm/Function.h" diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9351ffdc0b7f..a297d036f03e 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -16,7 +16,6 @@ #include "SPUTargetMachine.h" #include "SPUHazardRecognizers.h" #include "SPUFrameLowering.h" -#include "SPURegisterNames.h" #include "SPUTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index f9b50419e7bd..f0ceee214149 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "SPURegisterNames.h" #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameLowering.h" @@ -221,6 +220,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 080434d66789..e67b10c7984d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -11,17 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "SPURegisterNames.h" #include "SPUInstrInfo.h" #include "SPUInstrBuilder.h" #include "SPUTargetMachine.h" -#include "SPUGenInstrInfo.inc" #include "SPUHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/MC/MCContext.h" + +#define GET_INSTRINFO_CTOR +#include "SPUGenInstrInfo.inc" using namespace llvm; @@ -51,7 +53,7 @@ namespace { } SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) - : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0])), + : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) { /* NOP */ } diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index e5e91481419a..bc1ba71f7a45 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -18,9 +18,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "SPURegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "SPUGenInstrInfo.inc" + namespace llvm { //! 
Cell SPU instruction information class - class SPUInstrInfo : public TargetInstrInfoImpl { + class SPUInstrInfo : public SPUGenInstrInfo { SPUTargetMachine &TM; const SPURegisterInfo RI; public: diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 623ae76326bb..19896c0b4be9 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "reginfo" #include "SPU.h" #include "SPURegisterInfo.h" -#include "SPURegisterNames.h" #include "SPUInstrBuilder.h" #include "SPUSubtarget.h" #include "SPUMachineFunction.h" @@ -43,6 +42,9 @@ #include "llvm/ADT/STLExtras.h" #include <cstdlib> +#define GET_REGINFO_TARGET_DESC +#include "SPUGenRegisterInfo.inc" + using namespace llvm; /// getRegisterNumbering - Given the enum value for some register, e.g. @@ -185,9 +187,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii) : - SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), - Subtarget(subtarget), - TII(tii) + SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii) { } @@ -371,5 +371,3 @@ SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, assert( Reg && "Register scavenger failed"); return Reg; } - -#include "SPUGenRegisterInfo.inc" diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 6ecf0f28dd9f..5e014f8adbfc 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -16,7 +16,9 @@ #define SPU_REGISTERINFO_H #include "SPU.h" -#include "SPUGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "SPUGenRegisterInfo.inc" namespace llvm { class SPUSubtarget; diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td index cce0c823c935..e16f51ff0e02 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -155,147 +155,29 @@ def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>; // The SPU's registers as 128-bit wide entities, and can function as general // purpose registers, where the operands are in the "preferred slot": +// The non-volatile registers are allocated in reverse order, like PPC does it. 
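The register class defs that follow replace the 128-entry literal register lists with TableGen's set-builder expressions. Assuming sequence enumerates its inclusive range in the given order, the GPRC expression (add (sequence "R%u", 0, 79), (sequence "R%u", 127, 80)) denotes R0 through R79 followed by R127 down to R80; a throwaway sketch of that expansion:

// Sketch of the register order the GPRC set expression expands to, under the
// assumption that sequence enumerates inclusive ranges in the stated order.
#include <cstdio>
int main() {
  for (unsigned R = 0; R <= 79; ++R)   std::printf("R%u ", R);  // low registers first
  for (unsigned R = 127; R >= 80; --R) std::printf("R%u ", R);  // non-volatiles reversed
  return 0;
}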
def GPRC : RegisterClass<"SPU", [i128], 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; + (add (sequence "R%u", 0, 79), + (sequence "R%u", 127, 80))>; // The SPU's registers as 64-bit wide (double word integer) "preferred slot": -def R64C : RegisterClass<"SPU", [i64], 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; +def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>; // The SPU's registers as 64-bit wide (double word) FP "preferred slot": -def R64FP : RegisterClass<"SPU", [f64], 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; +def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>; // The SPU's registers as 32-bit wide (word) "preferred slot": -def R32C : RegisterClass<"SPU", [i32], 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - 
R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; +def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>; // The SPU's registers as single precision floating point "preferred slot": -def R32FP : RegisterClass<"SPU", [f32], 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; +def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>; // The SPU's registers as 16-bit wide (halfword) "preferred slot": -def R16C : RegisterClass<"SPU", [i16], 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; +def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>; // The SPU's registers as 8-bit wide (byte) "preferred slot": -def R8C : RegisterClass<"SPU", [i8], 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; +def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>; // The SPU's registers as vector registers: -def VECREG : RegisterClass<"SPU", - [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], - 128, - [ - /* volatile register */ - R3, R4, R5, R6, R7, R8, R9, 
R10, R11, R12, R13, R14, R15, R16, - R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, - R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, - R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, - R77, R78, R79, - /* non-volatile register: take hint from PPC and allocate in reverse order */ - R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115, - R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102, - R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87, - R86, R85, R84, R83, R82, R81, R80, - /* environment ptr, SP, LR */ - R2, R1, R0 ]>; +def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, + (add GPRC)>; diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h index 6c3afdf41fdc..e557ed340a28 100644 --- a/lib/Target/CellSPU/SPURegisterNames.h +++ b/lib/Target/CellSPU/SPURegisterNames.h @@ -13,6 +13,7 @@ // Define symbolic names for Cell registers. This defines a mapping from // register name to register number. // -#include "SPUGenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "SPUGenRegisterInfo.inc" #endif diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp index 07c8352fba9f..856dc82f786b 100644 --- a/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/lib/Target/CellSPU/SPUSubtarget.cpp @@ -7,19 +7,25 @@ // //===----------------------------------------------------------------------===// // -// This file implements the CellSPU-specific subclass of TargetSubtarget. +// This file implements the CellSPU-specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "SPUSubtarget.h" #include "SPU.h" -#include "SPUGenSubtarget.inc" -#include "llvm/ADT/SmallVector.h" #include "SPURegisterInfo.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallVector.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "SPUGenSubtargetInfo.inc" using namespace llvm; -SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) : +SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS) : + SPUGenSubtargetInfo(TT, CPU, FS), StackAlignment(16), ProcDirective(SPU::DEFAULT_PROC), UseLargeMem(false) @@ -29,7 +35,10 @@ SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) : std::string default_cpu("v0"); // Parse features string. - ParseSubtargetFeatures(FS, default_cpu); + ParseSubtargetFeatures(default_cpu, FS); + + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(default_cpu); } /// SetJITMode - This is called to inform the subtarget info that we are @@ -40,9 +49,9 @@ void SPUSubtarget::SetJITMode() { /// Enable PostRA scheduling for optimization levels -O2 and -O3. bool SPUSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, + TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtarget::ANTIDEP_CRITICAL; + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; // CriticalPathsRCs seems to be the set of // RegisterClasses that antidep breakings are performed for. 
// Do it for all register classes diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h index d7929302f080..7c4aa1430217 100644 --- a/lib/Target/CellSPU/SPUSubtarget.h +++ b/lib/Target/CellSPU/SPUSubtarget.h @@ -7,20 +7,23 @@ // //===----------------------------------------------------------------------===// // -// This file declares the Cell SPU-specific subclass of TargetSubtarget. +// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef CELLSUBTARGET_H #define CELLSUBTARGET_H -#include "llvm/Target/TargetInstrItineraries.h" -#include "llvm/Target/TargetSubtarget.h" - +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "SPUGenSubtargetInfo.inc" + namespace llvm { class GlobalValue; + class StringRef; namespace SPU { enum { @@ -29,7 +32,7 @@ namespace llvm { }; } - class SPUSubtarget : public TargetSubtarget { + class SPUSubtarget : public SPUGenSubtargetInfo { protected: /// stackAlignment - The minimum alignment known to hold of the stack frame /// on entry to the function and which must be maintained by every function. @@ -50,12 +53,12 @@ namespace llvm { /// This constructor initializes the data members to match that /// of the specified triple. /// - SPUSubtarget(const std::string &TT, const std::string &FS); + SPUSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. @@ -86,7 +89,7 @@ namespace llvm { } bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, + TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 3ed73613a31d..3542a2b87e43 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "SPU.h" -#include "SPURegisterNames.h" -#include "SPUMCAsmInfo.h" #include "SPUTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/RegAllocRegistry.h" @@ -25,7 +23,6 @@ using namespace llvm; extern "C" void LLVMInitializeCellSPUTarget() { // Register the target. 
RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget); - RegisterAsmInfo<SPULinuxMCAsmInfo> Y(TheCellSPUTarget); } const std::pair<unsigned, int> * @@ -35,9 +32,9 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { } SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + const std::string &CPU,const std::string &FS) + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 75abd5eb3fca..d96f86dcaeb0 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine { InstrItineraryData InstrItins; public: SPUTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); /// Return the subtarget implementation object virtual const SPUSubtarget *getSubtargetImpl() const { diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 797cfd597e60..10d18f61c7e2 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -22,7 +22,9 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" -#include "llvm/TypeSymbolTable.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -32,7 +34,7 @@ #include "llvm/Config/config.h" #include <algorithm> #include <set> - +#include <map> using namespace llvm; static cl::opt<std::string> @@ -75,6 +77,16 @@ extern "C" void LLVMInitializeCppBackendTarget() { RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget); } +extern "C" void LLVMInitializeCppBackendMCAsmInfo() {} + +extern "C" void LLVMInitializeCppBackendMCInstrInfo() { + RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget); +} + +extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() { + RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget); +} + namespace { typedef std::vector<const Type*> TypeList; typedef std::map<const Type*,std::string> TypeMap; @@ -92,8 +104,6 @@ namespace { uint64_t uniqueNum; TypeMap TypeNames; ValueMap ValueNames; - TypeMap UnresolvedTypes; - TypeList TypeStack; NameSet UsedNames; TypeSet DefinedTypes; ValueSet DefinedValues; @@ -140,8 +150,7 @@ namespace { inline void printCppName(const Value* val); void printAttributes(const AttrListPtr &PAL, const std::string &name); - bool printTypeInternal(const Type* Ty); - inline void printType(const Type* Ty); + void printType(const Type* Ty); void printTypes(const Module* M); void printConstant(const Constant *CPV); @@ -188,26 +197,11 @@ static std::string getTypePrefix(const Type *Ty) { case Type::ArrayTyID: return "array_"; case Type::PointerTyID: return "ptr_"; case Type::VectorTyID: return "packed_"; - case Type::OpaqueTyID: return "opaque_"; default: return "other_"; } return "unknown_"; } -// Looks up the type in the symbol table and returns a pointer to its name or -// a null pointer if it wasn't found. Note that this isn't the same as the -// Mode::getTypeName function which will return an empty string, not a null -// pointer if the name is not found. 
-static const std::string * -findTypeName(const TypeSymbolTable& ST, const Type* Ty) { - TypeSymbolTable::const_iterator TI = ST.begin(); - TypeSymbolTable::const_iterator TE = ST.end(); - for (;TI != TE; ++TI) - if (TI->second == Ty) - return &(TI->first); - return 0; -} - void CppWriter::error(const std::string& msg) { report_fatal_error(msg); } @@ -379,18 +373,20 @@ std::string CppWriter::getCppName(const Type* Ty) { case Type::StructTyID: prefix = "StructTy_"; break; case Type::ArrayTyID: prefix = "ArrayTy_"; break; case Type::PointerTyID: prefix = "PointerTy_"; break; - case Type::OpaqueTyID: prefix = "OpaqueTy_"; break; case Type::VectorTyID: prefix = "VectorTy_"; break; default: prefix = "OtherTy_"; break; // prevent breakage } // See if the type has a name in the symboltable and build accordingly - const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty); std::string name; - if (tName) - name = std::string(prefix) + *tName; - else - name = std::string(prefix) + utostr(uniqueNum++); + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (STy->hasName()) + name = STy->getName(); + + if (name.empty()) + name = utostr(uniqueNum++); + + name = std::string(prefix) + name; sanitize(name); // Save the name @@ -503,65 +499,38 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, } } -bool CppWriter::printTypeInternal(const Type* Ty) { +void CppWriter::printType(const Type* Ty) { // We don't print definitions for primitive types if (Ty->isPrimitiveType() || Ty->isIntegerTy()) - return false; + return; // If we already defined this type, we don't need to define it again. if (DefinedTypes.find(Ty) != DefinedTypes.end()) - return false; + return; // Everything below needs the name for the type so get it now. std::string typeName(getCppName(Ty)); - // Search the type stack for recursion. If we find it, then generate this - // as an OpaqueType, but make sure not to do this multiple times because - // the type could appear in multiple places on the stack. Once the opaque - // definition is issued, it must not be re-issued. Consequently we have to - // check the UnresolvedTypes list as well. - TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(), - Ty); - if (TI != TypeStack.end()) { - TypeMap::const_iterator I = UnresolvedTypes.find(Ty); - if (I == UnresolvedTypes.end()) { - Out << "PATypeHolder " << typeName; - Out << "_fwd = OpaqueType::get(mod->getContext());"; - nl(Out); - UnresolvedTypes[Ty] = typeName; - } - return true; - } - - // We're going to print a derived type which, by definition, contains other - // types. So, push this one we're printing onto the type stack to assist with - // recursive definitions. 
- TypeStack.push_back(Ty); - // Print the type definition switch (Ty->getTypeID()) { case Type::FunctionTyID: { const FunctionType* FT = cast<FunctionType>(Ty); - Out << "std::vector<const Type*>" << typeName << "_args;"; + Out << "std::vector<Type*>" << typeName << "_args;"; nl(Out); FunctionType::param_iterator PI = FT->param_begin(); FunctionType::param_iterator PE = FT->param_end(); for (; PI != PE; ++PI) { const Type* argTy = static_cast<const Type*>(*PI); - bool isForward = printTypeInternal(argTy); + printType(argTy); std::string argName(getCppName(argTy)); Out << typeName << "_args.push_back(" << argName; - if (isForward) - Out << "_fwd"; Out << ");"; nl(Out); } - bool isForward = printTypeInternal(FT->getReturnType()); + printType(FT->getReturnType()); std::string retTypeName(getCppName(FT->getReturnType())); Out << "FunctionType* " << typeName << " = FunctionType::get("; in(); nl(Out) << "/*Result=*/" << retTypeName; - if (isForward) - Out << "_fwd"; Out << ","; nl(Out) << "/*Params=*/" << typeName << "_args,"; nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");"; @@ -571,23 +540,37 @@ bool CppWriter::printTypeInternal(const Type* Ty) { } case Type::StructTyID: { const StructType* ST = cast<StructType>(Ty); - Out << "std::vector<const Type*>" << typeName << "_fields;"; + if (!ST->isAnonymous()) { + Out << "StructType *" << typeName << " = "; + Out << "StructType::createNamed(mod->getContext(), \""; + printEscapedString(ST->getName()); + Out << "\");"; + nl(Out); + // Indicate that this type is now defined. + DefinedTypes.insert(Ty); + } + + Out << "std::vector<Type*>" << typeName << "_fields;"; nl(Out); StructType::element_iterator EI = ST->element_begin(); StructType::element_iterator EE = ST->element_end(); for (; EI != EE; ++EI) { const Type* fieldTy = static_cast<const Type*>(*EI); - bool isForward = printTypeInternal(fieldTy); + printType(fieldTy); std::string fieldName(getCppName(fieldTy)); Out << typeName << "_fields.push_back(" << fieldName; - if (isForward) - Out << "_fwd"; Out << ");"; nl(Out); } - Out << "StructType* " << typeName << " = StructType::get(" - << "mod->getContext(), " - << typeName << "_fields, /*isPacked=*/" + + if (ST->isAnonymous()) { + Out << "StructType *" << typeName << " = "; + Out << "StructType::get(" << "mod->getContext(), "; + } else { + Out << typeName << "->setBody("; + } + + Out << typeName << "_fields, /*isPacked=*/" << (ST->isPacked() ? "true" : "false") << ");"; nl(Out); break; @@ -595,122 +578,55 @@ bool CppWriter::printTypeInternal(const Type* Ty) { case Type::ArrayTyID: { const ArrayType* AT = cast<ArrayType>(Ty); const Type* ET = AT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "ArrayType* " << typeName << " = ArrayType::get(" - << elemName << (isForward ? "_fwd" : "") - << ", " << utostr(AT->getNumElements()) << ");"; - nl(Out); + printType(ET); + if (DefinedTypes.find(Ty) == DefinedTypes.end()) { + std::string elemName(getCppName(ET)); + Out << "ArrayType* " << typeName << " = ArrayType::get(" + << elemName + << ", " << utostr(AT->getNumElements()) << ");"; + nl(Out); + } break; } case Type::PointerTyID: { const PointerType* PT = cast<PointerType>(Ty); const Type* ET = PT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "PointerType* " << typeName << " = PointerType::get(" - << elemName << (isForward ? 
"_fwd" : "") - << ", " << utostr(PT->getAddressSpace()) << ");"; - nl(Out); + printType(ET); + if (DefinedTypes.find(Ty) == DefinedTypes.end()) { + std::string elemName(getCppName(ET)); + Out << "PointerType* " << typeName << " = PointerType::get(" + << elemName + << ", " << utostr(PT->getAddressSpace()) << ");"; + nl(Out); + } break; } case Type::VectorTyID: { const VectorType* PT = cast<VectorType>(Ty); const Type* ET = PT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "VectorType* " << typeName << " = VectorType::get(" - << elemName << (isForward ? "_fwd" : "") - << ", " << utostr(PT->getNumElements()) << ");"; - nl(Out); - break; - } - case Type::OpaqueTyID: { - Out << "OpaqueType* " << typeName; - Out << " = OpaqueType::get(mod->getContext());"; - nl(Out); + printType(ET); + if (DefinedTypes.find(Ty) == DefinedTypes.end()) { + std::string elemName(getCppName(ET)); + Out << "VectorType* " << typeName << " = VectorType::get(" + << elemName + << ", " << utostr(PT->getNumElements()) << ");"; + nl(Out); + } break; } default: error("Invalid TypeID"); } - // If the type had a name, make sure we recreate it. - const std::string* progTypeName = - findTypeName(TheModule->getTypeSymbolTable(),Ty); - if (progTypeName) { - Out << "mod->addTypeName(\"" << *progTypeName << "\", " - << typeName << ");"; - nl(Out); - } - - // Pop us off the type stack - TypeStack.pop_back(); - // Indicate that this type is now defined. DefinedTypes.insert(Ty); - // Early resolve as many unresolved types as possible. Search the unresolved - // types map for the type we just printed. Now that its definition is complete - // we can resolve any previous references to it. This prevents a cascade of - // unresolved types. - TypeMap::iterator I = UnresolvedTypes.find(Ty); - if (I != UnresolvedTypes.end()) { - Out << "cast<OpaqueType>(" << I->second - << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");"; - nl(Out); - Out << I->second << " = cast<"; - switch (Ty->getTypeID()) { - case Type::FunctionTyID: Out << "FunctionType"; break; - case Type::ArrayTyID: Out << "ArrayType"; break; - case Type::StructTyID: Out << "StructType"; break; - case Type::VectorTyID: Out << "VectorType"; break; - case Type::PointerTyID: Out << "PointerType"; break; - case Type::OpaqueTyID: Out << "OpaqueType"; break; - default: Out << "NoSuchDerivedType"; break; - } - Out << ">(" << I->second << "_fwd.get());"; - nl(Out); nl(Out); - UnresolvedTypes.erase(I); - } - // Finally, separate the type definition from other with a newline. nl(Out); - - // We weren't a recursive type - return false; -} - -// Prints a type definition. Returns true if it could not resolve all the -// types in the definition but had to use a forward reference. 
-void CppWriter::printType(const Type* Ty) { - assert(TypeStack.empty()); - TypeStack.clear(); - printTypeInternal(Ty); - assert(TypeStack.empty()); } void CppWriter::printTypes(const Module* M) { - // Walk the symbol table and print out all its types - const TypeSymbolTable& symtab = M->getTypeSymbolTable(); - for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end(); - TI != TE; ++TI) { - - // For primitive types and types already defined, just add a name - TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || - TNI != TypeNames.end()) { - Out << "mod->addTypeName(\""; - printEscapedString(TI->first); - Out << "\", " << getCppName(TI->second) << ");"; - nl(Out); - // For everything else, define the type - } else { - printType(TI->second); - } - } - - // Add all of the global variables to the value table... + // Add all of the global variables to the value table. for (Module::const_global_iterator I = TheModule->global_begin(), E = TheModule->global_end(); I != E; ++I) { if (I->hasInitializer()) @@ -989,12 +905,12 @@ void CppWriter::printVariableUses(const GlobalVariable *GV) { nl(Out); printType(GV->getType()); if (GV->hasInitializer()) { - Constant *Init = GV->getInitializer(); + const Constant *Init = GV->getInitializer(); printType(Init->getType()); - if (Function *F = dyn_cast<Function>(Init)) { + if (const Function *F = dyn_cast<Function>(Init)) { nl(Out)<< "/ Function Declarations"; nl(Out); printFunctionHead(F); - } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { + } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { nl(Out) << "// Global Variable Declarations"; nl(Out); printVariableHead(gv); @@ -1353,9 +1269,10 @@ void CppWriter::printInstruction(const Instruction *I, printEscapedString(phi->getName()); Out << "\", " << bbname << ");"; nl(Out); - for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { + for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) { Out << iName << "->addIncoming(" - << opNames[i] << ", " << opNames[i+1] << ");"; + << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", " + << getOpName(phi->getIncomingBlock(i)) << ");"; nl(Out); } break; @@ -1954,8 +1871,8 @@ void CppWriter::printVariable(const std::string& fname, Out << "}\n"; } -void CppWriter::printType(const std::string& fname, - const std::string& typeName) { +void CppWriter::printType(const std::string &fname, + const std::string &typeName) { const Type* Ty = TheModule->getTypeByName(typeName); if (!Ty) { error(std::string("Type '") + typeName + "' not found in input module"); diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index e42166e05584..7322e3e34f00 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -23,8 +23,8 @@ class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { CPPTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) - : TargetMachine(T) {} + const std::string &CPU, const std::string &FS) + : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp index 190379657f42..15965964452a 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -86,8 +86,9 @@ namespace { : 
MBlazeBaseAsmLexer(T, MAI) { std::string tripleString("mblaze-unknown-unknown"); std::string featureString; + std::string CPU; OwningPtr<const TargetMachine> - targetMachine(T.createTargetMachine(tripleString, featureString)); + targetMachine(T.createTargetMachine(tripleString, CPU, featureString)); InitRegisterMap(targetMachine->getRegisterInfo()); } }; diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 524f33d19335..eebd9d878943 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -32,7 +32,6 @@ struct MBlazeOperand; class MBlazeAsmParser : public TargetAsmParser { MCAsmParser &Parser; - TargetMachine &TM; MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } @@ -64,8 +63,8 @@ class MBlazeAsmParser : public TargetAsmParser { public: - MBlazeAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) - : TargetAsmParser(T), Parser(_Parser), TM(_TM) {} + MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : TargetAsmParser(), Parser(_Parser) {} virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -220,7 +219,7 @@ public: return StringRef(Tok.Data, Tok.Length); } - virtual void dump(raw_ostream &OS) const; + virtual void print(raw_ostream &OS) const; static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) { MBlazeOperand *Op = new MBlazeOperand(Token); @@ -280,7 +279,7 @@ public: } // end anonymous namespace. -void MBlazeOperand::dump(raw_ostream &OS) const { +void MBlazeOperand::print(raw_ostream &OS) const { switch (Kind) { case Immediate: getImm()->print(OS); diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index 004057ad4ae3..0bc5b7820378 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -1,16 +1,13 @@ set(LLVM_TARGET_DEFINITIONS MBlaze.td) -tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MBlazeGenRegisterNames.inc -gen-register-enums) -tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc) -tablegen(MBlazeGenInstrNames.inc -gen-instr-enums) -tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc) +tablegen(MBlazeGenRegisterInfo.inc -gen-register-info) +tablegen(MBlazeGenInstrInfo.inc -gen-instr-info) tablegen(MBlazeGenCodeEmitter.inc -gen-emitter) tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher) tablegen(MBlazeGenDAGISel.inc -gen-dag-isel) tablegen(MBlazeGenCallingConv.inc -gen-callingconv) -tablegen(MBlazeGenSubtarget.inc -gen-subtarget) +tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget) tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info) @@ -20,7 +17,6 @@ add_llvm_target(MBlazeCodeGen MBlazeISelDAGToDAG.cpp MBlazeISelLowering.cpp MBlazeFrameLowering.cpp - MBlazeMCAsmInfo.cpp MBlazeRegisterInfo.cpp MBlazeSubtarget.cpp MBlazeTargetMachine.cpp @@ -38,3 +34,4 @@ add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 060a87b7c616..88d80a12eb3a 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -27,9 +27,12 @@ // #include 
"MBlazeGenDecoderTables.inc" // #include "MBlazeGenRegisterNames.inc" -#include "MBlazeGenInstrInfo.inc" #include "MBlazeGenEDInfo.inc" +namespace llvm { +extern MCInstrDesc MBlazeInsts[]; +} + using namespace llvm; const unsigned UNSUPPORTED = -1; diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index 13c4b49f981c..eacca410b986 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -18,11 +18,10 @@ namespace llvm { class MCOperand; - class TargetMachine; class MBlazeInstPrinter : public MCInstPrinter { public: - MBlazeInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h index 00c73f06fe10..3390794c9375 100644 --- a/lib/Target/MBlaze/MBlaze.h +++ b/lib/Target/MBlaze/MBlaze.h @@ -15,6 +15,7 @@ #ifndef TARGET_MBLAZE_H #define TARGET_MBLAZE_H +#include "MCTargetDesc/MBlazeMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -22,26 +23,20 @@ namespace llvm { class FunctionPass; class MachineCodeEmitter; class MCCodeEmitter; + class MCInstrInfo; + class MCSubtargetInfo; class TargetAsmBackend; class formatted_raw_ostream; - MCCodeEmitter *createMBlazeMCCodeEmitter(const Target &, - TargetMachine &TM, + MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, MCContext &Ctx); - + TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &); FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM); FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM); - extern Target TheMBlazeTarget; } // end namespace llvm; -// Defines symbolic names for MBlaze registers. This defines a mapping from -// register name to register number. -#include "MBlazeGenRegisterNames.inc" - -// Defines symbolic names for the MBlaze instructions. 
-#include "MBlazeGenInstrNames.inc" - #endif diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 0f0f60e69f08..0016df569b93 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -319,11 +319,10 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { } static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, - TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new MBlazeInstPrinter(TM, MAI); + return new MBlazeInstPrinter(MAI); return 0; } diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index 973e96844e81..c07570a487b9 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -109,7 +109,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, // Hazard check MachineBasicBlock::iterator a = candidate; MachineBasicBlock::iterator b = slot; - TargetInstrDesc desc = candidate->getDesc(); + MCInstrDesc desc = candidate->getDesc(); // MBB layout:- // candidate := a0 = operation(a1, a2) @@ -183,7 +183,7 @@ static bool isDelayFiller(MachineBasicBlock &MBB, if (candidate == MBB.begin()) return false; - TargetInstrDesc brdesc = (--candidate)->getDesc(); + MCInstrDesc brdesc = (--candidate)->getDesc(); return (brdesc.hasDelaySlot()); } @@ -211,7 +211,7 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) { break; --I; - TargetInstrDesc desc = I->getDesc(); + MCInstrDesc desc = I->getDesc(); if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) || desc.isCall() || desc.isReturn() || desc.isBarrier() || hasUnknownSideEffects(I)) diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index c5e0a8960ed8..62dfdcc2fd10 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -69,6 +69,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) // Floating point operations which are not supported setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); @@ -1114,15 +1115,19 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight( return weight; } -/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), -/// return a list of registers that can be used to satisfy the constraint. -/// This should only be used for C_RegisterClass constraints. +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': return std::make_pair(0U, MBlaze::GPRRegisterClass); + // TODO: These can't possibly be right, but match what was in + // getRegClassForInlineAsmConstraint. 
+ case 'd': + case 'y': case 'f': if (VT == MVT::f32) return std::make_pair(0U, MBlaze::GPRRegisterClass); @@ -1131,32 +1136,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -/// Given a register class constraint, like 'r', if this corresponds directly -/// to an LLVM register class, return a register of 0 and the register class -/// pointer. -std::vector<unsigned> MBlazeTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - default : break; - case 'r': - // GCC MBlaze Constraint Letters - case 'd': - case 'y': - case 'f': - return make_vector<unsigned>( - MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6, - MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11, - MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21, - MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25, - MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29, - MBlaze::R30, MBlaze::R31, 0); - } - return std::vector<unsigned>(); -} - bool MBlazeTargetLowering:: isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The MBlaze target isn't yet aware of offsets. diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 265c1a709bc8..bb128da3c7c0 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -173,10 +173,6 @@ namespace llvm { getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; /// isFPImmLegal - Returns true if the target can instruction select the diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 794ebedf1e6a..188f10a3972e 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -14,18 +14,21 @@ #include "MBlazeInstrInfo.h" #include "MBlazeTargetMachine.h" #include "MBlazeMachineFunction.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/STLExtras.h" + +#define GET_INSTRINFO_CTOR #include "MBlazeGenInstrInfo.inc" using namespace llvm; MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm) - : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)), + : MBlazeGenInstrInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} static bool isZeroImm(const MachineOperand &op) { diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index b717da8e2bec..79f962b349bf 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -19,6 +19,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "MBlazeRegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "MBlazeGenInstrInfo.inc" + namespace llvm { namespace MBlaze { @@ -219,7 +222,7 @@ namespace MBlazeII { }; } -class MBlazeInstrInfo : public TargetInstrInfoImpl { +class MBlazeInstrInfo : public MBlazeGenInstrInfo { MBlazeTargetMachine &TM; const MBlazeRegisterInfo RI; public: diff --git 
a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 7e4a2f5c945e..32d67b264a20 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -92,7 +92,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { static const FunctionType *getType(LLVMContext &Context, unsigned id) { const Type *ResultTy = NULL; - std::vector<const Type*> ArgTys; + std::vector<Type*> ArgTys; bool IsVarArg = false; #define GET_INTRINSIC_GENERATOR diff --git a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp index 3ece1a8a340d..ddc636d0ce64 100644 --- a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp +++ b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp @@ -29,13 +29,12 @@ namespace { class MBlazeMCCodeEmitter : public MCCodeEmitter { MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - const TargetInstrInfo &TII; - MCContext &Ctx; + const MCInstrInfo &MCII; public: - MBlazeMCCodeEmitter(TargetMachine &tm, MCContext &ctx) - : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { + MBlazeMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii) { } ~MBlazeMCCodeEmitter() {} @@ -96,10 +95,10 @@ public: } // end anonymous namespace -MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const Target &, - TargetMachine &TM, +MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, MCContext &Ctx) { - return new MBlazeMCCodeEmitter(TM, Ctx); + return new MBlazeMCCodeEmitter(MCII, STI, Ctx); } /// getMachineOpValue - Return binary encoding of operand. If the machine @@ -179,7 +178,7 @@ void MBlazeMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); + const MCInstrDesc &Desc = MCII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; // Keep track of the current byte being emitted. unsigned CurByte = 0; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 517279fda51e..f0b201a66170 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -37,12 +37,14 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#define GET_REGINFO_TARGET_DESC +#include "MBlazeGenRegisterInfo.inc" + using namespace llvm; MBlazeRegisterInfo:: MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) - : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP), - Subtarget(ST), TII(tii) {} + : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {} /// getRegisterNumbering - Given the enum value for some register, e.g. /// MBlaze::R0, return the number that it corresponds to (e.g. 0). 
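The MBlazeMCCodeEmitter hunk above shows the point of the MC split: the emitter's only remaining dependency is the MC-layer MCInstrInfo (the MCSubtargetInfo parameter is accepted but unused here), so it can be constructed with no TargetMachine at all. A minimal skeleton in the new shape (ExampleMCCodeEmitter is an illustrative name, not part of the patch):

    #include "llvm/MC/MCCodeEmitter.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    using namespace llvm;

    class ExampleMCCodeEmitter : public MCCodeEmitter {
      const MCInstrInfo &MCII;   // MC-layer descriptors only; no TargetMachine
    public:
      ExampleMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
                           MCContext &ctx) : MCII(mcii) {}
      void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                             SmallVectorImpl<MCFixup> &Fixups) const {
        const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); // per-opcode flags
        // ... walk MI's operands and write the encoded bytes to OS ...
        (void)Desc;
      }
    };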
@@ -359,6 +361,3 @@ int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); } - -#include "MBlazeGenRegisterInfo.inc" - diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 380783991ce1..7ebce21d3a80 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -17,7 +17,9 @@ #include "MBlaze.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "MBlazeGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "MBlazeGenRegisterInfo.inc" namespace llvm { class MBlazeSubtarget; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index bd396ed47b36..13c46ba1ecba 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -109,32 +109,9 @@ let Namespace = "MBlaze" in { // Register Classes //===----------------------------------------------------------------------===// -def GPR : RegisterClass<"MBlaze", [i32,f32], 32, - [ - // Return Values and Arguments - R3, R4, R5, R6, R7, R8, R9, R10, +def GPR : RegisterClass<"MBlaze", [i32,f32], 32, (sequence "R%u", 0, 31)>; - // Not preserved across procedure calls - R11, R12, - - // Callee save - R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, - - // Reserved - R0, // Always zero - R1, // The stack pointer - R2, // Read-only small data area anchor - R13, // Read-write small data area anchor - R14, // Return address for interrupts - R15, // Return address for sub-routines - R16, // Return address for trap - R17, // Return address for exceptions - R18, // Reserved for assembler - R19 // The frame-pointer - ]>; - -def SPR : RegisterClass<"MBlaze", [i32], 32, - [ +def SPR : RegisterClass<"MBlaze", [i32], 32, (add // Reserved RPC, RMSR, @@ -160,12 +137,12 @@ def SPR : RegisterClass<"MBlaze", [i32], 32, RPVR9, RPVR10, RPVR11 - ]> + )> { // None of the special purpose registers are allocatable. let isAllocatable = 0; } -def CRC : RegisterClass<"MBlaze", [i32], 32, [CARRY]> { +def CRC : RegisterClass<"MBlaze", [i32], 32, (add CARRY)> { let CopyCost = -1; } diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp index a80744a4769a..eda141daf2b3 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -7,29 +7,42 @@ // //===----------------------------------------------------------------------===// // -// This file implements the MBlaze specific subclass of TargetSubtarget. +// This file implements the MBlaze specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "MBlazeSubtarget.h" #include "MBlaze.h" #include "MBlazeRegisterInfo.h" -#include "MBlazeGenSubtarget.inc" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "MBlazeGenSubtargetInfo.inc" + using namespace llvm; -MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS): +MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS): + MBlazeGenSubtargetInfo(TT, CPU, FS), HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false), HasFPU(false), HasMul64(false), HasSqrt(false) { // Parse features string. 
- std::string CPU = "mblaze"; - CPU = ParseSubtargetFeatures(FS, CPU); + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "mblaze"; + ParseSubtargetFeatures(CPUName, FS); // Only use instruction scheduling if the selected CPU has an instruction // itinerary (the default CPU is the only one that doesn't). - HasItin = CPU != "mblaze"; - DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n"); + HasItin = CPUName != "mblaze"; + DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n"); + + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUName); // Compute the issue width of the MBlaze itineraries computeIssueWidth(); @@ -41,11 +54,10 @@ void MBlazeSubtarget::computeIssueWidth() { bool MBlazeSubtarget:: enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, + TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtarget::ANTIDEP_CRITICAL; + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); CriticalPathRCs.push_back(&MBlaze::GPRRegClass); return HasItin && OptLevel >= CodeGenOpt::Default; } - diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h index 2255b2809be2..43b0197ad5aa 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.h +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -7,21 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file declares the MBlaze specific subclass of TargetSubtarget. +// This file declares the MBlaze specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef MBLAZESUBTARGET_H #define MBLAZESUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" -#include "llvm/Target/TargetMachine.h" - +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "MBlazeGenSubtargetInfo.inc" + namespace llvm { +class StringRef; -class MBlazeSubtarget : public TargetSubtarget { +class MBlazeSubtarget : public MBlazeGenSubtargetInfo { protected: bool HasBarrel; @@ -39,12 +42,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. - MBlazeSubtarget(const std::string &TT, const std::string &FS); + MBlazeSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// Compute the number of maximum number of issues per cycle for the /// MBlaze scheduling itineraries. @@ -52,7 +55,7 @@ public: /// enablePostRAScheduler - True at 'More' optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, + TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const; /// getInstrItins - Return the instruction itineraies based on subtarget. 
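The MBlazeSubtarget changes above capture the new subtarget contract: the constructor takes an explicit CPU string, ParseSubtargetFeatures(CPU, FS) now returns void instead of the resolved CPU name, and scheduling itineraries are fetched by name with getInstrItineraryForCPU. A condensed constructor sketch for a hypothetical target following this patch's pattern:

    ExampleSubtarget::ExampleSubtarget(const std::string &TT,
                                       const std::string &CPU,
                                       const std::string &FS)
      : ExampleGenSubtargetInfo(TT, CPU, FS) {   // tablegen'd base class
      std::string CPUName = CPU;
      if (CPUName.empty())
        CPUName = "generic";                     // fall back to a default CPU
      ParseSubtargetFeatures(CPUName, FS);       // void now; sets feature bits
      InstrItins = getInstrItineraryForCPU(CPUName);
    }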
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index df34a83e33a8..7208874aef1d 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "MBlaze.h" -#include "MBlazeMCAsmInfo.h" #include "MBlazeTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" @@ -21,14 +20,6 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { - Triple TheTriple(TT); - switch (TheTriple.getOS()) { - default: - return new MBlazeMCAsmInfo(); - } -} - static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCContext &Ctx, TargetAsmBackend &TAB, raw_ostream &_OS, @@ -55,9 +46,6 @@ extern "C" void LLVMInitializeMBlazeTarget() { // Register the target. RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget); - // Register the target asm info. - RegisterAsmInfoFn A(TheMBlazeTarget, createMCAsmInfo); - // Register the MC code emitter TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget, llvm::createMBlazeMCCodeEmitter); @@ -80,9 +68,9 @@ extern "C" void LLVMInitializeMBlazeTarget() { // an easier handling. MBlazeTargetMachine:: MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &FS): - LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + const std::string &CPU, const std::string &FS): + LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS), DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 48ce37a482fc..cd6caafbf309 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -42,7 +42,7 @@ namespace llvm { public: MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..3d15708c35b8 --- /dev/null +++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMMBlazeDesc + MBlazeMCTargetDesc.cpp + MBlazeMCAsmInfo.cpp + ) diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp index 1467141d34ae..0d88466bb300 100644 --- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp @@ -15,6 +15,8 @@ using namespace llvm; MBlazeMCAsmInfo::MBlazeMCAsmInfo() { + IsLittleEndian = false; + StackGrowsUp = false; SupportsDebugInformation = true; AlignmentIsInBytes = false; PrivateGlobalPrefix = "$"; diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h index e68dd58b016b..e68dd58b016b 100644 --- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp new file mode 100644 index 000000000000..20d6c0bd2156 --- /dev/null +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -0,0 +1,65 @@ +//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===// +// +// The LLVM 
Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides MBlaze specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeMCTargetDesc.h" +#include "MBlazeMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "MBlazeGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "MBlazeGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "MBlazeGenRegisterInfo.inc" + +using namespace llvm; + + +static MCInstrInfo *createMBlazeMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitMBlazeMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeMBlazeMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo); +} + +static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitMBlazeMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget, + createMBlazeMCSubtargetInfo); +} + +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + default: + return new MBlazeMCAsmInfo(); + } +} + +extern "C" void LLVMInitializeMBlazeMCAsmInfo() { + RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo); +} diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h new file mode 100644 index 000000000000..b14772ef060b --- /dev/null +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- MBlazeMCTargetDesc.h - MBlaze Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides MBlaze specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZEMCTARGETDESC_H +#define MBLAZEMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheMBlazeTarget; + +} // End llvm namespace + +// Defines symbolic names for MBlaze registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "MBlazeGenRegisterInfo.inc" + +// Defines symbolic names for the MBlaze instructions. +#define GET_INSTRINFO_ENUM +#include "MBlazeGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "MBlazeGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/MBlaze/MCTargetDesc/Makefile b/lib/Target/MBlaze/MCTargetDesc/Makefile new file mode 100644 index 000000000000..71075ffbf47c --- /dev/null +++ b/lib/Target/MBlaze/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/MBlaze/TargetDesc/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMBlazeDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile index e01c60bb8c65..83c2a7d34da1 100644 --- a/lib/Target/MBlaze/Makefile +++ b/lib/Target/MBlaze/Makefile @@ -11,15 +11,14 @@ LIBRARYNAME = LLVMMBlazeCodeGen TARGET = MBlaze # Make sure that tblgen is run, first thing. -BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \ - MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \ - MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \ - MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \ - MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \ - MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc \ - MBlazeGenEDInfo.inc +BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \ + MBlazeGenAsmWriter.inc \ + MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \ + MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \ + MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc \ + MBlazeGenEDInfo.inc -DIRS = InstPrinter AsmParser Disassembler TargetInfo +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 2c7cbb64418f..33f3d449ed99 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -1,14 +1,11 @@ set(LLVM_TARGET_DEFINITIONS MSP430.td) -tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MSP430GenRegisterNames.inc -gen-register-enums) -tablegen(MSP430GenRegisterInfo.inc -gen-register-desc) -tablegen(MSP430GenInstrNames.inc -gen-instr-enums) -tablegen(MSP430GenInstrInfo.inc -gen-instr-desc) +tablegen(MSP430GenRegisterInfo.inc -gen-register-info) +tablegen(MSP430GenInstrInfo.inc -gen-instr-info) tablegen(MSP430GenAsmWriter.inc -gen-asm-writer) tablegen(MSP430GenDAGISel.inc -gen-dag-isel) tablegen(MSP430GenCallingConv.inc -gen-callingconv) -tablegen(MSP430GenSubtarget.inc -gen-subtarget) +tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget) add_llvm_target(MSP430CodeGen MSP430BranchSelector.cpp @@ -16,7 +13,6 @@ add_llvm_target(MSP430CodeGen MSP430ISelLowering.cpp MSP430InstrInfo.cpp MSP430FrameLowering.cpp - MSP430MCAsmInfo.cpp MSP430RegisterInfo.cpp MSP430Subtarget.cpp MSP430TargetMachine.cpp @@ -27,3 +23,4 @@ add_llvm_target(MSP430CodeGen add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 63860dcc7e3a..50d98b7c41fd 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -18,11 +18,10 @@ namespace llvm { class MCOperand; - class TargetMachine; class MSP430InstPrinter : public MCInstPrinter { public: - MSP430InstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..0f3ebd303924 --- /dev/null +++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMMSP430Desc + MSP430MCTargetDesc.cpp 
+ MSP430MCAsmInfo.cpp + ) diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index 3f4494460554..ad7d380b5631 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -15,6 +15,8 @@ using namespace llvm; MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) { + PointerSize = 2; + PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; PCSymbol="."; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index f3138a22022d..f3138a22022d 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp new file mode 100644 index 000000000000..43a704d7a7df --- /dev/null +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -0,0 +1,58 @@ +//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides MSP430 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "MSP430MCTargetDesc.h" +#include "MSP430MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "MSP430GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "MSP430GenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "MSP430GenRegisterInfo.inc" + +using namespace llvm; + + +static MCInstrInfo *createMSP430MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitMSP430MCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeMSP430MCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo); +} + + +static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitMSP430MCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeMSP430MCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target, + createMSP430MCSubtargetInfo); +} + +extern "C" void LLVMInitializeMSP430MCAsmInfo() { + RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target); +} diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h new file mode 100644 index 000000000000..0d8a6bdb44f9 --- /dev/null +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides MSP430 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ALPHAMCTARGETDESC_H +#define ALPHAMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheMSP430Target; + +} // End llvm namespace + +// Defines symbolic names for MSP430 registers. +// This defines a mapping from register name to register number. +#define GET_REGINFO_ENUM +#include "MSP430GenRegisterInfo.inc" + +// Defines symbolic names for the MSP430 instructions. +#define GET_INSTRINFO_ENUM +#include "MSP430GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "MSP430GenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/MSP430/MCTargetDesc/Makefile b/lib/Target/MSP430/MCTargetDesc/Makefile new file mode 100644 index 000000000000..bb857998eef9 --- /dev/null +++ b/lib/Target/MSP430/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/MSP430/TargetDesc/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMSP430Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h index e74211807c0d..4574ce5f98b7 100644 --- a/lib/Target/MSP430/MSP430.h +++ b/lib/Target/MSP430/MSP430.h @@ -15,6 +15,7 @@ #ifndef LLVM_TARGET_MSP430_H #define LLVM_TARGET_MSP430_H +#include "MCTargetDesc/MSP430MCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace MSP430CC { @@ -41,15 +42,6 @@ namespace llvm { FunctionPass *createMSP430BranchSelectionPass(); - extern Target TheMSP430Target; - } // end namespace llvm; -// Defines symbolic names for MSP430 registers. -// This defines a mapping from register name to register number. -#include "MSP430GenRegisterNames.inc" - -// Defines symbolic names for the MSP430 instructions. 
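One wrinkle worth flagging in the new MSP430MCTargetDesc.h above: its include guard reads ALPHAMCTARGETDESC_H, evidently copied from the Alpha target's version of the header. A corrected guard would simply be:

    #ifndef MSP430MCTARGETDESC_H
    #define MSP430MCTARGETDESC_H
    // ... declarations and GET_*_ENUM includes as above ...
    #endif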
-#include "MSP430GenInstrNames.inc" - #endif diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 5264d680d8b3..2042056617ac 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "asm-printer" #include "MSP430.h" #include "MSP430InstrInfo.h" -#include "MSP430MCAsmInfo.h" #include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" #include "InstPrinter/MSP430InstPrinter.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -164,11 +164,10 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { } static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, - TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new MSP430InstPrinter(TM, MAI); + return new MSP430InstPrinter(MAI); return 0; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 424df136cc16..846d09361b33 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -15,18 +15,21 @@ #include "MSP430InstrInfo.h" #include "MSP430MachineFunctionInfo.h" #include "MSP430TargetMachine.h" -#include "MSP430GenInstrInfo.inc" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#define GET_INSTRINFO_CTOR +#include "MSP430GenInstrInfo.inc" + using namespace llvm; MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm) - : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts)), + : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), RI(tm, *this), TM(tm) {} void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -158,13 +161,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { } bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. - if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } @@ -293,7 +296,7 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, /// instruction may be. This returns the maximum number of bytes. 
/// unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const TargetInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.TSFlags & MSP430II::SizeMask) { default: diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index e885cd36a041..90013f5c2e70 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -17,6 +17,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "MSP430RegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "MSP430GenInstrInfo.inc" + namespace llvm { class MSP430TargetMachine; @@ -37,7 +40,7 @@ namespace MSP430II { }; } -class MSP430InstrInfo : public TargetInstrInfoImpl { +class MSP430InstrInfo : public MSP430GenInstrInfo { const MSP430RegisterInfo RI; MSP430TargetMachine &TM; public: diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 53f4c2e4a887..1cc60bba3a55 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -26,13 +26,15 @@ #include "llvm/ADT/BitVector.h" #include "llvm/Support/ErrorHandling.h" +#define GET_REGINFO_TARGET_DESC +#include "MSP430GenRegisterInfo.inc" + using namespace llvm; // FIXME: Provide proper call frame setup / destroy opcodes. MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii) - : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), - TM(tm), TII(tii) { + : MSP430GenRegisterInfo(), TM(tm), TII(tii) { StackAlign = TM.getFrameLowering()->getStackAlignment(); } @@ -117,12 +119,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; MachineInstr *New = 0; - if (Old->getOpcode() == getCallFrameSetupOpcode()) { + if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SPW) .addReg(MSP430::SPW).addImm(Amount); } else { - assert(Old->getOpcode() == getCallFrameDestroyOpcode()); + assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode()); // factor out the amount the callee already popped. uint64_t CalleeAmt = Old->getOperand(1).getImm(); Amount -= CalleeAmt; @@ -140,7 +142,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.insert(I, New); } } - } else if (I->getOpcode() == getCallFrameDestroyOpcode()) { + } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. 
if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { @@ -250,5 +252,3 @@ int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { llvm_unreachable("Not implemented yet!"); return 0; } - -#include "MSP430GenRegisterInfo.inc" diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index e82055876f25..fb70594ab37c 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -15,7 +15,9 @@ #define LLVM_TARGET_MSP430REGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "MSP430GenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "MSP430GenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td index 3ef6ab219def..d1c2e3f7915c 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.td +++ b/lib/Target/MSP430/MSP430RegisterInfo.td @@ -66,19 +66,19 @@ def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>; def GR8 : RegisterClass<"MSP430", [i8], 8, // Volatile registers - [R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B, + (add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B, // Frame pointer, sometimes allocable FPB, // Volatile, but not allocable - PCB, SPB, SRB, CGB]>; + PCB, SPB, SRB, CGB)>; def GR16 : RegisterClass<"MSP430", [i16], 16, // Volatile registers - [R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W, + (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W, // Frame pointer, sometimes allocable FPW, // Volatile, but not allocable - PCW, SPW, SRW, CGW]> + PCW, SPW, SRW, CGW)> { let SubRegClasses = [(GR8 subreg_8bit)]; } diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 1346cb9a04dc..b58c50afb982 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -7,19 +7,26 @@ // //===----------------------------------------------------------------------===// // -// This file implements the MSP430 specific subclass of TargetSubtarget. +// This file implements the MSP430 specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "MSP430Subtarget.h" #include "MSP430.h" -#include "MSP430GenSubtarget.inc" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "MSP430GenSubtargetInfo.inc" using namespace llvm; -MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &FS) { - std::string CPU = "generic"; +MSP430Subtarget::MSP430Subtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS) : + MSP430GenSubtargetInfo(TT, CPU, FS) { + std::string CPUName = "generic"; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(CPUName, FS); } diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 1070544f0773..1ce5f11fe1bb 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -7,31 +7,35 @@ // //===----------------------------------------------------------------------===// // -// This file declares the MSP430 specific subclass of TargetSubtarget. +// This file declares the MSP430 specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_TARGET_MSP430_SUBTARGET_H #define LLVM_TARGET_MSP430_SUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "MSP430GenSubtargetInfo.inc" #include <string> namespace llvm { +class StringRef; -class MSP430Subtarget : public TargetSubtarget { +class MSP430Subtarget : public MSP430GenSubtargetInfo { bool ExtendedInsts; public: /// This constructor initializes the data members to match that /// of the specified triple. /// - MSP430Subtarget(const std::string &TT, const std::string &FS); + MSP430Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index fba95365a6a4..971f512141e8 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "MSP430.h" -#include "MSP430MCAsmInfo.h" #include "MSP430TargetMachine.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" @@ -23,14 +22,14 @@ using namespace llvm; extern "C" void LLVMInitializeMSP430Target() { // Register the target. RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target); - RegisterAsmInfo<MSP430MCAsmInfo> Z(TheMSP430Target); } MSP430TargetMachine::MSP430TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index cee3b0480596..2a9eea0bcd82 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine { public: MSP430TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile index fa4e80b0ff37..82216edd81e4 100644 --- a/lib/Target/MSP430/Makefile +++ b/lib/Target/MSP430/Makefile @@ -12,13 +12,12 @@ LIBRARYNAME = LLVMMSP430CodeGen TARGET = MSP430 # Make sure that tblgen is run, first thing. 
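# Note: the consolidated MSP430Gen*Info.inc outputs below replace the older split RegisterNames/InstrNames and desc/desc-header files, mirroring the tablegen renames made throughout this patch.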
-BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \ - MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \ - MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \ +BUILT_SOURCES = MSP430GenRegisterInfo.inc MSP430GenInstrInfo.inc \ + MSP430GenAsmWriter.inc \ MSP430GenDAGISel.inc MSP430GenCallingConv.inc \ - MSP430GenSubtarget.inc + MSP430GenSubtargetInfo.inc -DIRS = InstPrinter TargetInfo +DIRS = InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index fd16516f3851..36ab1a97e4f8 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -1,14 +1,11 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) -tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(MipsGenRegisterNames.inc -gen-register-enums) -tablegen(MipsGenRegisterInfo.inc -gen-register-desc) -tablegen(MipsGenInstrNames.inc -gen-instr-enums) -tablegen(MipsGenInstrInfo.inc -gen-instr-desc) +tablegen(MipsGenRegisterInfo.inc -gen-register-info) +tablegen(MipsGenInstrInfo.inc -gen-instr-info) tablegen(MipsGenAsmWriter.inc -gen-asm-writer) tablegen(MipsGenDAGISel.inc -gen-dag-isel) tablegen(MipsGenCallingConv.inc -gen-callingconv) -tablegen(MipsGenSubtarget.inc -gen-subtarget) +tablegen(MipsGenSubtargetInfo.inc -gen-subtarget) add_llvm_target(MipsCodeGen MipsAsmPrinter.cpp @@ -19,7 +16,8 @@ add_llvm_target(MipsCodeGen MipsISelDAGToDAG.cpp MipsISelLowering.cpp MipsFrameLowering.cpp - MipsMCAsmInfo.cpp + MipsMCInstLower.cpp + MipsMCSymbolRefExpr.cpp MipsRegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp @@ -27,4 +25,6 @@ add_llvm_target(MipsCodeGen MipsSelectionDAGInfo.cpp ) +add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt new file mode 100644 index 000000000000..8852fd4126e6 --- /dev/null +++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMipsAsmPrinter + MipsInstPrinter.cpp + ) +add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/lib/Target/Mips/InstPrinter/Makefile b/lib/Target/Mips/InstPrinter/Makefile new file mode 100644 index 000000000000..63e38ef3e6aa --- /dev/null +++ b/lib/Target/Mips/InstPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Mips/InstPrinter/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMipsAsmPrinter + +# Hack: we need to include the 'main' Mips target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp new file mode 100644 index 000000000000..41c1dd3919b4 --- /dev/null +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -0,0 +1,127 @@ +//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This class prints a Mips MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "MipsInstPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "MipsGenAsmWriter.inc" + +const char* Mips::MipsFCCToString(Mips::CondCode CC) { + switch (CC) { + case FCOND_F: + case FCOND_T: return "f"; + case FCOND_UN: + case FCOND_OR: return "un"; + case FCOND_OEQ: + case FCOND_UNE: return "eq"; + case FCOND_UEQ: + case FCOND_ONE: return "ueq"; + case FCOND_OLT: + case FCOND_UGE: return "olt"; + case FCOND_ULT: + case FCOND_OGE: return "ult"; + case FCOND_OLE: + case FCOND_UGT: return "ole"; + case FCOND_ULE: + case FCOND_OGT: return "ule"; + case FCOND_SF: + case FCOND_ST: return "sf"; + case FCOND_NGLE: + case FCOND_GLE: return "ngle"; + case FCOND_SEQ: + case FCOND_SNE: return "seq"; + case FCOND_NGL: + case FCOND_GL: return "ngl"; + case FCOND_LT: + case FCOND_NLT: return "lt"; + case FCOND_NGE: + case FCOND_GE: return "nge"; + case FCOND_LE: + case FCOND_NLE: return "le"; + case FCOND_NGT: + case FCOND_GT: return "ngt"; + } + llvm_unreachable("Impossible condition code!"); +} + +StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + +void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << '$' << LowercaseString(getRegisterName(RegNo)); +} + +void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { + printInstruction(MI, O); +} + +void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + printRegName(O, Op.getReg()); + return; + } + + if (Op.isImm()) { + O << Op.getImm(); + return; + } + + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << *Op.getExpr(); +} + +void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) + O << (unsigned short int)MO.getImm(); + else + printOperand(MI, opNum, O); +} + +void MipsInstPrinter:: +printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { + // Load/store memory operands are printed as imm($reg). + // For PIC targets, the callee address is loaded with a pattern such as + // lw $25, %call16($28). + printOperand(MI, opNum+1, O); + O << "("; + printOperand(MI, opNum, O); + O << ")"; +} + +void MipsInstPrinter:: +printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) { + // When stack locations are used by instructions that are not loads or + // stores, print the operands the same way as for normal 3-operand + // instructions.
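+ // For example, this form prints "$sp, 16" where printMemOperand above + // prints "16($sp)".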
+ printOperand(MI, opNum, O); + O << ", "; + printOperand(MI, opNum+1, O); + return; +} + +void MipsInstPrinter:: +printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) { + const MCOperand& MO = MI->getOperand(opNum); + O << MipsFCCToString((Mips::CondCode)MO.getImm()); +} diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h new file mode 100644 index 000000000000..680208eb819b --- /dev/null +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -0,0 +1,100 @@ +//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a Mips MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSINSTPRINTER_H +#define MIPSINSTPRINTER_H +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { +// These enumeration declarations were originally in MipsInstrInfo.h but +// had to be moved here to avoid circular dependencies between +// LLVMMipsCodeGen and LLVMMipsAsmPrinter. +namespace Mips { +// Mips Branch Codes +enum FPBranchCode { + BRANCH_F, + BRANCH_T, + BRANCH_FL, + BRANCH_TL, + BRANCH_INVALID +}; + +// Mips Condition Codes +enum CondCode { + // To be used with float branch True + FCOND_F, + FCOND_UN, + FCOND_OEQ, + FCOND_UEQ, + FCOND_OLT, + FCOND_ULT, + FCOND_OLE, + FCOND_ULE, + FCOND_SF, + FCOND_NGLE, + FCOND_SEQ, + FCOND_NGL, + FCOND_LT, + FCOND_NGE, + FCOND_LE, + FCOND_NGT, + + // To be used with float branch False + // These conditions have the same mnemonics as the + // ones above, but are used with a branch False; + FCOND_T, + FCOND_OR, + FCOND_UNE, + FCOND_ONE, + FCOND_UGE, + FCOND_OGE, + FCOND_UGT, + FCOND_OGT, + FCOND_ST, + FCOND_GLE, + FCOND_SNE, + FCOND_GL, + FCOND_NLT, + FCOND_GE, + FCOND_NLE, + FCOND_GT +}; + +const char *MipsFCCToString(Mips::CondCode CC); +} // end namespace Mips + +class TargetMachine; + +class MipsInstPrinter : public MCInstPrinter { +public: + MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} + + // Autogenerated by tblgen.
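+ // (They are emitted into MipsGenAsmWriter.inc, which MipsInstPrinter.cpp + // includes with GET_INSTRUCTION_NAME defined to pull in the name table.)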
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getInstructionName(unsigned Opcode); + static const char *getRegisterName(unsigned RegNo); + + virtual StringRef getOpcodeName(unsigned Opcode) const; + virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; + virtual void printInst(const MCInst *MI, raw_ostream &O); + +private: + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O); + void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); + void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); + void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..97de75db5347 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMMipsDesc + MipsMCTargetDesc.cpp + MipsMCAsmInfo.cpp + ) diff --git a/lib/Target/Mips/MCTargetDesc/Makefile b/lib/Target/Mips/MCTargetDesc/Makefile new file mode 100644 index 000000000000..7fe2086a6e00 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Mips/MCTargetDesc/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMipsDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index c86bf405b8e9..5d9242500f6d 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -12,11 +12,17 @@ //===----------------------------------------------------------------------===// #include "MipsMCAsmInfo.h" +#include "llvm/ADT/Triple.h" + using namespace llvm; MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::mips) + IsLittleEndian = false; + AlignmentIsInBytes = false; - Data16bitsDirective = "\t.half\t"; + Data16bitsDirective = "\t.2byte\t"; Data32bitsDirective = "\t.4byte\t"; Data64bitsDirective = 0; PrivateGlobalPrefix = "$"; @@ -28,4 +34,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::DwarfCFI; HasLEB128 = true; + DwarfRegNumForCFI = true; } diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index 41b719207b7b..41b719207b7b 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp new file mode 100644 index 000000000000..06f0d0bfb6b9 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -0,0 +1,58 @@ +//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file provides Mips specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "MipsMCTargetDesc.h" +#include "MipsMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "MipsGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "MipsGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "MipsGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createMipsMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitMipsMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeMipsMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo); +} + + +static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitMipsMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeMipsMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, + createMipsMCSubtargetInfo); +} + +extern "C" void LLVMInitializeMipsMCAsmInfo() { + RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget); + RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget); +} diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h new file mode 100644 index 000000000000..3d18f114c8bd --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -0,0 +1,39 @@ +//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Mips specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSMCTARGETDESC_H +#define MIPSMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheMipsTarget; +extern Target TheMipselTarget; + +} // End llvm namespace + +// Defines symbolic names for Mips registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "MipsGenRegisterInfo.inc" + +// Defines symbolic names for the Mips instructions. +#define GET_INSTRINFO_ENUM +#include "MipsGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "MipsGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index d16b066a624e..cc4a8aef224a 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -12,13 +12,12 @@ LIBRARYNAME = LLVMMipsCodeGen TARGET = Mips # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ - MipsGenRegisterInfo.inc MipsGenInstrNames.inc \ - MipsGenInstrInfo.inc MipsGenAsmWriter.inc \ +BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ + MipsGenAsmWriter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ - MipsGenSubtarget.inc + MipsGenSubtargetInfo.inc -DIRS = TargetInfo +DIRS = InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 76a26a9ba581..984b5adfc5f3 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -15,6 +15,7 @@ #ifndef TARGET_MIPS_H #define TARGET_MIPS_H +#include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -28,16 +29,6 @@ namespace llvm { FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM); FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM); - extern Target TheMipsTarget; - extern Target TheMipselTarget; - } // end namespace llvm; -// Defines symbolic names for Mips registers. This defines a mapping from -// register name to register number. -#include "MipsGenRegisterNames.inc" - -// Defines symbolic names for the Mips instructions. -#include "MipsGenInstrNames.inc" - #endif diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index b79016d788f0..433cd57f34e0 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -88,6 +88,14 @@ def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd, FeatureMinMax, FeatureSwap, FeatureBitCount]>; +def MipsAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + def Mips : Target { let InstructionSet = MipsInstrInfo; + + let AssemblyWriters = [MipsAsmWriter]; } + diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 8caa7cd2f754..69e03bd29724 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -13,80 +13,49 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-asm-printer" +#include "MipsAsmPrinter.h" #include "Mips.h" -#include "MipsSubtarget.h" #include "MipsInstrInfo.h" -#include "MipsTargetMachine.h" #include "MipsMachineFunction.h" +#include "MipsMCInstLower.h" +#include "InstPrinter/MipsInstPrinter.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class MipsAsmPrinter : public AsmPrinter { - const MipsSubtarget *Subtarget; - public: - explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<MipsSubtarget>(); - } +#include "llvm/Analysis/DebugInfo.h" - virtual const char *getPassName() 
const { - return "Mips Assembly Printer"; - } +using namespace llvm; - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); - void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); - void printSavedRegsBitmask(raw_ostream &O); - void printHex32(unsigned int Value, raw_ostream &O); - - const char *getCurrentABIString() const; - void emitFrameDirective(); - - void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd. - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock* - MBB) const; - static const char *getRegisterName(unsigned RegNo); +void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); - virtual void EmitFunctionEntryLabel(); - void EmitStartOfAsmFile(Module &M); - }; -} // end of anonymous namespace + if (MI->isDebugValue()) { + PrintDebugValueComment(MI, OS); + return; + } -#include "MipsGenAsmWriter.inc" + MipsMCInstLower MCInstLowering(Mang, *MF, *this); + MCInst TmpInst0; + MCInstLowering.Lower(MI, TmpInst0); + OutStreamer.EmitInstruction(TmpInst0); +} //===----------------------------------------------------------------------===// // @@ -202,9 +171,9 @@ void MipsAsmPrinter::emitFrameDirective() { unsigned stackSize = MF->getFrameInfo()->getStackSize(); OutStreamer.EmitRawText("\t.frame\t$" + - Twine(LowercaseString(getRegisterName(stackReg))) + - "," + Twine(stackSize) + ",$" + - Twine(LowercaseString(getRegisterName(returnReg)))); + Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) + + "," + Twine(stackSize) + ",$" + + Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg)))); } /// Emit Set directives. @@ -304,6 +273,19 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } +bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. 
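+ // Only the bare 'm' constraint reaches this point; the operand is the base + // register of the access and is printed as a simple 0($reg) reference.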
+ + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline asm memory operand"); + O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")"; + return false; +} + void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); @@ -326,7 +308,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, switch (MO.getType()) { case MachineOperand::MO_Register: - O << '$' << LowercaseString(getRegisterName(MO.getReg())); + O << '$' + << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg())); break; case MachineOperand::MO_Immediate: @@ -380,27 +363,27 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, } void MipsAsmPrinter:: -printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier) { - // when using stack locations for not load/store instructions - // print the same way as all normal 3 operand instructions. - if (Modifier && !strcmp(Modifier, "stackloc")) { - printOperand(MI, opNum+1, O); - O << ", "; - printOperand(MI, opNum, O); - return; - } - +printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { // Load/Store memory operands -- imm($reg) // If PIC target the target is loaded as the // pattern lw $25,%call16($28) - printOperand(MI, opNum, O); - O << "("; printOperand(MI, opNum+1, O); + O << "("; + printOperand(MI, opNum, O); O << ")"; } void MipsAsmPrinter:: +printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { + // When stack locations are used by instructions that are not loads or + // stores, print the operands the same way as for normal 3-operand + // instructions. + printOperand(MI, opNum, O); + O << ", "; + printOperand(MI, opNum+1, O); + return; +} + +void MipsAsmPrinter:: printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier) { const MachineOperand& MO = MI->getOperand(opNum); @@ -425,8 +408,33 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer.EmitRawText(StringRef("\t.previous")); } +MachineLocation +MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue. + assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Unexpected MachineOperand types"); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); +} + +void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + // TODO: implement +} + // Force static initialization.
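+// Both the big-endian and little-endian Mips targets are registered below +// with the same AsmPrinter and the same MCInstPrinter factory.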
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI) { + return new MipsInstPrinter(MAI); +} + extern "C" void LLVMInitializeMipsAsmPrinter() { RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget); RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget); + + TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheMipselTarget, + createMipsMCInstPrinter); } diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h new file mode 100644 index 000000000000..16461ff1fbb0 --- /dev/null +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -0,0 +1,71 @@ +//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Mips Assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSASMPRINTER_H +#define MIPSASMPRINTER_H + +#include "MipsSubtarget.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class MCStreamer; +class MachineInstr; +class raw_ostream; +class MachineBasicBlock; +class Module; + +class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { + const MipsSubtarget *Subtarget; + +public: + explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<MipsSubtarget>(); + } + + virtual const char *getPassName() const { + return "Mips Assembly Printer"; + } + + void EmitInstruction(const MachineInstr *MI); + void printSavedRegsBitmask(raw_ostream &O); + void printHex32(unsigned int Value, raw_ostream &O); + void emitFrameDirective(); + const char *getCurrentABIString() const; + virtual void EmitFunctionEntryLabel(); + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock* + MBB) const; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O); + void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier = 0); + void EmitStartOfAsmFile(Module &M); + virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); +}; +} + +#endif + diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 57aeb1d2793c..876f0fcc83ea 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -20,8 +20,8 @@ class CCIfSubtarget<string F, CCAction A>: // Only the return rules are defined here for O32. The rules for argument // passing are defined in MipsISelLowering.cpp. 
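// (As the hunk below shows, i32 results may now also land in A0 and A1, so up to four words can be returned in registers.)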
def RetCC_MipsO32 : CallingConv<[ - // i32 are returned in registers V0, V1 - CCIfType<[i32], CCAssignToReg<[V0, V1]>>, + // i32 are returned in registers V0, V1, A0, A1 + CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>, // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index b44a0af2d436..c3a6211399cd 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -59,10 +59,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { - const TargetInstrDesc& Tid = I->getDesc(); - if (Tid.hasDelaySlot() && + const MCInstrDesc& MCid = I->getDesc(); + if (MCid.hasDelaySlot() && (TM.getSubtarget<MipsSubtarget>().isMips1() || - Tid.isCall() || Tid.isBranch() || Tid.isReturn())) { + MCid.isCall() || MCid.isBranch() || MCid.isReturn())) { MachineBasicBlock::iterator J = I; ++J; BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP)); diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp index f49d490565ff..03d922fe7cd6 100644 --- a/lib/Target/Mips/MipsEmitGPRestore.cpp +++ b/lib/Target/Mips/MipsEmitGPRestore.cpp @@ -64,8 +64,8 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) { // Insert lw. ++I; DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addImm(0) - .addFrameIndex(FI); + BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI) + .addImm(0); Changed = true; } @@ -77,8 +77,8 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) { DebugLoc dl = I->getDebugLoc(); // emit lw $gp, ($gp save slot on stack) after jalr - BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addImm(0) - .addFrameIndex(FI); + BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI) + .addImm(0); Changed = true; } } diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp index 4423f5147980..a622258a4dcb 100644 --- a/lib/Target/Mips/MipsExpandPseudo.cpp +++ b/lib/Target/Mips/MipsExpandPseudo.cpp @@ -61,9 +61,9 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { - const TargetInstrDesc& Tid = I->getDesc(); + const MCInstrDesc& MCid = I->getDesc(); - switch(Tid.getOpcode()) { + switch(MCid.getOpcode()) { default: ++I; continue; @@ -87,7 +87,7 @@ void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB, MachineBasicBlock::iterator I) { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); - const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); + const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(DstReg); @@ -103,7 +103,7 @@ void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB, unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); - const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); + const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); DebugLoc dl = I->getDebugLoc(); const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp 
index d8a84ce52991..90aaeb60d06f 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -94,6 +94,10 @@ private: inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } + + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); }; } @@ -109,7 +113,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: -SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { +SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); @@ -166,7 +170,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { Addr.getOperand(0).getOpcode() == ISD::LOAD) && Addr.getOperand(1).getOpcode() == MipsISD::Lo) { SDValue LoVal = Addr.getOperand(1); - if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) { + if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || + isa<GlobalAddressSDNode>(LoVal.getOperand(0))) { Base = Addr.getOperand(0); Offset = LoVal.getOperand(0); return true; @@ -195,7 +200,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { SDValue N1 = N->getOperand(1); SDValue Offset0, Offset1, Base; - if (!SelectAddr(N1, Offset0, Base) || + if (!SelectAddr(N1, Base, Offset0) || N1.getValueType() != MVT::i32) return NULL; @@ -225,14 +230,14 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { // lwc $f0, X($3) // lwc $f1, X+4($3) SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, - MVT::Other, Offset0, Base, Chain); + MVT::Other, Base, Offset0, Chain); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(LD0, 0)); SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, - MVT::Other, Offset1, Base, SDValue(LD0, 1)); + MVT::Other, Base, Offset1, SDValue(LD0, 1)); SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(LD1, 0)); @@ -259,7 +264,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { SDValue N2 = N->getOperand(2); SDValue Offset0, Offset1, Base; - if (!SelectAddr(N2, Offset0, Base) || + if (!SelectAddr(N2, Base, Offset0) || N1.getValueType() != MVT::f64 || N2.getValueType() != MVT::i32) return NULL; @@ -289,12 +294,12 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { // Generate: // swc $f0, X($3) // swc $f1, X+4($3) - SDValue Ops0[] = { FPEven, Offset0, Base, Chain }; + SDValue Ops0[] = { FPEven, Base, Offset0, Chain }; Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, MVT::Other, Ops0, 4), 0); cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); - SDValue Ops1[] = { FPOdd, Offset1, Base, Chain }; + SDValue Ops1[] = { FPOdd, Base, Offset1, Chain }; Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, MVT::Other, Ops1, 4), 0); cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); @@ -462,6 +467,14 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return ResNode; } +bool MipsDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector<SDValue> &OutOps) { + assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + OutOps.push_back(Op); + return false; +} + /// createMipsISelDag - This pass 
converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index fd90731f50d2..b4f4b1b4bf04 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" +#include "InstPrinter/MipsInstPrinter.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -59,6 +60,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC"; + case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; default: return NULL; } } @@ -144,6 +146,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); @@ -773,7 +777,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, } BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr).addImm(0).addFrameIndex(fi); + .addReg(Incr).addFrameIndex(fi).addImm(0); } BB->addSuccessor(loopMBB); @@ -784,7 +788,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // sc tmp1, 0(ptr) // beq tmp1, $0, loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Ptr); + BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval); if (Nand) { // and tmp2, oldval, incr @@ -797,10 +801,10 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, } else { // lw tmp2, fi(sp) // load incr from stack // or tmp1, $zero, tmp2 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);; + BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); } - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr); + BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB); BB->addSuccessor(loopMBB); @@ -909,7 +913,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, } BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr2).addImm(0).addFrameIndex(fi); + .addReg(Incr2).addFrameIndex(fi).addImm(0); } BB->addSuccessor(loopMBB); @@ -922,7 +926,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // sc tmp9,0(addr) // beq tmp9,$0,loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Addr); + BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0); if (Nand) { // and tmp6, oldval, incr2 // nor tmp7, $0, tmp6 @@ -937,13 +941,13 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, } else { // lw tmp6, fi(sp) // load incr2 from stack // or tmp7, $zero, tmp6 - BuildMI(BB, dl, TII->get(Mips::LW), 
Tmp6).addImm(0).addFrameIndex(fi);; + BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0); BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); } BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addImm(0).addReg(Addr); + BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB); BB->addSuccessor(loopMBB); @@ -1026,14 +1030,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // hoist "or" instruction out of the block loop2MBB. BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Newval).addImm(0).addFrameIndex(fi); + .addReg(Newval).addFrameIndex(fi).addImm(0); BB->addSuccessor(loop1MBB); // loop1MBB: // ll dest, 0(ptr) // bne dest, oldval, exitMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Dest).addImm(0).addReg(Ptr); + BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BNE)) .addReg(Dest).addReg(Oldval).addMBB(exitMBB); BB->addSuccessor(exitMBB); @@ -1045,9 +1049,9 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // sc tmp1, 0(ptr) // beq tmp1, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);; + BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); - BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr); + BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB); BB->addSuccessor(loop1MBB); @@ -1142,7 +1146,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // and oldval4,oldval3,mask // bne oldval4,oldval2,exitMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addImm(0).addReg(Addr); + BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0); BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::BNE)) .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB); @@ -1158,7 +1162,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2); BuildMI(BB, dl, TII->get(Mips::SC), Tmp7) - .addReg(Tmp7).addImm(0).addReg(Addr); + .addReg(Tmp7).addReg(Addr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB); BB->addSuccessor(loop1MBB); @@ -1189,9 +1193,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, SDValue MipsTargetLowering:: LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { - unsigned StackAlignment = - getTargetMachine().getFrameLowering()->getStackAlignment(); - assert(StackAlignment >= + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + assert(getTargetMachine().getFrameLowering()->getStackAlignment() >= cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() && "Cannot lower if the alignment of the allocated space is larger than \ that of the stack."); @@ -1211,24 +1216,14 @@ 
LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const // must be placed in the stack pointer register. Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub, SDValue()); - // Retrieve updated $sp. There is a glue input to prevent instructions that - // clobber $sp from being inserted between copytoreg and copyfromreg. - SDValue NewSP = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32, - Chain.getValue(1)); - - // The stack space reserved by alloca is located right above the argument - // area. It is aligned on a boundary that is a multiple of StackAlignment. - MachineFunction &MF = DAG.getMachineFunction(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - unsigned SPOffset = (MipsFI->getMaxCallFrameSize() + StackAlignment - 1) / - StackAlignment * StackAlignment; - SDValue AllocPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP, - DAG.getConstant(SPOffset, MVT::i32)); // This node always has two return values: a new stack pointer // value and a chain - SDValue Ops[2] = { AllocPtr, NewSP.getValue(1) }; - return DAG.getMergeValues(Ops, 2, dl); + SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy()); + SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) }; + + return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3); } SDValue MipsTargetLowering:: @@ -1358,7 +1353,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { // General Dynamic TLS Model SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, - 0, MipsII::MO_TLSGD); + 0, MipsII::MO_TLSGD); SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA); SDValue GP = DAG.getRegister(Mips::GP, MVT::i32); SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd); @@ -1370,36 +1365,36 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const Args.push_back(Entry); std::pair<SDValue, SDValue> CallResult = LowerCallTo(DAG.getEntryNode(), - (const Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, false, true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + (const Type *) Type::getInt32Ty(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, true, + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, + dl); return CallResult.first; - } else { - SDValue Offset; - if (GV->isDeclaration()) { - // Initial Exec TLS Model - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_GOTTPREL); - Offset = DAG.getLoad(MVT::i32, dl, - DAG.getEntryNode(), TGA, MachinePointerInfo(), - false, false, 0); - } else { - // Local Exec TLS Model - SDVTList VTs = DAG.getVTList(MVT::i32); - SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_TPREL_HI); - SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_TPREL_LO); - SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1); - SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo); - Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); - } + } - SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT); - return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); + SDValue Offset; + if (GV->isDeclaration()) { + // Initial Exec TLS Model + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_GOTTPREL); + Offset = DAG.getLoad(MVT::i32, dl, + DAG.getEntryNode(), TGA, MachinePointerInfo(), + false, false, 0); + } else { 
+ // Local Exec TLS Model + SDVTList VTs = DAG.getVTList(MVT::i32); + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_HI); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_LO); + SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1); + SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo); + Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); } + + SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT); + return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } SDValue MipsTargetLowering:: @@ -1550,8 +1545,8 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) SDValue MipsTargetLowering:: LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - assert((Depth == 0) && + // check the depth + assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) && "Frame address can only be determined for current frame."); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); @@ -1770,6 +1765,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (IsPIC && !MipsFI->getGPFI()) MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true)); + // Get the frame index of the stack frame object that points to the location + // of dynamically allocated area on the stack. + int DynAllocFI = MipsFI->getDynAllocFI(); + // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is // allocated. @@ -1781,14 +1780,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (MaxCallFrameSize < NextStackOffset) { MipsFI->setMaxCallFrameSize(NextStackOffset); - if (IsPIC) { - // $gp restore slot must be aligned. - unsigned StackAlignment = TFL->getStackAlignment(); - NextStackOffset = (NextStackOffset + StackAlignment - 1) / - StackAlignment * StackAlignment; - int GPFI = MipsFI->getGPFI(); - MFI->setObjectOffset(GPFI, NextStackOffset); - } + // Set the offsets relative to $sp of the $gp restore slot and dynamically + // allocated stack space. These offsets must be aligned to a boundary + // determined by the stack alignment of the ABI. + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = (NextStackOffset + StackAlignment - 1) / + StackAlignment * StackAlignment; + + if (IsPIC) + MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset); + + MFI->setObjectOffset(DynAllocFI, NextStackOffset); } // With EABI is it possible to have 16 args on registers. @@ -1912,7 +1914,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (LoadSymAddr) { // Load callee address Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee); - SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee, + SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, 0); @@ -1922,9 +1924,6 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo); } else Callee = LoadValue; - - // Use chain output from LoadValue - Chain = LoadValue.getValue(1); } // copy to T9 @@ -1965,7 +1964,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. 
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true), + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(NextStackOffset, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); @@ -2332,14 +2332,16 @@ MipsTargetLowering::getSingleConstraintMatchWeight( return weight; } -/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), -/// return a list of registers that can be used to satisfy the constraint. -/// This should only be used for C_RegisterClass constraints. +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { + case 'd': // Address register. Same as 'r' unless generating MIPS16 code. + case 'y': // Same as 'r'. Exists for compatibility. case 'r': return std::make_pair(0U, Mips::CPURegsRegisterClass); case 'f': @@ -2348,55 +2350,12 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const if (VT == MVT::f64) if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit())) return std::make_pair(0U, Mips::AFGR64RegisterClass); + break; } } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -/// Given a register class constraint, like 'r', if this corresponds directly -/// to an LLVM register class, return a register of 0 and the register class -/// pointer. -std::vector<unsigned> MipsTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const -{ - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - default : break; - case 'r': - // GCC Mips Constraint Letters - case 'd': - case 'y': - return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3, - Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1, - Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, - Mips::T8, 0); - - case 'f': - if (VT == MVT::f32) { - if (Subtarget->isSingleFloat()) - return make_vector<unsigned>(Mips::F2, Mips::F3, Mips::F4, Mips::F5, - Mips::F6, Mips::F7, Mips::F8, Mips::F9, Mips::F10, Mips::F11, - Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, - Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, - Mips::F30, Mips::F31, 0); - else - return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8, - Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26, - Mips::F28, Mips::F30, 0); - } - - if (VT == MVT::f64) - if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit())) - return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4, - Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13, - Mips::D14, Mips::D15, 0); - } - return std::vector<unsigned>(); -} - bool MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Mips target isn't yet aware of offsets. 
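A quick illustration of the consolidated constraint handling above (TLI stands for a MipsTargetLowering instance; the harness is assumed, only the mapping itself comes from the patch):

  // 'd' and 'y' now take the same path as 'r' and yield the CPURegs class,
  // replacing the old hand-maintained vectors from
  // getRegClassForInlineAsmConstraint.
  std::pair<unsigned, const TargetRegisterClass*> RC =
      TLI.getRegForInlineAsmConstraint("d", MVT::i32);
  assert(RC.first == 0U && RC.second == Mips::CPURegsRegisterClass);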
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index fbcedfddf99a..bda26a229e72 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -79,7 +79,9 @@ namespace llvm { BuildPairF64, ExtractElementF64, - WrapperPIC + WrapperPIC, + + DynAlloc }; } @@ -167,10 +169,6 @@ namespace llvm { getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; /// isFPImmLegal - Returns true if the target can instruction select the diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index be044fa1f3b3..0a7a7f2dfe4e 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -14,18 +14,27 @@ #include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "MipsMachineFunction.h" -#include "llvm/ADT/STLExtras.h" +#include "InstPrinter/MipsInstPrinter.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/STLExtras.h" + +#define GET_INSTRINFO_CTOR #include "MipsGenInstrInfo.inc" using namespace llvm; MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) - : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts)), + : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + +const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { + return RI; +} + static bool isZeroImm(const MachineOperand &op) { return op.isImm() && op.getImm() == 0; } @@ -40,10 +49,10 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) || (MI->getOpcode() == Mips::LDC1)) { - if ((MI->getOperand(2).isFI()) && // is a stack slot - (MI->getOperand(1).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(1)))) { - FrameIndex = MI->getOperand(2).getIndex(); + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); return MI->getOperand(0).getReg(); } } @@ -61,10 +70,10 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) || (MI->getOpcode() == Mips::SDC1)) { - if ((MI->getOperand(2).isFI()) && // is a stack slot - (MI->getOperand(1).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(1)))) { - FrameIndex = MI->getOperand(2).getIndex(); + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); return MI->getOperand(0).getReg(); } } @@ -161,25 +170,25 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == Mips::CPURegsRegisterClass) BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill)) - .addImm(0).addFrameIndex(FI); + .addFrameIndex(FI).addImm(0); else if (RC == Mips::FGR32RegisterClass) BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill)) - .addImm(0).addFrameIndex(FI); + .addFrameIndex(FI).addImm(0); else if (RC == Mips::AFGR64RegisterClass) { if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { 
BuildMI(MBB, I, DL, get(Mips::SDC1)) .addReg(SrcReg, getKillRegState(isKill)) - .addImm(0).addFrameIndex(FI); + .addFrameIndex(FI).addImm(0); } else { const TargetRegisterInfo *TRI = MBB.getParent()->getTarget().getRegisterInfo(); const unsigned *SubSet = TRI->getSubRegisters(SrcReg); BuildMI(MBB, I, DL, get(Mips::SWC1)) .addReg(SubSet[0], getKillRegState(isKill)) - .addImm(0).addFrameIndex(FI); + .addFrameIndex(FI).addImm(0); BuildMI(MBB, I, DL, get(Mips::SWC1)) .addReg(SubSet[1], getKillRegState(isKill)) - .addImm(4).addFrameIndex(FI); + .addFrameIndex(FI).addImm(4); } } else llvm_unreachable("Register class not handled!"); @@ -195,25 +204,34 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); if (RC == Mips::CPURegsRegisterClass) - BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI); + BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0); else if (RC == Mips::FGR32RegisterClass) - BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI); + BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0); else if (RC == Mips::AFGR64RegisterClass) { if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { - BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI); + BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0); } else { const TargetRegisterInfo *TRI = MBB.getParent()->getTarget().getRegisterInfo(); const unsigned *SubSet = TRI->getSubRegisters(DestReg); BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0]) - .addImm(0).addFrameIndex(FI); + .addFrameIndex(FI).addImm(0); BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1]) - .addImm(4).addFrameIndex(FI); + .addFrameIndex(FI).addImm(4); } } else llvm_unreachable("Register class not handled!"); } +MachineInstr* +MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, + uint64_t Offset, const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// @@ -341,8 +359,8 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand>& Cond) const { unsigned Opc = Cond[0].getImm(); - const TargetInstrDesc &TID = get(Opc); - MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID); + const MCInstrDesc &MCID = get(Opc); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); for (unsigned i = 1; i < Cond.size(); ++i) MIB.addReg(Cond[i].getReg()); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index abf67733f083..4421c4862fa0 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -19,103 +19,15 @@ #include "llvm/Target/TargetInstrInfo.h" #include "MipsRegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "MipsGenInstrInfo.inc" + namespace llvm { namespace Mips { - - // Mips Branch Codes - enum FPBranchCode { - BRANCH_F, - BRANCH_T, - BRANCH_FL, - BRANCH_TL, - BRANCH_INVALID - }; - - // Mips Condition Codes - enum CondCode { - // To be used with float branch True - FCOND_F, - FCOND_UN, - FCOND_OEQ, - FCOND_UEQ, - FCOND_OLT, - FCOND_ULT, - FCOND_OLE, - FCOND_ULE, - FCOND_SF, - FCOND_NGLE, - FCOND_SEQ, - FCOND_NGL, - FCOND_LT, - FCOND_NGE, - FCOND_LE, - FCOND_NGT, - - // To be 
used with float branch False - // This conditions have the same mnemonic as the - // above ones, but are used with a branch False; - FCOND_T, - FCOND_OR, - FCOND_UNE, - FCOND_ONE, - FCOND_UGE, - FCOND_OGE, - FCOND_UGT, - FCOND_OGT, - FCOND_ST, - FCOND_GLE, - FCOND_SNE, - FCOND_GL, - FCOND_NLT, - FCOND_GE, - FCOND_NLE, - FCOND_GT - }; - /// GetOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. unsigned GetOppositeBranchOpc(unsigned Opc); - - /// MipsCCToString - Map each FP condition code to its string - inline static const char *MipsFCCToString(Mips::CondCode CC) - { - switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case FCOND_F: - case FCOND_T: return "f"; - case FCOND_UN: - case FCOND_OR: return "un"; - case FCOND_OEQ: - case FCOND_UNE: return "eq"; - case FCOND_UEQ: - case FCOND_ONE: return "ueq"; - case FCOND_OLT: - case FCOND_UGE: return "olt"; - case FCOND_ULT: - case FCOND_OGE: return "ult"; - case FCOND_OLE: - case FCOND_UGT: return "ole"; - case FCOND_ULE: - case FCOND_OGT: return "ule"; - case FCOND_SF: - case FCOND_ST: return "sf"; - case FCOND_NGLE: - case FCOND_GLE: return "ngle"; - case FCOND_SEQ: - case FCOND_SNE: return "seq"; - case FCOND_NGL: - case FCOND_GL: return "ngl"; - case FCOND_LT: - case FCOND_NLT: return "lt"; - case FCOND_NGE: - case FCOND_GE: return "nge"; - case FCOND_LE: - case FCOND_NLE: return "le"; - case FCOND_NGT: - case FCOND_GT: return "ngt"; - } - } } /// MipsII - This namespace holds all of the target specific flags that @@ -164,7 +76,7 @@ namespace MipsII { }; } -class MipsInstrInfo : public TargetInstrInfoImpl { +class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; const MipsRegisterInfo RI; public: @@ -174,7 +86,7 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; } + virtual const MipsRegisterInfo &getRegisterInfo() const; /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -224,6 +136,11 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 329a002667a0..d1a058712459 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -39,6 +39,9 @@ def SDT_MipsDivRem : SDTypeProfile<0, 2, def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, iPTR>]>; + // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, @@ -99,6 +102,10 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>; +// Pointer to dynamically allocated stack area. +def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, + [SDNPHasChain, SDNPInGlue]>; + //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// @@ -127,7 +134,12 @@ def uimm16 : Operand<i32> { // Address operand def mem : Operand<i32> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops simm16, CPURegs); + let MIOperandInfo = (ops CPURegs, simm16); +} + +def mem_ea : Operand<i32> { + let PrintMethod = "printMemOperandEA"; + let MIOperandInfo = (ops CPURegs, simm16); } // Transformation Function - get the lower 16 bits. @@ -344,7 +356,7 @@ class MoveToLOHI<bits<6> func, string instr_asm>: !strconcat(instr_asm, "\t$src"), [], IIHiLo>; class EffectiveAddress<string instr_asm> : - FI<0x09, (outs CPURegs:$dst), (ins mem:$addr), + FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr), instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>; // Count Leading Ones/Zeros in Word @@ -412,7 +424,7 @@ def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>; // are used, we have the same behavior, but get also a bunch of warnings // from the assembler. def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; -def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc\n", []>; +def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { def ATOMIC_LOAD_ADD_I8 : MipsPseudo< @@ -673,7 +685,13 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">; +def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">; + +// DynAlloc node points to dynamically allocated stack space. +// $sp is added to the list of implicitly used registers to prevent dead code +// elimination from removing instructions that modify $sp. +let Uses = [SP] in +def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; @@ -852,6 +870,9 @@ def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs), def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs), (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>; +// select MipsDynAlloc +def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp new file mode 100644 index 000000000000..f5cc3aa25f1b --- /dev/null +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -0,0 +1,118 @@ +//===-- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Mips MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsMCInstLower.h" +#include "MipsAsmPrinter.h" +#include "MipsInstrInfo.h" +#include "MipsMCSymbolRefExpr.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" +using namespace llvm; + +MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, + MipsAsmPrinter &asmprinter) + : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {} + +MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy) const { + MipsMCSymbolRefExpr::VariantKind Kind; + const MCSymbol *Symbol; + int Offset = 0; + + switch(MO.getTargetFlags()) { + default: assert(0 && "Invalid target flag!"); + case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break; + case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break; + case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break; + case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break; + case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break; + case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break; + case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break; + case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break; + case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break; + case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break; + } + + switch (MOTy) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = Mang->getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + if (MO.getOffset()) + Offset = MO.getOffset(); + break; + + default: + llvm_unreachable("<unknown operand type>"); + } + + return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset, + Ctx)); +} + +void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp; + MachineOperandType MOTy = MO.getType(); + + switch (MOTy) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. 
+ if (MO.isImplicit()) continue; + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + MCOp = LowerSymbolOperand(MO, MOTy); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h new file mode 100644 index 000000000000..ec5201be7f6d --- /dev/null +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -0,0 +1,43 @@ +//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSMCINSTLOWER_H +#define MIPSMCINSTLOWER_H +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + class MCAsmInfo; + class MCContext; + class MCInst; + class MCOperand; + class MCSymbol; + class MachineInstr; + class MachineFunction; + class Mangler; + class MipsAsmPrinter; + +/// MipsMCInstLower - This class is used to lower an MachineInstr into an +// MCInst. +class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { + typedef MachineOperand::MachineOperandType MachineOperandType; + MCContext &Ctx; + Mangler *Mang; + MipsAsmPrinter &AsmPrinter; +public: + MipsMCInstLower(Mangler *mang, const MachineFunction &MF, + MipsAsmPrinter &asmprinter); + void Lower(const MachineInstr *MI, MCInst &OutMI) const; +private: + MCOperand LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy) const; +}; +} + +#endif diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp new file mode 100644 index 000000000000..9a2bdae0e339 --- /dev/null +++ b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp @@ -0,0 +1,63 @@ +//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mipsmcsymbolrefexpr" +#include "MipsMCSymbolRefExpr.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +using namespace llvm; + +const MipsMCSymbolRefExpr* +MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol, + int Offset, MCContext &Ctx) { + return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset); +} + +void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { + switch (Kind) { + default: assert(0 && "Invalid kind!"); + case VK_Mips_None: break; + case VK_Mips_GPREL: OS << "%gp_rel("; break; + case VK_Mips_GOT_CALL: OS << "%call16("; break; + case VK_Mips_GOT: OS << "%got("; break; + case VK_Mips_ABS_HI: OS << "%hi("; break; + case VK_Mips_ABS_LO: OS << "%lo("; break; + case VK_Mips_TLSGD: OS << "%tlsgd("; break; + case VK_Mips_GOTTPREL: OS << "%gottprel("; break; + case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; + case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; + } + + OS << *Symbol; + + if (Offset) { + if (Offset > 0) + OS << '+'; + OS << Offset; + } + + if (Kind != VK_Mips_None) + OS << ')'; +} + +bool +MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + return false; +} + +void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const { + Asm->getOrCreateSymbolData(*Symbol); +} + +const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const { + return Symbol->isDefined() ? &Symbol->getSection() : NULL; +} + diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h new file mode 100644 index 000000000000..3e695963709e --- /dev/null +++ b/lib/Target/Mips/MipsMCSymbolRefExpr.h @@ -0,0 +1,62 @@ +//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPSMCSYMBOLREFEXPR_H +#define MIPSMCSYMBOLREFEXPR_H +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class MipsMCSymbolRefExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_Mips_None, + VK_Mips_GPREL, + VK_Mips_GOT_CALL, + VK_Mips_GOT, + VK_Mips_ABS_HI, + VK_Mips_ABS_LO, + VK_Mips_TLSGD, + VK_Mips_GOTTPREL, + VK_Mips_TPREL_HI, + VK_Mips_TPREL_LO + }; + +private: + const VariantKind Kind; + const MCSymbol *Symbol; + int Offset; + + explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol, + int _Offset) + : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {} + +public: + static const MipsMCSymbolRefExpr *Create(VariantKind Kind, + const MCSymbol *Symbol, int Offset, + MCContext &Ctx); + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + const MCSection *FindAssociatedSection() const; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const MipsMCSymbolRefExpr *) { return true; } + + int getOffset() const { return Offset; } + void setOffset(int O) { Offset = O; } +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index df40e6c748a6..dbb7a6744224 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -27,6 +27,7 @@ namespace llvm { class MipsFunctionInfo : public MachineFunctionInfo { private: + MachineFunction& MF; /// SRetReturnReg - Some subtargets require that sret lowering includes /// returning the value of the returned struct in a register. This field /// holds the virtual register into which the sret argument is passed. @@ -47,6 +48,7 @@ private: // LowerCall except for the frame object for restoring $gp. std::pair<int, int> InArgFIRange, OutArgFIRange; int GPFI; // Index of the frame object for restoring $gp + mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap @@ -55,10 +57,10 @@ private: int AtomicFrameIndex; public: MipsFunctionInfo(MachineFunction& MF) - : SRetReturnReg(0), GlobalBaseReg(0), + : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0), - AtomicFrameIndex(-1) + OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), + MaxCallFrameSize(0), AtomicFrameIndex(-1) {} bool isInArgFI(int FI) const { @@ -81,6 +83,16 @@ public: bool needGPSaveRestore() const { return getGPFI(); } bool isGPFI(int FI) const { return GPFI && GPFI == FI; } + // The first call to this function creates a frame object for dynamically + // allocated stack area. 
+ int getDynAllocFI() const { + if (!DynAllocFI) + DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true); + + return DynAllocFI; + } + bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; } + unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index b0984afbebed..24390daff75c 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -35,13 +35,16 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/DebugInfo.h" + +#define GET_REGINFO_TARGET_DESC +#include "MipsGenRegisterInfo.inc" using namespace llvm; MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, const TargetInstrInfo &tii) - : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - Subtarget(ST), TII(tii) {} + : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {} /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). @@ -176,28 +179,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - int Offset; - - // Calculate final offset. - // - There is no need to change the offset if the frame object is an outgoing - // argument or a $gp restore location, - // - If the frame object is any of the following, its offset must be adjusted - // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex)) - Offset = spOffset; - else - Offset = spOffset + stackSize; - - Offset += MI.getOperand(i-1).getImm(); - - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - unsigned NewReg = 0; - int NewImm = 0; - MachineBasicBlock &MBB = *MI.getParent(); - bool ATUsed; - unsigned FrameReg; const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; int MaxCSFI = -1; @@ -213,42 +194,54 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // 3. Locations for callee-saved registers. // Everything else is referenced relative to whatever register // getFrameRegister() returns. - if (MipsFI->isOutArgFI(FrameIndex) || + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) FrameReg = Mips::SP; else FrameReg = getFrameRegister(MF); - // Offset fits in the 16-bit field - if (Offset < 0x8000 && Offset >= -0x8000) { - NewReg = FrameReg; - NewImm = Offset; - ATUsed = false; - } - else { - const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + // Calculate final offset. + // - There is no need to change the offset if the frame object is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. 
+ int Offset; + + if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) || + MipsFI->isDynAllocFI(FrameIndex)) + Offset = spOffset; + else + Offset = spOffset + stackSize; + + Offset += MI.getOperand(i+1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + // If MI is not a debug value, make sure Offset fits in the 16-bit immediate + // field. + if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) { + MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); - int ImmLo = (short)(Offset & 0xffff); int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) + ((Offset & 0x8000) != 0); // FIXME: change this when mips goes MC". - BuildMI(MBB, II, DL, TII->get(Mips::NOAT)); - BuildMI(MBB, II, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi); - BuildMI(MBB, II, DL, TII->get(Mips::ADDu), Mips::AT).addReg(FrameReg) - .addReg(Mips::AT); - NewReg = Mips::AT; - NewImm = ImmLo; - - ATUsed = true; - } + BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); + BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi); + BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg) + .addReg(Mips::AT); + FrameReg = Mips::AT; + Offset = (short)(Offset & 0xffff); - // FIXME: change this when mips goes MC". - if (ATUsed) BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO)); + } - MI.getOperand(i).ChangeToRegister(NewReg, false); - MI.getOperand(i-1).ChangeToImmediate(NewImm); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); } unsigned MipsRegisterInfo:: @@ -283,5 +276,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const { int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); } - -#include "MipsGenRegisterInfo.inc" diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 76b0035f1696..646369b5966f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -16,7 +16,9 @@ #include "Mips.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "MipsGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "MipsGenRegisterInfo.inc" namespace llvm { class MipsSubtarget; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index e97d4505eb43..f0db518b754b 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -157,15 +157,15 @@ let Namespace = "Mips" in { // Register Classes //===----------------------------------------------------------------------===// -def CPURegs : RegisterClass<"Mips", [i32], 32, +def CPURegs : RegisterClass<"Mips", [i32], 32, (add // Return Values and Arguments - [V0, V1, A0, A1, A2, A3, + V0, V1, A0, A1, A2, A3, // Not preserved across procedure calls T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, // Callee save S0, S1, S2, S3, S4, S5, S6, S7, // Reserved - ZERO, AT, K0, K1, GP, SP, FP, RA]>; + ZERO, AT, K0, K1, GP, SP, FP, RA)>; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -174,33 +174,25 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, // 32bit fp: // * FGR32 - 16 32-bit even registers // * FGR32 - 32 32-bit registers (single float only mode) -def FGR32 : RegisterClass<"Mips", [f32], 32, - // Return Values and Arguments - [F0, F1, F2, F3, F12, F13, F14, F15, - // Not preserved across procedure calls - F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19, - // Callee save - F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, - // 
Reserved - F31]>; +def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>; -def AFGR64 : RegisterClass<"Mips", [f64], 64, +def AFGR64 : RegisterClass<"Mips", [f64], 64, (add // Return Values and Arguments - [D0, D1, D6, D7, + D0, D1, D6, D7, // Not preserved across procedure calls D2, D3, D4, D5, D8, D9, // Callee save D10, D11, D12, D13, D14, // Reserved - D15]> { + D15)> { let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)]; } // Condition Register for floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, [FCR31]>; +def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, [HI, LO]>; +def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; // Hardware registers -def HWRegs : RegisterClass<"Mips", [i32], 32, [HWR29]>; +def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 70747f5da137..6eee3333d584 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -7,27 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file implements the Mips specific subclass of TargetSubtarget. +// This file implements the Mips specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "MipsSubtarget.h" #include "Mips.h" -#include "MipsGenSubtarget.inc" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "MipsGenSubtargetInfo.inc" + using namespace llvm; -MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS, - bool little) : +MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool little) : + MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false) { - std::string CPU = "mips1"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "mips1"; MipsArchVersion = Mips1; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(CPUName, FS); + + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUName); // Is the target system Linux ? if (TT.find("linux") == std::string::npos) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 096bbed7b047..533d4afe073e 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -7,21 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file declares the Mips specific subclass of TargetSubtarget. +// This file declares the Mips specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// #ifndef MIPSSUBTARGET_H #define MIPSSUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" -#include "llvm/Target/TargetMachine.h" - +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "MipsGenSubtargetInfo.inc" + namespace llvm { +class StringRef; -class MipsSubtarget : public TargetSubtarget { +class MipsSubtarget : public MipsGenSubtargetInfo { public: enum MipsABIEnum { @@ -92,12 +95,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. - MipsSubtarget(const std::string &TT, const std::string &FS, bool little); + MipsSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool little); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool isMips1() const { return MipsArchVersion == Mips1; } bool isMips32() const { return MipsArchVersion >= Mips32; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index cfbb92c9ac16..20b9f4ea3853 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "Mips.h" -#include "MipsMCAsmInfo.h" #include "MipsTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" @@ -22,8 +21,6 @@ extern "C" void LLVMInitializeMipsTarget() { // Register the target. RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget); RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget); - RegisterAsmInfo<MipsMCAsmInfo> A(TheMipsTarget); - RegisterAsmInfo<MipsMCAsmInfo> B(TheMipselTarget); } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -34,10 +31,11 @@ extern "C" void LLVMInitializeMipsTarget() { // an easier handling. // Using CodeModel::Large enables different CALL behavior. MipsTargetMachine:: -MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, +MipsTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool isLittle=false): - LLVMTargetMachine(T, TT), - Subtarget(TT, FS, isLittle), + LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS, isLittle), DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), @@ -55,8 +53,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, MipselTargetMachine:: MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) : - MipsTargetMachine(T, TT, FS, true) {} + const std::string &CPU, const std::string &FS) : + MipsTargetMachine(T, TT, CPU, FS, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. 
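Note: the hunks above thread an explicit CPU string from the target machine down to MipsSubtarget, which falls back to "mips1" when the string is empty, parses features against that CPU, and initializes scheduling itineraries from it. A hypothetical caller, sketched against the registry API as it stood around this revision; the triple, CPU choice, and error handling are illustrative:

    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    TargetMachine *makeMipsTargetMachine() {
      std::string Err;
      const Target *T = TargetRegistry::lookupTarget("mipsel-unknown-linux", Err);
      if (!T)
        return 0; // target not linked in
      // CPU ("mips32") and feature string ("") are now separate arguments;
      // an empty CPU would fall back to "mips1" inside MipsSubtarget.
      return T->createTargetMachine("mipsel-unknown-linux", "mips32", "");
    }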
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 102dd8566dde..a021af2ff16d 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -35,7 +35,8 @@ namespace llvm { MipsSelectionDAGInfo TSInfo; public: MipsTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool isLittle); + const std::string &CPU, const std::string &FS, + bool isLittle); virtual const MipsInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -73,7 +74,7 @@ namespace llvm { class MipselTargetMachine : public MipsTargetMachine { public: MipselTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; } // End llvm namespace diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index 331266da30b3..ce08916aaac1 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -1,13 +1,11 @@ set(LLVM_TARGET_DEFINITIONS PTX.td) tablegen(PTXGenAsmWriter.inc -gen-asm-writer) +tablegen(PTXGenCallingConv.inc -gen-callingconv) tablegen(PTXGenDAGISel.inc -gen-dag-isel) -tablegen(PTXGenInstrInfo.inc -gen-instr-desc) -tablegen(PTXGenInstrNames.inc -gen-instr-enums) -tablegen(PTXGenRegisterInfo.inc -gen-register-desc) -tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PTXGenRegisterNames.inc -gen-register-enums) -tablegen(PTXGenSubtarget.inc -gen-subtarget) +tablegen(PTXGenInstrInfo.inc -gen-instr-info) +tablegen(PTXGenRegisterInfo.inc -gen-register-info) +tablegen(PTXGenSubtargetInfo.inc -gen-subtarget) add_llvm_target(PTXCodeGen PTXAsmPrinter.cpp @@ -15,7 +13,6 @@ add_llvm_target(PTXCodeGen PTXISelLowering.cpp PTXInstrInfo.cpp PTXFrameLowering.cpp - PTXMCAsmInfo.cpp PTXMCAsmStreamer.cpp PTXMFInfoExtract.cpp PTXRegisterInfo.cpp @@ -24,3 +21,4 @@ add_llvm_target(PTXCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..df0f63fdba60 --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMPTXDesc + PTXMCTargetDesc.cpp + PTXMCAsmInfo.cpp + ) diff --git a/lib/Target/PTX/MCTargetDesc/Makefile b/lib/Target/PTX/MCTargetDesc/Makefile new file mode 100644 index 000000000000..35f5a7b2e6ad --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMPTXDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp index b670abdbe095..efefead5341d 100644 --- a/lib/Target/PTX/PTXMCAsmInfo.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp @@ -12,10 +12,15 @@ //===----------------------------------------------------------------------===// #include "PTXMCAsmInfo.h" +#include "llvm/ADT/Triple.h" using namespace llvm; PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::ptx64) + PointerSize = 8; + CommentString = "//"; PrivateGlobalPrefix = "$L__"; diff --git a/lib/Target/PTX/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h index 03f5d66b3d60..03f5d66b3d60 100644 --- a/lib/Target/PTX/PTXMCAsmInfo.h +++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp new file mode 100644 index 000000000000..23f70bd13787 --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -0,0 +1,60 @@ +//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "PTXMCTargetDesc.h" +#include "PTXMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "PTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "PTXGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "PTXGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createPTXMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitPTXMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializePTXMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); +} + +static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitPTXMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializePTXMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, + createPTXMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, + createPTXMCSubtargetInfo); +} + +extern "C" void LLVMInitializePTXMCAsmInfo() { + RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); + RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); +} diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h new file mode 100644 index 000000000000..1003b0b5ece9 --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file provides PTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXMCTARGETDESC_H +#define PTXMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target ThePTX32Target; +extern Target ThePTX64Target; + +} // End llvm namespace + +// Defines symbolic names for PTX registers. +#define GET_REGINFO_ENUM +#include "PTXGenRegisterInfo.inc" + +// Defines symbolic names for the PTX instructions. +#define GET_INSTRINFO_ENUM +#include "PTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "PTXGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile index 2c40d6994094..93dd38aca7ec 100644 --- a/lib/Target/PTX/Makefile +++ b/lib/Target/PTX/Makefile @@ -13,14 +13,12 @@ TARGET = PTX # Make sure that tblgen is run, first thing. BUILT_SOURCES = PTXGenAsmWriter.inc \ + PTXGenCallingConv.inc \ PTXGenDAGISel.inc \ PTXGenInstrInfo.inc \ - PTXGenInstrNames.inc \ PTXGenRegisterInfo.inc \ - PTXGenRegisterInfo.h.inc \ - PTXGenRegisterNames.inc \ - PTXGenSubtarget.inc + PTXGenSubtargetInfo.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index ec2be9291a04..28cab2429c81 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -15,6 +15,7 @@ #ifndef PTX_H #define PTX_H +#include "MCTargetDesc/PTXMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -42,14 +43,6 @@ namespace llvm { FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); - extern Target ThePTX32Target; - extern Target ThePTX64Target; } // namespace llvm; -// Defines symbolic names for PTX registers. -#include "PTXGenRegisterNames.inc" - -// Defines symbolic names for the PTX instructions. -#include "PTXGenInstrNames.inc" - #endif // PTX_H diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 231866a08953..f6fbe9fffc6f 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -16,7 +16,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// Subtarget Features. 
+// Subtarget Features //===----------------------------------------------------------------------===// //===- Architectural Features ---------------------------------------------===// @@ -30,34 +30,54 @@ def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false", //===- PTX Version --------------------------------------------------------===// def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", - "Use PTX Language Version 2.0", - []>; + "Use PTX Language Version 2.0">; def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", - "Use PTX Language Version 2.1", - [FeaturePTX20]>; + "Use PTX Language Version 2.1">; def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", - "Use PTX Language Version 2.2", - [FeaturePTX21]>; + "Use PTX Language Version 2.2">; def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3", - "Use PTX Language Version 2.3", - [FeaturePTX22]>; - -//===- PTX Shader Model ---------------------------------------------------===// - -def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0", - "Enable Shader Model 1.0 compliance">; -def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3", - "Enable Shader Model 1.3 compliance", - [FeatureSM10, FeatureDouble]>; -def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0", - "Enable Shader Model 2.0 compliance", - [FeatureSM13]>; + "Use PTX Language Version 2.3">; + +//===- PTX Target ---------------------------------------------------------===// + +def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0", + "Use Shader Model 1.0">; +def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1", + "Use Shader Model 1.1">; +def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", + "Use Shader Model 1.2">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", + "Use Shader Model 1.3">; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", + "Use Shader Model 2.0">; +def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", + "Use Shader Model 2.1">; +def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", + "Use Shader Model 2.2">; +def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", + "Use Shader Model 2.3">; + +def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", + "PTX_COMPUTE_1_0", + "Use Compute Compatibility 1.0">; +def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget", + "PTX_COMPUTE_1_1", + "Use Compute Compatibility 1.1">; +def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget", + "PTX_COMPUTE_1_2", + "Use Compute Compatibility 1.2">; +def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", + "PTX_COMPUTE_1_3", + "Use Compute Compatibility 1.3">; +def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", + "PTX_COMPUTE_2_0", + "Use Compute Compatibility 2.0">; //===----------------------------------------------------------------------===// -// PTX supported processors. 
+// PTX supported processors //===----------------------------------------------------------------------===// class Proc<string Name, list<SubtargetFeature> Features> @@ -65,6 +85,27 @@ class Proc<string Name, list<SubtargetFeature> Features> def : Proc<"generic", []>; +// Processor definitions for compute/shader models +def : Proc<"compute_10", [FeatureCOMPUTE10]>; +def : Proc<"compute_11", [FeatureCOMPUTE11]>; +def : Proc<"compute_12", [FeatureCOMPUTE12]>; +def : Proc<"compute_13", [FeatureCOMPUTE13]>; +def : Proc<"compute_20", [FeatureCOMPUTE20]>; +def : Proc<"sm_10", [FeatureSM10]>; +def : Proc<"sm_11", [FeatureSM11]>; +def : Proc<"sm_12", [FeatureSM12]>; +def : Proc<"sm_13", [FeatureSM13]>; +def : Proc<"sm_20", [FeatureSM20]>; +def : Proc<"sm_21", [FeatureSM21]>; +def : Proc<"sm_22", [FeatureSM22]>; +def : Proc<"sm_23", [FeatureSM23]>; + +// Processor definitions for common GPU architectures +def : Proc<"g80", [FeatureSM10]>; +def : Proc<"gt200", [FeatureSM13]>; +def : Proc<"gf100", [FeatureSM20, FeatureDouble]>; +def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -72,6 +113,12 @@ def : Proc<"generic", []>; include "PTXRegisterInfo.td" //===----------------------------------------------------------------------===// +// Calling Conventions +//===----------------------------------------------------------------------===// + +include "PTXCallingConv.td" + +//===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 29c4781de654..2848d5460eee 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -22,9 +22,12 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -34,6 +37,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -62,8 +66,13 @@ public: const char *Modifier = 0); void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier = 0); + void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); + unsigned GetOrCreateSourceID(StringRef FileName, + StringRef DirName); + // autogen'd. 
void printInstruction(const MachineInstr *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); @@ -71,20 +80,23 @@ public: private: void EmitVariableDeclaration(const GlobalVariable *gv); void EmitFunctionDeclaration(); + + StringMap<unsigned> SourceIdMap; }; // class PTXAsmPrinter } // namespace static const char PARAM_PREFIX[] = "__param_"; +static const char RETURN_PREFIX[] = "__ret_"; static const char *getRegisterTypeName(unsigned RegNo) { #define TEST_REGCLS(cls, clsstr) \ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; - TEST_REGCLS(Preds, pred); - TEST_REGCLS(RRegu16, u16); - TEST_REGCLS(RRegu32, u32); - TEST_REGCLS(RRegu64, u64); - TEST_REGCLS(RRegf32, f32); - TEST_REGCLS(RRegf64, f64); + TEST_REGCLS(RegPred, pred); + TEST_REGCLS(RegI16, b16); + TEST_REGCLS(RegI32, b32); + TEST_REGCLS(RegI64, b64); + TEST_REGCLS(RegF32, b32); + TEST_REGCLS(RegF64, b64); #undef TEST_REGCLS llvm_unreachable("Not in any register class!"); @@ -162,6 +174,27 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + (ST.supportsDouble() ? "" : ", map_f64_to_f32"))); + // .address_size directive is optional, but it must immediately follow + // the .target directive if present within a module + if (ST.supportsPTX23()) { + std::string addrSize = ST.is64Bit() ? "64" : "32"; + OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize)); + } + + OutStreamer.AddBlankLine(); + + // Define any .file directives + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + DICompileUnit DIUnit(*I); + StringRef FN = DIUnit.getFilename(); + StringRef Dir = DIUnit.getDirectory(); + GetOrCreateSourceID(FN, Dir); + } + OutStreamer.AddBlankLine(); // declare global variables @@ -194,6 +227,21 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { def += ';'; OutStreamer.EmitRawText(Twine(def)); } + + const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); + DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() + << " frame object(s)\n"); + for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { + DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); + if (FrameInfo->getObjectSize(i) > 0) { + std::string def = "\t.reg .b"; + def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits + def += " s"; + def += utostr(i); + def += ";"; + OutStreamer.EmitRawText(Twine(def)); + } + } } void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -202,6 +250,54 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { raw_string_ostream OS(str); + DebugLoc DL = MI->getDebugLoc(); + if (!DL.isUnknown()) { + + const MDNode *S = DL.getScope(MF->getFunction()->getContext()); + + // This is taken from DwarfDebug.cpp, which is conveniently not a public + // LLVM class. 
+ StringRef Fn; + StringRef Dir; + unsigned Src = 1; + if (S) { + DIDescriptor Scope(S); + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Fn = CU.getFilename(); + Dir = CU.getDirectory(); + } else if (Scope.isFile()) { + DIFile F(S); + Fn = F.getFilename(); + Dir = F.getDirectory(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Fn = SP.getFilename(); + Dir = SP.getDirectory(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Fn = DB.getFilename(); + Dir = DB.getDirectory(); + } else + assert(0 && "Unexpected scope info"); + + Src = GetOrCreateSourceID(Fn, Dir); + } + OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(), + 0, 0, 0, Fn); + + const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc(); + + OS << "\t.loc "; + OS << utostr(MDL.getFileNum()); + OS << " "; + OS << utostr(MDL.getLine()); + OS << " "; + OS << utostr(MDL.getColumn()); + OS << "\n"; + } + + // Emit predicate printPredicateOperand(MI, OS); @@ -275,6 +371,11 @@ void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum, OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; } +void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS, const char *Modifier) { + OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +} + void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // Check to see if this is a special global used by LLVM, if so, emit it. if (EmitSpecialLLVMGlobal(gv)) @@ -311,7 +412,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { decl += ".b8 "; decl += gvsym->getName(); decl += "["; - + if (elementTy->isArrayTy()) { assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); @@ -320,7 +421,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { elementTy = arrayTy->getElementType(); unsigned numElements = arrayTy->getNumElements(); - + while (elementTy->isArrayTy()) { arrayTy = dyn_cast<const ArrayType>(elementTy); @@ -336,17 +437,17 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // Compute the size of the array, in bytes. uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3) * numElements; - + decl += utostr(arraySize); } - + decl += "]"; - + // handle string constants (assume ConstantArray means string) - + if (gv->hasInitializer()) { - Constant *C = gv->getInitializer(); + const Constant *C = gv->getInitializer(); if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) { decl += " = {"; @@ -354,10 +455,11 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { if (i > 0) decl += ","; - - decl += "0x" + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + + decl += "0x" + + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); } - + decl += "}"; } } @@ -393,17 +495,25 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); const bool isKernel = MFI->isKernel(); - unsigned reg; + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); std::string decl = isKernel ? ".entry" : ".func"; - // Print return register - reg = MFI->retReg(); - if (!isKernel && reg != PTX::NoRegister) { - decl += " (.reg ."; // FIXME: could it return in .param space? 
- decl += getRegisterTypeName(reg); - decl += " "; - decl += getRegisterName(reg); + unsigned cnt = 0; + + if (!isKernel) { + decl += " ("; + for (PTXMachineFunctionInfo::ret_iterator + i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); + } decl += ")"; } @@ -411,40 +521,31 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { decl += " "; decl += CurrentFnSym->getName().str(); - // Print parameter list - if (!MFI->argRegEmpty()) { - decl += " ("; - if (isKernel) { - unsigned cnt = 0; - for(PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - reg = *i; - assert(reg != PTX::NoRegister && "Not a valid register!"); - if (i != b) - decl += ", "; - decl += ".param ."; - decl += getRegisterTypeName(reg); - decl += " "; - decl += PARAM_PREFIX; - decl += utostr(++cnt); - } + decl += " ("; + + cnt = 0; + + // Print parameters + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + if (isKernel || ST.useParamSpaceForDeviceArgs()) { + decl += ".param .b"; + decl += utostr(*i); + decl += " "; + decl += PARAM_PREFIX; + decl += utostr(++cnt); } else { - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - reg = *i; - assert(reg != PTX::NoRegister && "Not a valid register!"); - if (i != b) - decl += ", "; - decl += ".reg ."; - decl += getRegisterTypeName(reg); - decl += " "; - decl += getRegisterName(reg); - } + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); } - decl += ")"; } + decl += ")"; OutStreamer.EmitRawText(Twine(decl)); } @@ -468,6 +569,33 @@ printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { } } +unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, + StringRef DirName) { + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return GetOrCreateSourceID("<stdin>", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !sys::path::is_absolute(FileName)) { + SmallString<128> FullPathName = DirName; + sys::path::append(FullPathName, FileName); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. + return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } + + StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); + if (Entry.getValue()) + return Entry.getValue(); + + unsigned SrcId = SourceIdMap.size(); + Entry.setValue(SrcId); + + // Print out a .file directive to specify files for .loc directives. + OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); + + return SrcId; +} + #include "PTXGenAsmWriter.inc" // Force static initialization. diff --git a/lib/Target/PTX/PTXCallingConv.td b/lib/Target/PTX/PTXCallingConv.td new file mode 100644 index 000000000000..3e3ff4896621 --- /dev/null +++ b/lib/Target/PTX/PTXCallingConv.td @@ -0,0 +1,29 @@ + +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. 
+// +//===----------------------------------------------------------------------===// + +// PTX Formal Parameter Calling Convention +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>, + CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>, + CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>> +]>; + +// PTX Return Value Calling Convention +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>, + CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>, + CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>> +]>; diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index b3c85da7b446..9adfa624b29e 
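CC_PTX and RetCC_PTX above give the TableGen-generated AnalyzeFormalArguments/AnalyzeReturn code a fixed pool per value type: indices 0 through 11 of every register class are reserved for return values, formal parameters start at index 12, and i32/f32 share the R pool while i64/f64 share RD. A rough standalone model of that round-robin assignment (the lower-case register spellings are illustrative):

```cpp
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Map an IR value type to the register-class prefix used by CC_PTX above;
// i32/f32 share "r" and i64/f64 share "rd", exactly as in the .td file.
static const char *classFor(const std::string &Ty) {
  static const std::map<std::string, const char *> M = {
      {"i1", "p"}, {"i16", "rh"}, {"i32", "r"}, {"f32", "r"},
      {"i64", "rd"}, {"f64", "rd"}};
  return M.at(Ty);
}

int main() {
  std::vector<std::string> Args = {"i32", "f64", "f32", "i1"}; // hypothetical signature
  std::map<std::string, unsigned> Next; // next free index per class
  for (const auto &Ty : Args) {
    const char *RC = classFor(Ty);
    unsigned Idx = 12 + Next[RC]++; // indices 0-11 are held back for returns
    std::printf("%-3s -> %%%s%u\n", Ty.c_str(), RC, Idx);
  }
  // prints: i32 -> %r12, f64 -> %rd12, f32 -> %r13, i1 -> %p12
  return 0;
}
```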
100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "PTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -41,8 +42,6 @@ class PTXDAGToDAGISel : public SelectionDAGISel { #include "PTXGenDAGISel.inc" private: - SDNode *SelectREAD_PARAM(SDNode *Node); - // We need this only because we can't match instruction BRAdp // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td SDNode *SelectBRCOND(SDNode *Node); @@ -67,8 +66,6 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { switch (Node->getOpcode()) { - case PTXISD::READ_PARAM: - return SelectREAD_PARAM(Node); case ISD::BRCOND: return SelectBRCOND(Node); default: @@ -76,37 +73,6 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { } } -SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) { - SDValue index = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); - unsigned opcode; - - if (index.getOpcode() != ISD::TargetConstant) - llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant"); - - if (Node->getValueType(0) == MVT::i16) { - opcode = PTX::LDpiU16; - } - else if (Node->getValueType(0) == MVT::i32) { - opcode = PTX::LDpiU32; - } - else if (Node->getValueType(0) == MVT::i64) { - opcode = PTX::LDpiU64; - } - else if (Node->getValueType(0) == MVT::f32) { - opcode = PTX::LDpiF32; - } - else if (Node->getValueType(0) == MVT::f64) { - opcode = PTX::LDpiF64; - } - else { - llvm_unreachable("Unknown parameter type for ld.param"); - } - - return PTXInstrInfo:: - GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index); -} - SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { assert(Node->getNumOperands() >= 3); diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index e9b1d8c3bbef..6fcf710e3f1f 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -15,7 +15,9 @@ #include "PTXISelLowering.h" #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" +#include "PTXSubtarget.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -24,49 +26,80 @@ using namespace llvm; +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "PTXGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes.
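The deleted SelectREAD_PARAM above and the constructor that begins below both revolve around one value-type-to-register-class mapping; nothing outside {i1, i16, i32, i64, f32, f64} is legal for this target. A standalone sketch of that shared lookup, with string-encoded types purely for illustration:

```cpp
// Sketch of the MVT -> register-class mapping used after the rename from
// RReg*/Preds to RegI*/RegF*/RegPred. Class names mirror the patch; the
// string encoding of value types is an illustration, not the LLVM API.
#include <cstdio>
#include <cstring>

const char *regClassFor(const char *VT) {
  if (!std::strcmp(VT, "i1"))  return "PTX::RegPredRegisterClass";
  if (!std::strcmp(VT, "i16")) return "PTX::RegI16RegisterClass";
  if (!std::strcmp(VT, "i32")) return "PTX::RegI32RegisterClass";
  if (!std::strcmp(VT, "i64")) return "PTX::RegI64RegisterClass";
  if (!std::strcmp(VT, "f32")) return "PTX::RegF32RegisterClass";
  if (!std::strcmp(VT, "f64")) return "PTX::RegF64RegisterClass";
  return 0; // unknown parameter type -> llvm_unreachable in the real code
}

int main() {
  std::printf("%s\n", regClassFor("f32")); // PTX::RegF32RegisterClass
  return 0;
}
```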
- addRegisterClass(MVT::i1, PTX::PredsRegisterClass); - addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass); - addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass); - addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass); - addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass); - addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass); + addRegisterClass(MVT::i1, PTX::RegPredRegisterClass); + addRegisterClass(MVT::i16, PTX::RegI16RegisterClass); + addRegisterClass(MVT::i32, PTX::RegI32RegisterClass); + addRegisterClass(MVT::i64, PTX::RegI64RegisterClass); + addRegisterClass(MVT::f32, PTX::RegF32RegisterClass); + addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); setBooleanContents(ZeroOrOneBooleanContent); + setMinFunctionAlignment(2); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + //////////////////////////////////// + /////////// Expansion ////////////// + //////////////////////////////////// + + // (any/zero/sign) extload => load + (any/zero/sign) extend - // Turn i16 (z)extload into load + (z)extend setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); - - // Turn f32 extload into load + fextend - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - // Turn f64 truncstore into trunc + store. - setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // f32 extload => load + fextend + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // f64 truncstore => trunc + store + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // sign_extend_inreg => sign_extend + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // br_cc => brcond - // Customize translation of memory addresses - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - - // Expand BR_CC into BRCOND setOperationAction(ISD::BR_CC, MVT::Other, Expand); - // Expand SELECT_CC into SETCC + // select_cc => setcc + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - // need to lower SETCC of Preds into bitwise logic + //////////////////////////////////// + //////////// Legal ///////////////// + //////////////////////////////////// + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + //////////////////////////////////// + //////////// Custom //////////////// + //////////////////////////////////// + + // customise setcc to use bitwise logic if possible + setOperationAction(ISD::SETCC, MVT::i1, Custom); - setMinFunctionAlignment(2); + // customize translation of memory addresses + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); // Compute derived properties from the register classes computeRegisterProperties(); @@ -93,8 +126,10 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { llvm_unreachable("Unknown opcode"); case PTXISD::COPY_ADDRESS: return "PTXISD::COPY_ADDRESS"; - case PTXISD::READ_PARAM: - return "PTXISD::READ_PARAM"; + case PTXISD::LOAD_PARAM: + return "PTXISD::LOAD_PARAM"; + case PTXISD::STORE_PARAM: + return "PTXISD::STORE_PARAM"; case PTXISD::EXIT: return "PTXISD::EXIT"; case PTXISD::RET: @@ -113,18 +148,18 @@ SDValue 
PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Op2 = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - + // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic - + if (Op1.getOpcode() == ISD::Constant && (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || cast<ConstantSDNode>(Op1)->isNullValue()) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); } - + return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); } @@ -149,27 +184,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // Calling Convention Implementation //===----------------------------------------------------------------------===// -namespace { -struct argmap_entry { - MVT::SimpleValueType VT; - TargetRegisterClass *RC; - TargetRegisterClass::iterator loc; - - argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC) - : VT(_VT), RC(_RC), loc(_RC->begin()) {} - - void reset() { loc = RC->begin(); } - bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; } -} argmap[] = { - argmap_entry(MVT::i1, PTX::PredsRegisterClass), - argmap_entry(MVT::i16, PTX::RRegu16RegisterClass), - argmap_entry(MVT::i32, PTX::RRegu32RegisterClass), - argmap_entry(MVT::i64, PTX::RRegu64RegisterClass), - argmap_entry(MVT::f32, PTX::RRegf32RegisterClass), - argmap_entry(MVT::f64, PTX::RRegf64RegisterClass) -}; -} // end anonymous namespace - SDValue PTXTargetLowering:: LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -181,6 +195,7 @@ SDValue PTXTargetLowering:: if (isVarArg) llvm_unreachable("PTX does not support varargs"); MachineFunction &MF = DAG.getMachineFunction(); + const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); switch (CallConv) { @@ -195,44 +210,76 @@ SDValue PTXTargetLowering:: break; } - // Make sure we don't add argument registers twice - if (MFI->isDoneAddArg()) - llvm_unreachable("cannot add argument registers twice"); - - // Reset argmap before allocation - for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap); - i != e; ++ i) - i->reset(); - - for (int i = 0, e = Ins.size(); i != e; ++ i) { - MVT::SimpleValueType VT = Ins[i].VT.SimpleTy; - - struct argmap_entry *entry = std::find(argmap, - argmap + array_lengthof(argmap), VT); - if (entry == argmap + array_lengthof(argmap)) - llvm_unreachable("Type of argument is not supported"); - - if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass) - llvm_unreachable("cannot pass preds to kernel"); - - MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); - - unsigned preg = *++(entry->loc); // allocate start from register 1 - unsigned vreg = RegInfo.createVirtualRegister(entry->RC); - RegInfo.addLiveIn(preg, vreg); - - MFI->addArgReg(preg); - - SDValue inval; - if (MFI->isKernel()) - inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain, - DAG.getTargetConstant(i, MVT::i32)); - else - inval = DAG.getCopyFromReg(Chain, dl, vreg, VT); - InVals.push_back(inval); + // We do one of two things here: + // IsKernel || SM >= 2.0 -> Use param space for arguments + // SM < 2.0 -> Use registers for arguments + if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { + // We just need to emit the proper LOAD_PARAM ISDs + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + + 
assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && + "Kernels cannot take pred operands"); + + SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, + DAG.getTargetConstant(i, MVT::i32)); + InVals.push_back(ArgValue); + + // Instead of storing a physical register in our argument list, we just + // store the total size of the parameter, in bits. The ASM printer + // knows how to process this. + MFI->addArgReg(Ins[i].VT.getStoreSizeInBits()); + } + } + else { + // For device functions, we use the PTX calling convention to do register + // assignments then create CopyFromReg ISDs for the allocated registers + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, + *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_PTX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + + CCValAssign& VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + TargetRegisterClass* TRC = 0; + + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + + // Determine which register class we need + if (RegVT == MVT::i1) { + TRC = PTX::RegPredRegisterClass; + } + else if (RegVT == MVT::i16) { + TRC = PTX::RegI16RegisterClass; + } + else if (RegVT == MVT::i32) { + TRC = PTX::RegI32RegisterClass; + } + else if (RegVT == MVT::i64) { + TRC = PTX::RegI64RegisterClass; + } + else if (RegVT == MVT::f32) { + TRC = PTX::RegF32RegisterClass; + } + else if (RegVT == MVT::f64) { + TRC = PTX::RegF64RegisterClass; + } + else { + llvm_unreachable("Unknown parameter type"); + } + + unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); + MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + InVals.push_back(ArgValue); + + MFI->addArgReg(VA.getLocReg()); + } } - - MFI->doneAddArg(); return Chain; } @@ -254,51 +301,47 @@ SDValue PTXTargetLowering:: assert(Outs.size() == 0 && "Kernel must return void."); return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); case CallingConv::PTX_Device: - assert(Outs.size() <= 1 && "Can at most return one value."); + //assert(Outs.size() <= 1 && "Can at most return one value."); break; } - // PTX_Device - - // return void - if (Outs.size() == 0) - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); + MachineFunction& MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); SDValue Flag; - unsigned reg; - if (Outs[0].VT == MVT::i16) { - reg = PTX::RH0; - } - else if (Outs[0].VT == MVT::i32) { - reg = PTX::R0; - } - else if (Outs[0].VT == MVT::i64) { - reg = PTX::RD0; - } - else if (Outs[0].VT == MVT::f32) { - reg = PTX::F0; - } - else { - assert(Outs[0].VT == MVT::f64 && "Can return only basic types"); - reg = PTX::FD0; - } + // Even though we could use the .param space for return arguments for + // device functions if SM >= 2.0 and the number of return arguments is + // only 1, we just always use registers since this makes the codegen + // easier. 
+ SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); - MachineFunction &MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); - MFI->setRetReg(reg); + CCInfo.AnalyzeReturn(Outs, RetCC_PTX); + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign& VA = RVLocs[i]; - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) - DAG.getMachineFunction().getRegInfo().addLiveOut(reg); + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); - // Copy the result values into the output registers - Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag); + unsigned Reg = VA.getLocReg(); - // Guarantee that all emitted copies are stuck together, - // avoiding something bad - Flag = Chain.getValue(1); + DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); + Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together, + // avoiding something bad + Flag = Chain.getValue(1); + + MFI->addRetReg(Reg); + } + + if (Flag.getNode() == 0) { + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); + } + else { + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); + } } diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index 225c0004a913..43185416e1fc 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -24,12 +24,13 @@ class PTXTargetMachine; namespace PTXISD { enum NodeType { FIRST_NUMBER = ISD::BUILTIN_OP_END, - READ_PARAM, + LOAD_PARAM, + STORE_PARAM, EXIT, RET, COPY_ADDRESS }; -} // namespace PTXISD +} // namespace PTXISD class PTXTargetLowering : public TargetLowering { public: @@ -40,7 +41,7 @@ class PTXTargetLowering : public TargetLowering { virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -58,9 +59,9 @@ class PTXTargetLowering : public TargetLowering { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - + private: SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; }; // class PTXTargetLowering diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td index e4e099987e8d..8cee351ee0df 100644 --- a/lib/Target/PTX/PTXInstrFormats.td +++ b/lib/Target/PTX/PTXInstrFormats.td @@ -9,7 +9,7 @@ // PTX Predicate operand, default to (0, 0) = (zero-reg, always). // Leave PrintMethod empty; predicate printing is defined elsewhere. 
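For context on the `pred` operand redefined just below: every instruction in this backend carries a guard consisting of a predicate register and an immediate, and the default (zero_reg, 0) means "execute unconditionally". A toy model of those semantics; treating the immediate as a negate flag (@p vs. @!p) is an assumption made for illustration:

```cpp
#include <cstdio>

// Toy model of the (predicate-register, flag) pair carried by the `pred`
// operand. Register number 0 stands in for zero_reg, i.e. no guard.
struct Guard {
  unsigned PredReg; // 0 = zero_reg sentinel: instruction is unpredicated
  bool Negate;      // assumed meaning of the i32imm: @!p instead of @p
};

bool executes(Guard G, bool PredValue) {
  if (G.PredReg == 0)
    return true; // default guard (zero_reg, 0): always runs
  return G.Negate ? !PredValue : PredValue;
}

int main() {
  std::printf("%d\n", executes({0, false}, false)); // 1: default guard
  std::printf("%d\n", executes({7, false}, true));  // 1: "@p insn" with p=1
  std::printf("%d\n", executes({7, true},  true));  // 0: "@!p insn" with p=1
  return 0;
}
```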
-def pred : PredicateOperand<OtherVT, (ops Preds, i32imm), +def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm), (ops (i1 zero_reg), (i32 0))>; let Namespace = "PTX" in { diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index a12a6d01afa7..425265a2fdb7 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -18,27 +18,29 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; - +#define GET_INSTRINFO_CTOR #include "PTXGenInstrInfo.inc" +using namespace llvm; + PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM) - : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)), + : PTXGenInstrInfo(), RI(_TM, *this), TM(_TM) {} static const struct map_entry { const TargetRegisterClass *cls; const int opcode; } map[] = { - { &PTX::RRegu16RegClass, PTX::MOVU16rr }, - { &PTX::RRegu32RegClass, PTX::MOVU32rr }, - { &PTX::RRegu64RegClass, PTX::MOVU64rr }, - { &PTX::RRegf32RegClass, PTX::MOVF32rr }, - { &PTX::RRegf64RegClass, PTX::MOVF64rr }, - { &PTX::PredsRegClass, PTX::MOVPREDrr } + { &PTX::RegI16RegClass, PTX::MOVU16rr }, + { &PTX::RegI32RegClass, PTX::MOVU32rr }, + { &PTX::RegI64RegClass, PTX::MOVU64rr }, + { &PTX::RegF32RegClass, PTX::MOVF32rr }, + { &PTX::RegF64RegClass, PTX::MOVF64rr }, + { &PTX::RegPredRegClass, PTX::MOVPREDrr } }; void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -47,8 +49,8 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { if (map[i].cls->contains(DstReg, SrcReg)) { - const TargetInstrDesc &TID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg). + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). addReg(SrcReg, getKillRegState(KillSrc)); AddDefaultPredicate(MI); return; @@ -69,8 +71,8 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) if (DstRC == map[i].cls) { - const TargetInstrDesc &TID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg); + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg); AddDefaultPredicate(MI); return true; } @@ -155,7 +157,7 @@ DefinesPredicate(MachineInstr *MI, const MachineOperand &MO = MI->getOperand(0); - if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass) + if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass) return false; Pred.push_back(MO); @@ -178,13 +180,13 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::const_iterator iter = MBB.end(); const MachineInstr& instLast1 = *--iter; - const TargetInstrDesc &desc1 = instLast1.getDesc(); + const MCInstrDesc &desc1 = instLast1.getDesc(); // for special case that MBB has only 1 instruction const bool IsSizeOne = MBB.size() == 1; // if IsSizeOne is true, *--iter and instLast2 are invalid // we put a dummy value in instLast2 and desc2 since they are used const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; - const TargetInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc(); + const MCInstrDesc &desc2 = IsSizeOne ? 
desc1 : instLast2.getDesc(); DEBUG(dbgs() << "\n"); DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); @@ -288,6 +290,77 @@ InsertBranch(MachineBasicBlock &MBB, } } +// Memory operand folding for spills +void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MII, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineInstr& MI = *MII; + DebugLoc DL = MI.getDebugLoc(); + + DEBUG(dbgs() << "storeRegToStackSlot: " << MI); + + int OpCode; + + // Select the appropriate opcode based on the register class + if (RC == PTX::RegI16RegisterClass) { + OpCode = PTX::STACKSTOREI16; + } else if (RC == PTX::RegI32RegisterClass) { + OpCode = PTX::STACKSTOREI32; + } else if (RC == PTX::RegI64RegisterClass) { + OpCode = PTX::STACKSTOREI32; + } else if (RC == PTX::RegF32RegisterClass) { + OpCode = PTX::STACKSTOREF32; + } else if (RC == PTX::RegF64RegisterClass) { + OpCode = PTX::STACKSTOREF64; + } else { + llvm_unreachable("Unknown PTX register class!"); + } + + // Build the store instruction (really a mov) + MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); + MIB.addFrameIndex(FrameIdx); + MIB.addReg(SrcReg); + + AddDefaultPredicate(MIB); +} + +void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MII, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineInstr& MI = *MII; + DebugLoc DL = MI.getDebugLoc(); + + DEBUG(dbgs() << "loadRegToStackSlot: " << MI); + + int OpCode; + + // Select the appropriate opcode based on the register class + if (RC == PTX::RegI16RegisterClass) { + OpCode = PTX::STACKLOADI16; + } else if (RC == PTX::RegI32RegisterClass) { + OpCode = PTX::STACKLOADI32; + } else if (RC == PTX::RegI64RegisterClass) { + OpCode = PTX::STACKLOADI32; + } else if (RC == PTX::RegF32RegisterClass) { + OpCode = PTX::STACKLOADF32; + } else if (RC == PTX::RegF64RegisterClass) { + OpCode = PTX::STACKLOADF64; + } else { + llvm_unreachable("Unknown PTX register class!"); + } + + // Build the load instruction (really a mov) + MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); + MIB.addReg(DestReg); + MIB.addFrameIndex(FrameIdx); + + AddDefaultPredicate(MIB); +} + // static helper routines MachineSDNode *PTXInstrInfo:: @@ -316,7 +389,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { } bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { - const TargetInstrDesc &desc = inst.getDesc(); + const MCInstrDesc &desc = inst.getDesc(); return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); } diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h index a04be7728f88..871f1ac8d376 100644 --- a/lib/Target/PTX/PTXInstrInfo.h +++ b/lib/Target/PTX/PTXInstrInfo.h @@ -17,6 +17,9 @@ #include "PTXRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#define GET_INSTRINFO_HEADER +#include "PTXGenInstrInfo.inc" + namespace llvm { class PTXTargetMachine; @@ -24,7 +27,7 @@ class MachineSDNode; class SDValue; class SelectionDAG; -class PTXInstrInfo : public TargetInstrInfoImpl { +class PTXInstrInfo : public PTXGenInstrInfo { private: const PTXRegisterInfo RI; PTXTargetMachine &TM; @@ -84,6 +87,29 @@ public: const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + // Memory operand folding for spills + // TODO: Implement this eventually and get rid of storeRegToStackSlot and + // loadRegFromStackSlot. 
Doing so will get rid of the "stack" registers + // we currently use to spill, though I doubt the overall effect on ptxas + // output will be large. I have yet to see a case where ptxas is unable + // to see through the "stack" register usage and hence generates + // efficient code anyway. + // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + // MachineInstr* MI, + // const SmallVectorImpl<unsigned> &Ops, + // int FrameIndex) const; + + virtual void storeRegToStackSlot(MachineBasicBlock& MBB, + MachineBasicBlock::iterator MII, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass* RC, + const TargetRegisterInfo* TRI) const; + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MII, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + // static helper routines static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index d5d08bed2247..6bfe906d40ab 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; // Shader Model Support -def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">; -def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">; -def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">; -def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">; +def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; +def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; +def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">; +def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">; // PTX Version Support def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; @@ -143,11 +143,11 @@ def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; // Address operands def MEMri32 : Operand<i32> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RRegu32, i32imm); + let MIOperandInfo = (ops RegI32, i32imm); } def MEMri64 : Operand<i64> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RRegu64, i64imm); + let MIOperandInfo = (ops RegI64, i64imm); } def MEMii32 : Operand<i32> { let PrintMethod = "printMemOperand"; @@ -163,6 +163,10 @@ def MEMpi : Operand<i32> { let PrintMethod = "printParamOperand"; let MIOperandInfo = (ops i32imm); } +def MEMret : Operand<i32> { + let PrintMethod = "printReturnOperand"; + let MIOperandInfo = (ops i32imm); +} // Branch & call targets have OtherVT type. 
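The storeRegToStackSlot/loadRegFromStackSlot bodies above pick a STACK* pseudo-instruction per register class through if/else chains; note that both chains map RegI64RegisterClass to the I32 opcode, which reads like a copy-paste slip. A table-driven sketch of the presumably intended selection, assuming STACKSTOREI64/STACKLOADI64 exist alongside the variants the chains already name (the enumerators here are placeholders, not the real PTX:: values):

```cpp
#include <cstdio>
#include <cstdlib>

enum RegClass { RegI16, RegI32, RegI64, RegF32, RegF64 };
enum Opcode { STACKSTOREI16, STACKSTOREI32, STACKSTOREI64,
              STACKSTOREF32, STACKSTOREF64,
              STACKLOADI16, STACKLOADI32, STACKLOADI64,
              STACKLOADF32, STACKLOADF64 };

// One table drives both spill directions.
static const struct { RegClass RC; Opcode Store; Opcode Load; } SpillMap[] = {
    {RegI16, STACKSTOREI16, STACKLOADI16},
    {RegI32, STACKSTOREI32, STACKLOADI32},
    {RegI64, STACKSTOREI64, STACKLOADI64}, // the chains above use the I32 forms here
    {RegF32, STACKSTOREF32, STACKLOADF32},
    {RegF64, STACKSTOREF64, STACKLOADF64},
};

Opcode storeOpcodeFor(RegClass RC) {
  for (unsigned i = 0; i != sizeof(SpillMap) / sizeof(SpillMap[0]); ++i)
    if (SpillMap[i].RC == RC)
      return SpillMap[i].Store;
  std::abort(); // unknown PTX register class
}

int main() {
  std::printf("%d\n", storeOpcodeFor(RegI64)); // 2, i.e. STACKSTOREI64
  return 0;
}
```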
def brtarget : Operand<OtherVT>; @@ -180,181 +184,190 @@ def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>; def PTXexit : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; def PTXret - : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>; + : SDNode<"PTXISD::RET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def PTXcopyaddress : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; +// Load/store .param space +def PTXloadparam + : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXstoreparam + : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// //===- Floating-Point Instructions - 2 Operand Form -----------------------===// multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> { - def rr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), + def rr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), !strconcat(opcstr, ".f32\t$d, $a"), - [(set RRegf32:$d, (opnode RRegf32:$a))]>; - def ri32 : InstPTX<(outs RRegf32:$d), + [(set RegF32:$d, (opnode RegF32:$a))]>; + def ri32 : InstPTX<(outs RegF32:$d), (ins f32imm:$a), !strconcat(opcstr, ".f32\t$d, $a"), - [(set RRegf32:$d, (opnode fpimm:$a))]>; - def rr64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), + [(set RegF32:$d, (opnode fpimm:$a))]>; + def rr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), !strconcat(opcstr, ".f64\t$d, $a"), - [(set RRegf64:$d, (opnode RRegf64:$a))]>; - def ri64 : InstPTX<(outs RRegf64:$d), + [(set RegF64:$d, (opnode RegF64:$a))]>; + def ri64 : InstPTX<(outs RegF64:$d), (ins f64imm:$a), !strconcat(opcstr, ".f64\t$d, $a"), - [(set RRegf64:$d, (opnode fpimm:$a))]>; + [(set RegF64:$d, (opnode fpimm:$a))]>; } //===- Floating-Point Instructions - 3 Operand Form -----------------------===// multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> { - def rr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b), + def rr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>; - def ri32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, f32imm:$b), + [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>; + def ri32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>; - def rr64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b), + [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>; + def rr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>; - def ri64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, f64imm:$b), + [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>; + def ri64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>; + [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>; } //===- Floating-Point Instructions - 4 Operand Form -----------------------===// multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> { - def rrr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b, RRegf32:$c), + def rrr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b, 
RegF32:$c), !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, - RRegf32:$b), - RRegf32:$c))]>; - def rri32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b, f32imm:$c), + [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, + RegF32:$b), + RegF32:$c))]>; + def rri32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b, f32imm:$c), !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, - RRegf32:$b), + [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, + RegF32:$b), fpimm:$c))]>; - def rrr64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b, RRegf64:$c), + def rrr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b, RegF64:$c), !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, - RRegf64:$b), - RRegf64:$c))]>; - def rri64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b, f64imm:$c), + [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, + RegF64:$b), + RegF64:$c))]>; + def rri64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b, f64imm:$c), !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, - RRegf64:$b), + [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, + RegF64:$b), fpimm:$c))]>; } multiclass INT3<string opcstr, SDNode opnode> { - def rr16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, RRegu16:$b), + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; - def ri16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, i16imm:$b), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, RRegu32:$b), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; - def ri32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, i32imm:$b), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, RRegu64:$b), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; - def ri64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, i64imm:$b), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } multiclass PTX_LOGIC<string opcstr, SDNode opnode> { - def ripreds : InstPTX<(outs Preds:$d), - (ins Preds:$a, i1imm:$b), + def ripreds : InstPTX<(outs RegPred:$d), + (ins RegPred:$a, i1imm:$b), !strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set Preds:$d, (opnode Preds:$a, imm:$b))]>; - def rrpreds : InstPTX<(outs Preds:$d), - (ins Preds:$a, Preds:$b), + [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>; + def rrpreds : InstPTX<(outs RegPred:$d), + (ins RegPred:$a, RegPred:$b), 
!strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>; - def rr16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, RRegu16:$b), + [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>; + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; - def ri16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, i16imm:$b), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, RRegu32:$b), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; - def ri32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, i32imm:$b), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, RRegu64:$b), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; - def ri64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, i64imm:$b), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } multiclass INT3ntnc<string opcstr, SDNode opnode> { - def rr16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, RRegu16:$b), + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; - def rr32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, RRegu32:$b), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; - def rr64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, RRegu64:$b), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; - def ri16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, i16imm:$b), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; - def ri32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, i32imm:$b), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; - def ri64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, i64imm:$b), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode 
RRegu64:$a, imm:$b))]>; - def ir16 : InstPTX<(outs RRegu16:$d), - (ins i16imm:$a, RRegu16:$b), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; + def ir16 : InstPTX<(outs RegI16:$d), + (ins i16imm:$a, RegI16:$b), !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>; - def ir32 : InstPTX<(outs RRegu32:$d), - (ins i32imm:$a, RRegu32:$b), + [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>; + def ir32 : InstPTX<(outs RegI32:$d), + (ins i32imm:$a, RegI32:$b), !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>; - def ir64 : InstPTX<(outs RRegu64:$d), - (ins i64imm:$a, RRegu64:$b), + [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>; + def ir64 : InstPTX<(outs RegI64:$d), + (ins i64imm:$a, RegI64:$b), !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>; + [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; } multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, @@ -362,63 +375,63 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, // TODO support 5-operand format: p|q, a, b, c def rr - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>; + [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>; def ri - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>; + [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>; def rr_and_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_and_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_or_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_or_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_xor_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_xor_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, 
".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_and_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; def ri_and_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; def rr_or_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; def ri_or_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; def rr_xor_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; def ri_xor_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; } multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, @@ -426,74 +439,74 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, // TODO support 5-operand format: p|q, a, b, c def rr_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>; + [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>; def rr_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>; + [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; def rr_and_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_and_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", 
cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_or_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_or_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_xor_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_xor_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_and_not_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; def rr_and_not_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; def rr_or_not_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; def rr_or_not_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; def rr_xor_not_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; def rr_xor_not_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set 
Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; } multiclass PTX_SELP<RegisterClass RC, string regclsname> { def rr - : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c), + : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>; + [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; } multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { @@ -524,11 +537,11 @@ multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_lo } multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { - defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>; - defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>; - defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>; - defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>; - defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>; + defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; } multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { @@ -559,11 +572,11 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_st } multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { - defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>; - defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>; - defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>; - defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>; - defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>; + defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; } //===----------------------------------------------------------------------===// @@ -584,44 +597,59 @@ defm REM : INT3<"rem", urem>; defm FNEG : PTX_FLOAT_2OP<"neg", fneg>; // Standard Binary Operations -defm FADD : PTX_FLOAT_3OP<"add", fadd>; -defm FSUB : PTX_FLOAT_3OP<"sub", fsub>; -defm FMUL : PTX_FLOAT_3OP<"mul", fmul>; - -// TODO: Allow user selection of rounding modes for fdiv. -// For division, we need to have f32 and f64 differently. -// For f32, we just always use .approx since it is supported on all hardware -// for PTX 1.4+, which is our minimum target. -def FDIVrr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b), - "div.approx.f32\t$d, $a, $b", - [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>; -def FDIVri32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, f32imm:$b), - "div.approx.f32\t$d, $a, $b", - [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>; - -// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0. 
-def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b), +defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>; +defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>; +defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>; + +// For floating-point division: +// SM_13+ defaults to .rn for f32 and f64, +// SM10 must *not* provide a rounding + +// TODO: +// - Allow user selection of rounding modes for fdiv +// - Add support for -prec-div=false (.approx) + +def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), + "div.rn.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVri32SM13 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), + "div.rn.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), + "div.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, + Requires<[FDivNoRoundingMode]>; +def FDIVri32SM10 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), + "div.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, + Requires<[FDivNoRoundingMode]>; + +def FDIVrr64SM13 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), "div.rn.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, - Requires<[SupportsSM13]>; -def FDIVri64SM13 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, f64imm:$b), + [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVri64SM13 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), "div.rn.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, - Requires<[SupportsSM13]>; -def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b), + [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), "div.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, - Requires<[DoesNotSupportSM13]>; -def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, f64imm:$b), + [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, + Requires<[FDivNoRoundingMode]>; +def FDIVri64SM10 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), "div.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, - Requires<[DoesNotSupportSM13]>; + [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, + Requires<[FDivNoRoundingMode]>; @@ -633,40 +661,42 @@ def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), // In the short term, mad is supported on all PTX versions and we use a // default rounding mode no matter what shader model or PTX version. // TODO: Allow the rounding mode to be selectable through llc. 
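The fdiv selections above, like the mad forms just below, are now gated on fdivNeedsRoundingMode()/fmadNeedsRoundingMode() instead of raw shader-model checks, so the .td patterns state why a form exists rather than which chip wants it. A standalone model of the subtarget side; the sm_1.3 threshold is an assumption carried over from the SupportsSM13 predicates this patch retires:

```cpp
#include <cstdio>

// Sketch of the subtarget queries behind FDivNeedsRoundingMode /
// FMadNeedsRoundingMode. Shader model encoded as major*10 + minor.
struct PTXSubtargetModel {
  unsigned SM; // 10 = sm_1.0, 13 = sm_1.3, 20 = sm_2.0, ...
  // Assumption: sm_1.3 and newer require an explicit rounding mode on
  // div/mad, mirroring the retired SupportsSM13 predicate.
  bool fdivNeedsRoundingMode() const { return SM >= 13; }
  bool fmadNeedsRoundingMode() const { return SM >= 13; }
};

const char *fdiv64For(const PTXSubtargetModel &ST) {
  return ST.fdivNeedsRoundingMode() ? "div.rn.f64" : "div.f64";
}

int main() {
  PTXSubtargetModel SM10 = {10}, SM13 = {13};
  std::printf("sm_10: %s\n", fdiv64For(SM10)); // div.f64
  std::printf("sm_13: %s\n", fdiv64For(SM13)); // div.rn.f64
  return 0;
}
```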
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>; -defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>; +defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, + Requires<[FMadNeedsRoundingMode, SupportsFMA]>; +defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, + Requires<[FMadNoRoundingMode, SupportsFMA]>; ///===- Floating-Point Intrinsic Instructions -----------------------------===// -def FSQRT32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), +def FSQRT32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), "sqrt.rn.f32\t$d, $a", - [(set RRegf32:$d, (fsqrt RRegf32:$a))]>; + [(set RegF32:$d, (fsqrt RegF32:$a))]>; -def FSQRT64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), +def FSQRT64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), "sqrt.rn.f64\t$d, $a", - [(set RRegf64:$d, (fsqrt RRegf64:$a))]>; + [(set RegF64:$d, (fsqrt RegF64:$a))]>; -def FSIN32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), +def FSIN32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), "sin.approx.f32\t$d, $a", - [(set RRegf32:$d, (fsin RRegf32:$a))]>; + [(set RegF32:$d, (fsin RegF32:$a))]>; -def FSIN64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), +def FSIN64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), "sin.approx.f64\t$d, $a", - [(set RRegf64:$d, (fsin RRegf64:$a))]>; + [(set RegF64:$d, (fsin RegF64:$a))]>; -def FCOS32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), +def FCOS32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), "cos.approx.f32\t$d, $a", - [(set RRegf32:$d, (fcos RRegf32:$a))]>; + [(set RegF32:$d, (fcos RegF32:$a))]>; -def FCOS64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), +def FCOS64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), "cos.approx.f64\t$d, $a", - [(set RRegf64:$d, (fcos RRegf64:$a))]>; + [(set RegF64:$d, (fcos RegF64:$a))]>; ///===- Comparison and Selection Instructions -----------------------------===// @@ -675,56 +705,68 @@ def FCOS64 : InstPTX<(outs RRegf64:$d), // Compare u16 -defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">; -defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">; -defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">; -defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">; -defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">; -defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">; +defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">; +defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">; +defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">; +defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">; +defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">; +defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">; +defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">; +defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">; +defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">; +defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">; // Compare u32 -defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">; -defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">; -defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">; -defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">; -defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">; -defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">; +defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">; +defm 
SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">; +defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">; +defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">; +defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">; +defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">; +defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">; +defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">; +defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">; +defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">; // Compare u64 -defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">; -defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">; -defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">; -defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">; -defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">; -defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">; +defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">; +defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">; +defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">; +defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">; +defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">; +defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">; +defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">; +defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">; +defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">; +defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">; // Compare f32 -defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">; -defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">; -defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">; -defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">; -defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">; -defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">; +defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">; +defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">; // Compare f64 -defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">; -defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">; -defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">; -defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">; -defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">; -defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">; +defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">; +defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">; // .selp -defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">; -defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">; -defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">; -defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">; -defm 
PTX_SELPf64 : PTX_SELP<RRegf64, "f64">; +defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">; +defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">; +defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">; +defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">; +defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">; ///===- Logic and Shift Instructions --------------------------------------===// @@ -740,47 +782,47 @@ defm XOR : PTX_LOGIC<"xor", xor>; let neverHasSideEffects = 1 in { def MOVPREDrr - : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; + : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; def MOVU16rr - : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>; + : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>; def MOVU32rr - : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>; + : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>; def MOVU64rr - : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>; + : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>; def MOVF32rr - : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>; def MOVF64rr - : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOVPREDri - : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a", - [(set Preds:$d, imm:$a)]>; + : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a", + [(set RegPred:$d, imm:$a)]>; def MOVU16ri - : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", - [(set RRegu16:$d, imm:$a)]>; + : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", + [(set RegI16:$d, imm:$a)]>; def MOVU32ri - : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RRegu32:$d, imm:$a)]>; + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, imm:$a)]>; def MOVU64ri - : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RRegu64:$d, imm:$a)]>; + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, imm:$a)]>; def MOVF32ri - : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", - [(set RRegf32:$d, fpimm:$a)]>; + : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", + [(set RegF32:$d, fpimm:$a)]>; def MOVF64ri - : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", - [(set RRegf64:$d, fpimm:$a)]>; + : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", + [(set RegF64:$d, fpimm:$a)]>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOVaddr32 - : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>; + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>; def MOVaddr64 - : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>; + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; } // Loads @@ -789,17 +831,48 @@ defm LDc : PTX_LD_ALL<"ld.const", load_constant>; defm LDl : PTX_LD_ALL<"ld.local", load_local>; defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; -// This is a special instruction that is manually inserted for kernel parameters -def LDpiU16 : InstPTX<(outs 
RRegu16:$d), (ins MEMpi:$a), - "ld.param.u16\t$d, [$a]", []>; -def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a), - "ld.param.u32\t$d, [$a]", []>; -def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a), - "ld.param.u64\t$d, [$a]", []>; -def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a), - "ld.param.f32\t$d, [$a]", []>; -def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a), - "ld.param.f64\t$d, [$a]", []>; +// These instructions are used to load/store from the .param space for +// device and kernel parameters + +let hasSideEffects = 1 in { + def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a), + "ld.param.pred\t$d, [$a]", + [(set RegPred:$d, (PTXloadparam timm:$a))]>; + def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", + [(set RegI16:$d, (PTXloadparam timm:$a))]>; + def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", + [(set RegI32:$d, (PTXloadparam timm:$a))]>; + def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", + [(set RegI64:$d, (PTXloadparam timm:$a))]>; + def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", + [(set RegF32:$d, (PTXloadparam timm:$a))]>; + def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", + [(set RegF64:$d, (PTXloadparam timm:$a))]>; + + def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a), + "st.param.pred\t[$d], $a", + [(PTXstoreparam timm:$d, RegPred:$a)]>; + def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a), + "st.param.u16\t[$d], $a", + [(PTXstoreparam timm:$d, RegI16:$a)]>; + def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a), + "st.param.u32\t[$d], $a", + [(PTXstoreparam timm:$d, RegI32:$a)]>; + def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a), + "st.param.u64\t[$d], $a", + [(PTXstoreparam timm:$d, RegI64:$a)]>; + def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a), + "st.param.f32\t[$d], $a", + [(PTXstoreparam timm:$d, RegF32:$a)]>; + def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a), + "st.param.f64\t[$d], $a", + [(PTXstoreparam timm:$d, RegF64:$a)]>; +} // Stores defm STg : PTX_ST_ALL<"st.global", store_global>; @@ -811,136 +884,174 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>; // TODO: Do something with st.param if/when it is needed. // Conversion to pred - +// PTX does not directly support converting to a predicate type, so we fake it +// by performing a greater-than test between the value and zero. This follows +// the C convention that any non-zero value is equivalent to 'true'. 
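For example, a minimal sketch of the resulting code (illustrative only, not lines from the patch): truncating a 32-bit register to i1 and then zero-extending it back would emit a setp/selp pair assembled from the asm strings in the surrounding defs:

    setp.gt.u32  p0, r0, 0;     // p0 = (r0 > 0), i.e. r0 is non-zero
    selp.u32     r1, 1, 0, p0;  // r1 = p0 ? 1 : 0

Here p0, r0 and r1 are placeholder register names; the selp form matches the CVT_u32_pred pattern defined below.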
def CVT_pred_u16 - : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a", - [(set Preds:$d, (trunc RRegu16:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0", + [(set RegPred:$d, (trunc RegI16:$a))]>; def CVT_pred_u32 - : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a", - [(set Preds:$d, (trunc RRegu32:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0", + [(set RegPred:$d, (trunc RegI32:$a))]>; def CVT_pred_u64 - : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a", - [(set Preds:$d, (trunc RRegu64:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0", + [(set RegPred:$d, (trunc RegI64:$a))]>; def CVT_pred_f32 - : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a", - [(set Preds:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0", + [(set RegPred:$d, (fp_to_uint RegF32:$a))]>; def CVT_pred_f64 - : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a", - [(set Preds:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0", + [(set RegPred:$d, (fp_to_uint RegF64:$a))]>; // Conversion to u16 +// PTX does not directly support converting a predicate to a value, so we +// use a select instruction to select either 0 or 1 (integer or fp) based +// on the truth value of the predicate. +def CVT_u16_preda + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (anyext RegPred:$a))]>; def CVT_u16_pred - : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a", - [(set RRegu16:$d, (zext Preds:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (zext RegPred:$a))]>; + +def CVT_u16_preds + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (sext RegPred:$a))]>; def CVT_u16_u32 - : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a", - [(set RRegu16:$d, (trunc RRegu32:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", + [(set RegI16:$d, (trunc RegI32:$a))]>; def CVT_u16_u64 - : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a", - [(set RRegu16:$d, (trunc RRegu64:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", + [(set RegI16:$d, (trunc RegI64:$a))]>; def CVT_u16_f32 - : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a", - [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", + [(set RegI16:$d, (fp_to_uint RegF32:$a))]>; def CVT_u16_f64 - : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a", - [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", + [(set RegI16:$d, (fp_to_uint RegF64:$a))]>; // Conversion to u32 def CVT_u32_pred - : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", - [(set RRegu32:$d, (zext Preds:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", + [(set RegI32:$d, (zext RegPred:$a))]>; + +def CVT_u32_b16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", + [(set RegI32:$d, (anyext RegI16:$a))]>; def CVT_u32_u16 - : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a", - [(set RRegu32:$d, (zext RRegu16:$a))]>; + : InstPTX<(outs RegI32:$d), (ins 
RegI16:$a), "cvt.u32.u16\t$d, $a", + [(set RegI32:$d, (zext RegI16:$a))]>; + +def CVT_u32_preds + : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", + [(set RegI32:$d, (sext RegPred:$a))]>; + +def CVT_u32_s16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a", + [(set RegI32:$d, (sext RegI16:$a))]>; def CVT_u32_u64 - : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a", - [(set RRegu32:$d, (trunc RRegu64:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", + [(set RegI32:$d, (trunc RegI64:$a))]>; def CVT_u32_f32 - : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a", - [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", + [(set RegI32:$d, (fp_to_uint RegF32:$a))]>; def CVT_u32_f64 - : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a", - [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", + [(set RegI32:$d, (fp_to_uint RegF64:$a))]>; // Conversion to u64 def CVT_u64_pred - : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a", - [(set RRegu64:$d, (zext Preds:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", + [(set RegI64:$d, (zext RegPred:$a))]>; + +def CVT_u64_preds + : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", + [(set RegI64:$d, (sext RegPred:$a))]>; def CVT_u64_u16 - : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a", - [(set RRegu64:$d, (zext RRegu16:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", + [(set RegI64:$d, (zext RegI16:$a))]>; + +def CVT_u64_s16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a", + [(set RegI64:$d, (sext RegI16:$a))]>; def CVT_u64_u32 - : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a", - [(set RRegu64:$d, (zext RRegu32:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", + [(set RegI64:$d, (zext RegI32:$a))]>; + +def CVT_u64_s32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a", + [(set RegI64:$d, (sext RegI32:$a))]>; def CVT_u64_f32 - : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a", - [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", + [(set RegI64:$d, (fp_to_uint RegF32:$a))]>; def CVT_u64_f64 - : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a", - [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", + [(set RegI64:$d, (fp_to_uint RegF64:$a))]>; // Conversion to f32 def CVT_f32_pred - : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a", - [(set RRegf32:$d, (uint_to_fp Preds:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegPred:$a), + "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0 + [(set RegF32:$d, (uint_to_fp RegPred:$a))]>; def CVT_f32_u16 - : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a", - [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", + [(set RegF32:$d, (uint_to_fp RegI16:$a))]>; def CVT_f32_u32 - : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a", - [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", + [(set RegF32:$d, 
(uint_to_fp RegI32:$a))]>; def CVT_f32_u64 - : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a", - [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a", + [(set RegF32:$d, (uint_to_fp RegI64:$a))]>; def CVT_f32_f64 - : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a", - [(set RRegf32:$d, (fround RRegf64:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a", + [(set RegF32:$d, (fround RegF64:$a))]>; // Conversion to f64 def CVT_f64_pred - : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a", - [(set RRegf64:$d, (uint_to_fp Preds:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegPred:$a), + "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0 + [(set RegF64:$d, (uint_to_fp RegPred:$a))]>; def CVT_f64_u16 - : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a", - [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a", + [(set RegF64:$d, (uint_to_fp RegI16:$a))]>; def CVT_f64_u32 - : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a", - [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a", + [(set RegF64:$d, (uint_to_fp RegI32:$a))]>; def CVT_f64_u64 - : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a", - [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a", + [(set RegF64:$d, (uint_to_fp RegI64:$a))]>; def CVT_f64_f32 - : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a", - [(set RRegf64:$d, (fextend RRegf32:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", + [(set RegF64:$d, (fextend RegF32:$a))]>; ///===- Control Flow Instructions -----------------------------------------===// @@ -951,7 +1062,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1 in { // FIXME: The pattern part is blank because I cannot (or do not yet know - // how to) use the first operand of PredicateOperand (a Preds register) here + // how to) use the first operand of PredicateOperand (a RegPred register) here def BRAdp : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [/*(brcond pred:$_p, bb:$d)*/]>; @@ -962,6 +1073,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; } +///===- Spill Instructions ------------------------------------------------===// +// Special instructions used for stack spilling +def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a), + "mov.u16\ts$d, $a", []>; +def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a), + "mov.u32\ts$d, $a", []>; +def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a), + "mov.u64\ts$d, $a", []>; +def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a), + "mov.f32\ts$d, $a", []>; +def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a), + "mov.f64\ts$d, $a", []>; + +def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a), + "mov.u16\t$d, s$a", []>; +def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a), + "mov.u32\t$d, s$a", []>; +def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a), + "mov.u64\t$d, s$a", []>; +def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a), + "mov.f32\t$d, s$a", []>; +def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a), + 
"mov.f64\t$d, s$a", []>; + ///===- Intrinsic Instructions --------------------------------------------===// include "PTXIntrinsicInstrInfo.td" diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td index 320934a2228c..8d97909d339a 100644 --- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td +++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -14,14 +14,14 @@ // PTX Special Purpose Register Accessor Intrinsics class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop> - : InstPTX<(outs RRegu64:$d), (ins), + : InstPTX<(outs RegI64:$d), (ins), !strconcat("mov.u64\t$d, %", regname), - [(set RRegu64:$d, (intop))]>; + [(set RegI64:$d, (intop))]>; class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> - : InstPTX<(outs RRegu32:$d), (ins), + : InstPTX<(outs RegI32:$d), (ins), !strconcat("mov.u32\t$d, %", regname), - [(set RRegu32:$d, (intop))]>; + [(set RegI32:$d, (intop))]>; // TODO Add read vector-version of special registers diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp index 1574670b6e9b..b13a3dace130 100644 --- a/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -23,7 +23,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" using namespace llvm; @@ -115,7 +114,8 @@ public: virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label); + const MCSymbol *Label, + unsigned PointerSize); virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); @@ -260,7 +260,8 @@ void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias, void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label) { + const MCSymbol *Label, + unsigned PointerSize) { report_fatal_error("Unimplemented."); } @@ -367,7 +368,7 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, int64_t IntValue; if (!Value->EvaluateAsAbsolute(IntValue)) report_fatal_error("Don't know how to emit this value."); - if (getContext().getTargetAsmInfo().isLittleEndian()) { + if (getContext().getAsmInfo().isLittleEndian()) { EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); } else { diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index c5e191007239..6fe9e6c3f657 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -54,8 +54,6 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n"); - unsigned retreg = MFI->retReg(); - DEBUG(dbgs() << "PTX::NoRegister == " << PTX::NoRegister << "\n" << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n"); @@ -68,15 +66,13 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { // FIXME: This is a slow linear scanning for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg) if (MRI.isPhysRegUsed(reg) && - reg != retreg && + !MFI->isRetReg(reg) && (MFI->isKernel() || !MFI->isArgReg(reg))) MFI->addLocalVarReg(reg); // Notify MachineFunctionInfo that I've done adding local var reg MFI->doneAddLocalVar(); - DEBUG(dbgs() << "Return Reg: " << retreg << "\n"); - DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->argRegBegin(), e = MFI->argRegEnd(); i != e; ++i) diff --git 
a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 81df1c236cb2..9d65f5bd1ade 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -15,6 +15,7 @@ #define PTX_MACHINE_FUNCTION_INFO_H #include "PTX.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -25,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo { private: bool is_kernel; std::vector<unsigned> reg_arg, reg_local_var; - unsigned reg_ret; + std::vector<unsigned> reg_ret; bool _isDoneAddArg; public: @@ -39,19 +40,22 @@ public: void addArgReg(unsigned reg) { reg_arg.push_back(reg); } void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void setRetReg(unsigned reg) { reg_ret = reg; } + void addRetReg(unsigned reg) { + if (!isRetReg(reg)) { + reg_ret.push_back(reg); + } + } void doneAddArg(void) { _isDoneAddArg = true; } void doneAddLocalVar(void) {} - bool isDoneAddArg(void) { return _isDoneAddArg; } - bool isKernel() const { return is_kernel; } typedef std::vector<unsigned>::const_iterator reg_iterator; typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; + typedef std::vector<unsigned>::const_iterator ret_iterator; bool argRegEmpty() const { return reg_arg.empty(); } int getNumArg() const { return reg_arg.size(); } @@ -64,12 +68,19 @@ public: reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } reg_iterator localVarRegEnd() const { return reg_local_var.end(); } - unsigned retReg() const { return reg_ret; } + bool retRegEmpty() const { return reg_ret.empty(); } + int getNumRet() const { return reg_ret.size(); } + ret_iterator retRegBegin() const { return reg_ret.begin(); } + ret_iterator retRegEnd() const { return reg_ret.end(); } bool isArgReg(unsigned reg) const { return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); } + bool isRetReg(unsigned reg) const { + return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end(); + } + bool isLocalVarReg(unsigned reg) const { return std::find(reg_local_var.begin(), reg_local_var.end(), reg) != reg_local_var.end(); diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index 0f3e7bc2c3a7..cb56ea98a2b8 100644 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -13,7 +13,39 @@ #include "PTX.h" #include "PTXRegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define GET_REGINFO_TARGET_DESC +#include "PTXGenRegisterInfo.inc" using namespace llvm; -#include "PTXGenRegisterInfo.inc" +PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, + const TargetInstrInfo &TII) + : PTXGenRegisterInfo() { +} + +void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS) const { + unsigned Index; + MachineInstr& MI = *II; + + Index = 0; + while (!MI.getOperand(Index).isFI()) { + ++Index; + assert(Index < MI.getNumOperands() && + "Instr does not have a FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(Index).getIndex(); + + DEBUG(dbgs() << "eliminateFrameIndex: " << MI); + DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); + DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); + + // This frame index is post stack slot re-use assignments + MI.getOperand(Index).ChangeToImmediate(FrameIndex); +} diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h index 
dc5635238106..0b63cb6d458e 100644 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ b/lib/Target/PTX/PTXRegisterInfo.h @@ -17,7 +17,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/BitVector.h" -#include "PTXGenRegisterInfo.h.inc" +#define GET_REGINFO_HEADER +#include "PTXGenRegisterInfo.inc" namespace llvm { class PTXTargetMachine; @@ -25,7 +26,7 @@ class MachineFunction; struct PTXRegisterInfo : public PTXGenRegisterInfo { PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII) {} + const TargetInstrInfo &TII); virtual const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const { @@ -38,11 +39,9 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo { return Reserved; // reserve no regs } - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS = NULL) const { - llvm_unreachable("PTX does not support general function call"); - } + RegScavenger *RS = NULL) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const { llvm_unreachable("PTX does not have a frame register"); diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index f6161419fec1..1313d248325e 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -1,3 +1,4 @@ + //===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// // // The LLVM Compiler Infrastructure @@ -21,16 +22,16 @@ class PTXReg<string n> : Register<n> { ///===- Predicate Registers -----------------------------------------------===// -def P0 : PTXReg<"p0">; -def P1 : PTXReg<"p1">; -def P2 : PTXReg<"p2">; -def P3 : PTXReg<"p3">; -def P4 : PTXReg<"p4">; -def P5 : PTXReg<"p5">; -def P6 : PTXReg<"p6">; -def P7 : PTXReg<"p7">; -def P8 : PTXReg<"p8">; -def P9 : PTXReg<"p9">; +def P0 : PTXReg<"p0">; +def P1 : PTXReg<"p1">; +def P2 : PTXReg<"p2">; +def P3 : PTXReg<"p3">; +def P4 : PTXReg<"p4">; +def P5 : PTXReg<"p5">; +def P6 : PTXReg<"p6">; +def P7 : PTXReg<"p7">; +def P8 : PTXReg<"p8">; +def P9 : PTXReg<"p9">; def P10 : PTXReg<"p10">; def P11 : PTXReg<"p11">; def P12 : PTXReg<"p12">; @@ -85,19 +86,83 @@ def P60 : PTXReg<"p60">; def P61 : PTXReg<"p61">; def P62 : PTXReg<"p62">; def P63 : PTXReg<"p63">; +def P64 : PTXReg<"p64">; +def P65 : PTXReg<"p65">; +def P66 : PTXReg<"p66">; +def P67 : PTXReg<"p67">; +def P68 : PTXReg<"p68">; +def P69 : PTXReg<"p69">; +def P70 : PTXReg<"p70">; +def P71 : PTXReg<"p71">; +def P72 : PTXReg<"p72">; +def P73 : PTXReg<"p73">; +def P74 : PTXReg<"p74">; +def P75 : PTXReg<"p75">; +def P76 : PTXReg<"p76">; +def P77 : PTXReg<"p77">; +def P78 : PTXReg<"p78">; +def P79 : PTXReg<"p79">; +def P80 : PTXReg<"p80">; +def P81 : PTXReg<"p81">; +def P82 : PTXReg<"p82">; +def P83 : PTXReg<"p83">; +def P84 : PTXReg<"p84">; +def P85 : PTXReg<"p85">; +def P86 : PTXReg<"p86">; +def P87 : PTXReg<"p87">; +def P88 : PTXReg<"p88">; +def P89 : PTXReg<"p89">; +def P90 : PTXReg<"p90">; +def P91 : PTXReg<"p91">; +def P92 : PTXReg<"p92">; +def P93 : PTXReg<"p93">; +def P94 : PTXReg<"p94">; +def P95 : PTXReg<"p95">; +def P96 : PTXReg<"p96">; +def P97 : PTXReg<"p97">; +def P98 : PTXReg<"p98">; +def P99 : PTXReg<"p99">; +def P100 : PTXReg<"p100">; +def P101 : PTXReg<"p101">; +def P102 : PTXReg<"p102">; +def P103 : PTXReg<"p103">; +def P104 : PTXReg<"p104">; +def P105 : PTXReg<"p105">; +def P106 : PTXReg<"p106">; +def P107 : PTXReg<"p107">; +def P108 : PTXReg<"p108">; +def P109 : PTXReg<"p109">; +def P110 : PTXReg<"p110">; +def P111 : 
PTXReg<"p111">; +def P112 : PTXReg<"p112">; +def P113 : PTXReg<"p113">; +def P114 : PTXReg<"p114">; +def P115 : PTXReg<"p115">; +def P116 : PTXReg<"p116">; +def P117 : PTXReg<"p117">; +def P118 : PTXReg<"p118">; +def P119 : PTXReg<"p119">; +def P120 : PTXReg<"p120">; +def P121 : PTXReg<"p121">; +def P122 : PTXReg<"p122">; +def P123 : PTXReg<"p123">; +def P124 : PTXReg<"p124">; +def P125 : PTXReg<"p125">; +def P126 : PTXReg<"p126">; +def P127 : PTXReg<"p127">; -///===- 16-bit Integer Registers ------------------------------------------===// +///===- 16-Bit Registers --------------------------------------------------===// -def RH0 : PTXReg<"rh0">; -def RH1 : PTXReg<"rh1">; -def RH2 : PTXReg<"rh2">; -def RH3 : PTXReg<"rh3">; -def RH4 : PTXReg<"rh4">; -def RH5 : PTXReg<"rh5">; -def RH6 : PTXReg<"rh6">; -def RH7 : PTXReg<"rh7">; -def RH8 : PTXReg<"rh8">; -def RH9 : PTXReg<"rh9">; +def RH0 : PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; def RH10 : PTXReg<"rh10">; def RH11 : PTXReg<"rh11">; def RH12 : PTXReg<"rh12">; @@ -152,20 +217,83 @@ def RH60 : PTXReg<"rh60">; def RH61 : PTXReg<"rh61">; def RH62 : PTXReg<"rh62">; def RH63 : PTXReg<"rh63">; +def RH64 : PTXReg<"rh64">; +def RH65 : PTXReg<"rh65">; +def RH66 : PTXReg<"rh66">; +def RH67 : PTXReg<"rh67">; +def RH68 : PTXReg<"rh68">; +def RH69 : PTXReg<"rh69">; +def RH70 : PTXReg<"rh70">; +def RH71 : PTXReg<"rh71">; +def RH72 : PTXReg<"rh72">; +def RH73 : PTXReg<"rh73">; +def RH74 : PTXReg<"rh74">; +def RH75 : PTXReg<"rh75">; +def RH76 : PTXReg<"rh76">; +def RH77 : PTXReg<"rh77">; +def RH78 : PTXReg<"rh78">; +def RH79 : PTXReg<"rh79">; +def RH80 : PTXReg<"rh80">; +def RH81 : PTXReg<"rh81">; +def RH82 : PTXReg<"rh82">; +def RH83 : PTXReg<"rh83">; +def RH84 : PTXReg<"rh84">; +def RH85 : PTXReg<"rh85">; +def RH86 : PTXReg<"rh86">; +def RH87 : PTXReg<"rh87">; +def RH88 : PTXReg<"rh88">; +def RH89 : PTXReg<"rh89">; +def RH90 : PTXReg<"rh90">; +def RH91 : PTXReg<"rh91">; +def RH92 : PTXReg<"rh92">; +def RH93 : PTXReg<"rh93">; +def RH94 : PTXReg<"rh94">; +def RH95 : PTXReg<"rh95">; +def RH96 : PTXReg<"rh96">; +def RH97 : PTXReg<"rh97">; +def RH98 : PTXReg<"rh98">; +def RH99 : PTXReg<"rh99">; +def RH100 : PTXReg<"rh100">; +def RH101 : PTXReg<"rh101">; +def RH102 : PTXReg<"rh102">; +def RH103 : PTXReg<"rh103">; +def RH104 : PTXReg<"rh104">; +def RH105 : PTXReg<"rh105">; +def RH106 : PTXReg<"rh106">; +def RH107 : PTXReg<"rh107">; +def RH108 : PTXReg<"rh108">; +def RH109 : PTXReg<"rh109">; +def RH110 : PTXReg<"rh110">; +def RH111 : PTXReg<"rh111">; +def RH112 : PTXReg<"rh112">; +def RH113 : PTXReg<"rh113">; +def RH114 : PTXReg<"rh114">; +def RH115 : PTXReg<"rh115">; +def RH116 : PTXReg<"rh116">; +def RH117 : PTXReg<"rh117">; +def RH118 : PTXReg<"rh118">; +def RH119 : PTXReg<"rh119">; +def RH120 : PTXReg<"rh120">; +def RH121 : PTXReg<"rh121">; +def RH122 : PTXReg<"rh122">; +def RH123 : PTXReg<"rh123">; +def RH124 : PTXReg<"rh124">; +def RH125 : PTXReg<"rh125">; +def RH126 : PTXReg<"rh126">; +def RH127 : PTXReg<"rh127">; +///===- 32-Bit Registers --------------------------------------------------===// -///===- 32-bit Integer Registers ------------------------------------------===// - -def R0 : PTXReg<"r0">; -def R1 : PTXReg<"r1">; -def R2 : PTXReg<"r2">; -def R3 : PTXReg<"r3">; -def R4 : PTXReg<"r4">; -def R5 : PTXReg<"r5">; -def R6 : PTXReg<"r6">; -def R7 : 
PTXReg<"r7">; -def R8 : PTXReg<"r8">; -def R9 : PTXReg<"r9">; +def R0 : PTXReg<"r0">; +def R1 : PTXReg<"r1">; +def R2 : PTXReg<"r2">; +def R3 : PTXReg<"r3">; +def R4 : PTXReg<"r4">; +def R5 : PTXReg<"r5">; +def R6 : PTXReg<"r6">; +def R7 : PTXReg<"r7">; +def R8 : PTXReg<"r8">; +def R9 : PTXReg<"r9">; def R10 : PTXReg<"r10">; def R11 : PTXReg<"r11">; def R12 : PTXReg<"r12">; @@ -220,20 +348,83 @@ def R60 : PTXReg<"r60">; def R61 : PTXReg<"r61">; def R62 : PTXReg<"r62">; def R63 : PTXReg<"r63">; +def R64 : PTXReg<"r64">; +def R65 : PTXReg<"r65">; +def R66 : PTXReg<"r66">; +def R67 : PTXReg<"r67">; +def R68 : PTXReg<"r68">; +def R69 : PTXReg<"r69">; +def R70 : PTXReg<"r70">; +def R71 : PTXReg<"r71">; +def R72 : PTXReg<"r72">; +def R73 : PTXReg<"r73">; +def R74 : PTXReg<"r74">; +def R75 : PTXReg<"r75">; +def R76 : PTXReg<"r76">; +def R77 : PTXReg<"r77">; +def R78 : PTXReg<"r78">; +def R79 : PTXReg<"r79">; +def R80 : PTXReg<"r80">; +def R81 : PTXReg<"r81">; +def R82 : PTXReg<"r82">; +def R83 : PTXReg<"r83">; +def R84 : PTXReg<"r84">; +def R85 : PTXReg<"r85">; +def R86 : PTXReg<"r86">; +def R87 : PTXReg<"r87">; +def R88 : PTXReg<"r88">; +def R89 : PTXReg<"r89">; +def R90 : PTXReg<"r90">; +def R91 : PTXReg<"r91">; +def R92 : PTXReg<"r92">; +def R93 : PTXReg<"r93">; +def R94 : PTXReg<"r94">; +def R95 : PTXReg<"r95">; +def R96 : PTXReg<"r96">; +def R97 : PTXReg<"r97">; +def R98 : PTXReg<"r98">; +def R99 : PTXReg<"r99">; +def R100 : PTXReg<"r100">; +def R101 : PTXReg<"r101">; +def R102 : PTXReg<"r102">; +def R103 : PTXReg<"r103">; +def R104 : PTXReg<"r104">; +def R105 : PTXReg<"r105">; +def R106 : PTXReg<"r106">; +def R107 : PTXReg<"r107">; +def R108 : PTXReg<"r108">; +def R109 : PTXReg<"r109">; +def R110 : PTXReg<"r110">; +def R111 : PTXReg<"r111">; +def R112 : PTXReg<"r112">; +def R113 : PTXReg<"r113">; +def R114 : PTXReg<"r114">; +def R115 : PTXReg<"r115">; +def R116 : PTXReg<"r116">; +def R117 : PTXReg<"r117">; +def R118 : PTXReg<"r118">; +def R119 : PTXReg<"r119">; +def R120 : PTXReg<"r120">; +def R121 : PTXReg<"r121">; +def R122 : PTXReg<"r122">; +def R123 : PTXReg<"r123">; +def R124 : PTXReg<"r124">; +def R125 : PTXReg<"r125">; +def R126 : PTXReg<"r126">; +def R127 : PTXReg<"r127">; +///===- 64-Bit Registers --------------------------------------------------===// -///===- 64-bit Integer Registers ------------------------------------------===// - -def RD0 : PTXReg<"rd0">; -def RD1 : PTXReg<"rd1">; -def RD2 : PTXReg<"rd2">; -def RD3 : PTXReg<"rd3">; -def RD4 : PTXReg<"rd4">; -def RD5 : PTXReg<"rd5">; -def RD6 : PTXReg<"rd6">; -def RD7 : PTXReg<"rd7">; -def RD8 : PTXReg<"rd8">; -def RD9 : PTXReg<"rd9">; +def RD0 : PTXReg<"rd0">; +def RD1 : PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; def RD10 : PTXReg<"rd10">; def RD11 : PTXReg<"rd11">; def RD12 : PTXReg<"rd12">; @@ -288,204 +479,77 @@ def RD60 : PTXReg<"rd60">; def RD61 : PTXReg<"rd61">; def RD62 : PTXReg<"rd62">; def RD63 : PTXReg<"rd63">; - - -///===- 32-bit Floating-Point Registers -----------------------------------===// - -def F0 : PTXReg<"f0">; -def F1 : PTXReg<"f1">; -def F2 : PTXReg<"f2">; -def F3 : PTXReg<"f3">; -def F4 : PTXReg<"f4">; -def F5 : PTXReg<"f5">; -def F6 : PTXReg<"f6">; -def F7 : PTXReg<"f7">; -def F8 : PTXReg<"f8">; -def F9 : PTXReg<"f9">; -def F10 : PTXReg<"f10">; -def F11 : PTXReg<"f11">; -def F12 : PTXReg<"f12">; -def F13 : PTXReg<"f13">; -def 
F14 : PTXReg<"f14">; -def F15 : PTXReg<"f15">; -def F16 : PTXReg<"f16">; -def F17 : PTXReg<"f17">; -def F18 : PTXReg<"f18">; -def F19 : PTXReg<"f19">; -def F20 : PTXReg<"f20">; -def F21 : PTXReg<"f21">; -def F22 : PTXReg<"f22">; -def F23 : PTXReg<"f23">; -def F24 : PTXReg<"f24">; -def F25 : PTXReg<"f25">; -def F26 : PTXReg<"f26">; -def F27 : PTXReg<"f27">; -def F28 : PTXReg<"f28">; -def F29 : PTXReg<"f29">; -def F30 : PTXReg<"f30">; -def F31 : PTXReg<"f31">; -def F32 : PTXReg<"f32">; -def F33 : PTXReg<"f33">; -def F34 : PTXReg<"f34">; -def F35 : PTXReg<"f35">; -def F36 : PTXReg<"f36">; -def F37 : PTXReg<"f37">; -def F38 : PTXReg<"f38">; -def F39 : PTXReg<"f39">; -def F40 : PTXReg<"f40">; -def F41 : PTXReg<"f41">; -def F42 : PTXReg<"f42">; -def F43 : PTXReg<"f43">; -def F44 : PTXReg<"f44">; -def F45 : PTXReg<"f45">; -def F46 : PTXReg<"f46">; -def F47 : PTXReg<"f47">; -def F48 : PTXReg<"f48">; -def F49 : PTXReg<"f49">; -def F50 : PTXReg<"f50">; -def F51 : PTXReg<"f51">; -def F52 : PTXReg<"f52">; -def F53 : PTXReg<"f53">; -def F54 : PTXReg<"f54">; -def F55 : PTXReg<"f55">; -def F56 : PTXReg<"f56">; -def F57 : PTXReg<"f57">; -def F58 : PTXReg<"f58">; -def F59 : PTXReg<"f59">; -def F60 : PTXReg<"f60">; -def F61 : PTXReg<"f61">; -def F62 : PTXReg<"f62">; -def F63 : PTXReg<"f63">; - - -///===- 64-bit Floating-Point Registers -----------------------------------===// - -def FD0 : PTXReg<"fd0">; -def FD1 : PTXReg<"fd1">; -def FD2 : PTXReg<"fd2">; -def FD3 : PTXReg<"fd3">; -def FD4 : PTXReg<"fd4">; -def FD5 : PTXReg<"fd5">; -def FD6 : PTXReg<"fd6">; -def FD7 : PTXReg<"fd7">; -def FD8 : PTXReg<"fd8">; -def FD9 : PTXReg<"fd9">; -def FD10 : PTXReg<"fd10">; -def FD11 : PTXReg<"fd11">; -def FD12 : PTXReg<"fd12">; -def FD13 : PTXReg<"fd13">; -def FD14 : PTXReg<"fd14">; -def FD15 : PTXReg<"fd15">; -def FD16 : PTXReg<"fd16">; -def FD17 : PTXReg<"fd17">; -def FD18 : PTXReg<"fd18">; -def FD19 : PTXReg<"fd19">; -def FD20 : PTXReg<"fd20">; -def FD21 : PTXReg<"fd21">; -def FD22 : PTXReg<"fd22">; -def FD23 : PTXReg<"fd23">; -def FD24 : PTXReg<"fd24">; -def FD25 : PTXReg<"fd25">; -def FD26 : PTXReg<"fd26">; -def FD27 : PTXReg<"fd27">; -def FD28 : PTXReg<"fd28">; -def FD29 : PTXReg<"fd29">; -def FD30 : PTXReg<"fd30">; -def FD31 : PTXReg<"fd31">; -def FD32 : PTXReg<"fd32">; -def FD33 : PTXReg<"fd33">; -def FD34 : PTXReg<"fd34">; -def FD35 : PTXReg<"fd35">; -def FD36 : PTXReg<"fd36">; -def FD37 : PTXReg<"fd37">; -def FD38 : PTXReg<"fd38">; -def FD39 : PTXReg<"fd39">; -def FD40 : PTXReg<"fd40">; -def FD41 : PTXReg<"fd41">; -def FD42 : PTXReg<"fd42">; -def FD43 : PTXReg<"fd43">; -def FD44 : PTXReg<"fd44">; -def FD45 : PTXReg<"fd45">; -def FD46 : PTXReg<"f4d6">; -def FD47 : PTXReg<"fd47">; -def FD48 : PTXReg<"fd48">; -def FD49 : PTXReg<"fd49">; -def FD50 : PTXReg<"fd50">; -def FD51 : PTXReg<"fd51">; -def FD52 : PTXReg<"fd52">; -def FD53 : PTXReg<"fd53">; -def FD54 : PTXReg<"fd54">; -def FD55 : PTXReg<"fd55">; -def FD56 : PTXReg<"fd56">; -def FD57 : PTXReg<"fd57">; -def FD58 : PTXReg<"fd58">; -def FD59 : PTXReg<"fd59">; -def FD60 : PTXReg<"fd60">; -def FD61 : PTXReg<"fd61">; -def FD62 : PTXReg<"fd62">; -def FD63 : PTXReg<"fd63">; - +def RD64 : PTXReg<"rd64">; +def RD65 : PTXReg<"rd65">; +def RD66 : PTXReg<"rd66">; +def RD67 : PTXReg<"rd67">; +def RD68 : PTXReg<"rd68">; +def RD69 : PTXReg<"rd69">; +def RD70 : PTXReg<"rd70">; +def RD71 : PTXReg<"rd71">; +def RD72 : PTXReg<"rd72">; +def RD73 : PTXReg<"rd73">; +def RD74 : PTXReg<"rd74">; +def RD75 : PTXReg<"rd75">; +def RD76 : PTXReg<"rd76">; +def RD77 : PTXReg<"rd77">; +def 
RD78 : PTXReg<"rd78">; +def RD79 : PTXReg<"rd79">; +def RD80 : PTXReg<"rd80">; +def RD81 : PTXReg<"rd81">; +def RD82 : PTXReg<"rd82">; +def RD83 : PTXReg<"rd83">; +def RD84 : PTXReg<"rd84">; +def RD85 : PTXReg<"rd85">; +def RD86 : PTXReg<"rd86">; +def RD87 : PTXReg<"rd87">; +def RD88 : PTXReg<"rd88">; +def RD89 : PTXReg<"rd89">; +def RD90 : PTXReg<"rd90">; +def RD91 : PTXReg<"rd91">; +def RD92 : PTXReg<"rd92">; +def RD93 : PTXReg<"rd93">; +def RD94 : PTXReg<"rd94">; +def RD95 : PTXReg<"rd95">; +def RD96 : PTXReg<"rd96">; +def RD97 : PTXReg<"rd97">; +def RD98 : PTXReg<"rd98">; +def RD99 : PTXReg<"rd99">; +def RD100 : PTXReg<"rd100">; +def RD101 : PTXReg<"rd101">; +def RD102 : PTXReg<"rd102">; +def RD103 : PTXReg<"rd103">; +def RD104 : PTXReg<"rd104">; +def RD105 : PTXReg<"rd105">; +def RD106 : PTXReg<"rd106">; +def RD107 : PTXReg<"rd107">; +def RD108 : PTXReg<"rd108">; +def RD109 : PTXReg<"rd109">; +def RD110 : PTXReg<"rd110">; +def RD111 : PTXReg<"rd111">; +def RD112 : PTXReg<"rd112">; +def RD113 : PTXReg<"rd113">; +def RD114 : PTXReg<"rd114">; +def RD115 : PTXReg<"rd115">; +def RD116 : PTXReg<"rd116">; +def RD117 : PTXReg<"rd117">; +def RD118 : PTXReg<"rd118">; +def RD119 : PTXReg<"rd119">; +def RD120 : PTXReg<"rd120">; +def RD121 : PTXReg<"rd121">; +def RD122 : PTXReg<"rd122">; +def RD123 : PTXReg<"rd123">; +def RD124 : PTXReg<"rd124">; +def RD125 : PTXReg<"rd125">; +def RD126 : PTXReg<"rd126">; +def RD127 : PTXReg<"rd127">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// - -def Preds : RegisterClass<"PTX", [i1], 8, - [P0, P1, P2, P3, P4, P5, P6, P7, - P8, P9, P10, P11, P12, P13, P14, P15, - P16, P17, P18, P19, P20, P21, P22, P23, - P24, P25, P26, P27, P28, P29, P30, P31, - P32, P33, P34, P35, P36, P37, P38, P39, - P40, P41, P42, P43, P44, P45, P46, P47, - P48, P49, P50, P51, P52, P53, P54, P55, - P56, P57, P58, P59, P60, P61, P62, P63]>; - -def RRegu16 : RegisterClass<"PTX", [i16], 16, - [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, - RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15, - RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, - RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, - RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, - RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, - RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, - RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>; - -def RRegu32 : RegisterClass<"PTX", [i32], 32, - [R0, R1, R2, R3, R4, R5, R6, R7, - R8, R9, R10, R11, R12, R13, R14, R15, - R16, R17, R18, R19, R20, R21, R22, R23, - R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, - R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, - R56, R57, R58, R59, R60, R61, R62, R63]>; - -def RRegu64 : RegisterClass<"PTX", [i64], 64, - [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, - RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15, - RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, - RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, - RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, - RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, - RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, - RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>; - -def RRegf32 : RegisterClass<"PTX", [f32], 32, - [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F14, F15, - F16, F17, F18, F19, F20, F21, F22, F23, - F24, F25, F26, F27, F28, F29, F30, F31, - F32, F33, F34, F35, F36, F37, F38, F39, - F40, F41, F42, F43, F44, F45, 
F46, F47, - F48, F49, F50, F51, F52, F53, F54, F55, - F56, F57, F58, F59, F60, F61, F62, F63]>; - -def RRegf64 : RegisterClass<"PTX", [f64], 64, - [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7, - FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15, - FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23, - FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31, - FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39, - FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47, - FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55, - FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>; +def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index e8a1dfecd00c..8ec646e46f68 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -7,32 +7,51 @@ // //===----------------------------------------------------------------------===// // -// This file implements the PTX specific subclass of TargetSubtarget. +// This file implements the PTX specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "PTXSubtarget.h" +#include "PTX.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "PTXGenSubtargetInfo.inc" using namespace llvm; -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS, - bool is64Bit) - : PTXShaderModel(PTX_SM_1_0), +PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) + : PTXGenSubtargetInfo(TT, CPU, FS), + PTXTarget(PTX_COMPUTE_1_0), PTXVersion(PTX_VERSION_2_0), SupportsDouble(false), SupportsFMA(true), - Is64Bit(is64Bit) { - std::string TARGET = "generic"; - ParseSubtargetFeatures(FS, TARGET); + Is64Bit(is64Bit) { + std::string TARGET = CPU; + if (TARGET.empty()) + TARGET = "generic"; + ParseSubtargetFeatures(TARGET, FS); } std::string PTXSubtarget::getTargetString() const { - switch(PTXShaderModel) { - default: llvm_unreachable("Unknown shader model"); + switch(PTXTarget) { + default: llvm_unreachable("Unknown PTX target"); case PTX_SM_1_0: return "sm_10"; + case PTX_SM_1_1: return "sm_11"; + case PTX_SM_1_2: return "sm_12"; case PTX_SM_1_3: return "sm_13"; case PTX_SM_2_0: return "sm_20"; + case PTX_SM_2_1: return "sm_21"; + case PTX_SM_2_2: return "sm_22"; + case PTX_SM_2_3: return "sm_23"; + case PTX_COMPUTE_1_0: return "compute_10"; + case PTX_COMPUTE_1_1: return "compute_11"; + case PTX_COMPUTE_1_2: return "compute_12"; + case PTX_COMPUTE_1_3: return "compute_13"; + case PTX_COMPUTE_2_0: return "compute_20"; } } @@ -45,5 +64,3 @@ std::string PTXSubtarget::getPTXVersionString() const { case PTX_VERSION_2_3: return "2.3"; } } - -#include "PTXGenSubtarget.inc" diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index 59fa6965bbac..0921f1f22c49 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -7,26 +7,44 @@ // //===----------------------------------------------------------------------===// // -// This file declares the PTX specific 
subclass of TargetSubtarget. +// This file declares the PTX specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef PTX_SUBTARGET_H #define PTX_SUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "PTXGenSubtargetInfo.inc" namespace llvm { - class PTXSubtarget : public TargetSubtarget { - private: +class StringRef; + + class PTXSubtarget : public PTXGenSubtargetInfo { + public: /** * Enumeration of Shader Models supported by the back-end. */ - enum PTXShaderModelEnum { + enum PTXTargetEnum { + PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */ + PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */ + PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */ + PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */ + PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */ + PTX_LAST_COMPUTE, + PTX_SM_1_0, /*< Shader Model 1.0 */ + PTX_SM_1_1, /*< Shader Model 1.1 */ + PTX_SM_1_2, /*< Shader Model 1.2 */ PTX_SM_1_3, /*< Shader Model 1.3 */ - PTX_SM_2_0 /*< Shader Model 2.0 */ + PTX_SM_2_0, /*< Shader Model 2.0 */ + PTX_SM_2_1, /*< Shader Model 2.1 */ + PTX_SM_2_2, /*< Shader Model 2.2 */ + PTX_SM_2_3, /*< Shader Model 2.3 */ + PTX_LAST_SM }; /** @@ -41,24 +59,30 @@ namespace llvm { PTX_VERSION_2_3 /*< PTX Version 2.3 */ }; + private: + /// Shader Model supported on the target GPU. - PTXShaderModelEnum PTXShaderModel; + PTXTargetEnum PTXTarget; /// PTX Language Version. PTXVersionEnum PTXVersion; // The native .f64 type is supported on the hardware. bool SupportsDouble; - - // Support the fused-multiply add (FMA) and multiply-add (MAD) instructions + + // Support the fused-multiply add (FMA) and multiply-add (MAD) + // instructions bool SupportsFMA; - + // Use .u64 instead of .u32 for addresses. 
bool Is64Bit; public: - PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); + PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit); + + // Target architecture accessors std::string getTargetString() const; std::string getPTXVersionString() const; @@ -68,10 +92,6 @@ namespace llvm { bool is64Bit() const { return Is64Bit; } bool supportsFMA() const { return SupportsFMA; } - - bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; } - - bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; } bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } @@ -79,8 +99,22 @@ namespace llvm { bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; } - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + bool fdivNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool fmadNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool useParamSpaceForDeviceArgs() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; // class PTXSubtarget } // namespace llvm diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 1b737c9d8634..ab926e02d66f 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "PTX.h" -#include "PTXMCAsmInfo.h" #include "PTXTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" @@ -35,9 +34,6 @@ extern "C" void LLVMInitializePTXTarget() { RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target); RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target); - RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target); - RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target); - TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); } @@ -52,11 +48,12 @@ namespace { // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), + : LLVMTargetMachine(T, TT, CPU, FS), DataLayout(is64Bit ? 
DataLayout64 : DataLayout32), - Subtarget(TT, FS, is64Bit), + Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), InstrInfo(*this), TLInfo(*this) { @@ -64,14 +61,16 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, PTX32TargetMachine::PTX32TargetMachine(const Target &T, const std::string& TT, + const std::string& CPU, const std::string& FS) - : PTXTargetMachine(T, TT, FS, false) { + : PTXTargetMachine(T, TT, CPU, FS, false) { } PTX64TargetMachine::PTX64TargetMachine(const Target &T, const std::string& TT, + const std::string& CPU, const std::string& FS) - : PTXTargetMachine(T, TT, FS, true) { + : PTXTargetMachine(T, TT, CPU, FS, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 149be8e3b7e9..ae4215325211 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -33,7 +33,8 @@ class PTXTargetMachine : public LLVMTargetMachine { public: PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -61,14 +62,14 @@ class PTX32TargetMachine : public PTXTargetMachine { public: PTX32TargetMachine(const Target &T, const std::string &TT, - const std::string& FS); + const std::string& CPU, const std::string& FS); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { public: PTX64TargetMachine(const Target &T, const std::string &TT, - const std::string& FS); + const std::string& CPU, const std::string& FS); }; // class PTX32TargetMachine } // namespace llvm diff --git a/lib/Target/PTX/generate-register-td.py b/lib/Target/PTX/generate-register-td.py new file mode 100755 index 000000000000..15286908961d --- /dev/null +++ b/lib/Target/PTX/generate-register-td.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +##===- generate-register-td.py --------------------------------*-python-*--===## +## +## The LLVM Compiler Infrastructure +## +## This file is distributed under the University of Illinois Open Source +## License. See LICENSE.TXT for details. +## +##===----------------------------------------------------------------------===## +## +## This file describes the PTX register file generator. +## +##===----------------------------------------------------------------------===## + +from sys import argv, exit, stdout + + +if len(argv) != 5: + print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>') + exit(1) + +try: + num_pred = int(argv[1]) + num_16bit = int(argv[2]) + num_32bit = int(argv[3]) + num_64bit = int(argv[4]) +except: + print('ERROR: Invalid integer parameter') + exit(1) + +## Print the register definition file +td_file = open('PTXRegisterInfo.td', 'w') + +td_file.write(''' +//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class PTXReg<string n> : Register<n> { + let Namespace = "PTX"; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +''') + + +# Print predicate registers +td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') +for r in range(0, num_pred): + td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) + +# Print 16-bit registers +td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_16bit): + td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) + +# Print 32-bit registers +td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_32bit): + td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) + +# Print 64-bit registers +td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_64bit): + td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) + + +td_file.write(''' +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// +''') + + +# Print register classes + +td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) +td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) +td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) +td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) + + +td_file.close() + +## Now write the PTXCallingConv.td file +td_file = open('PTXCallingConv.td', 'w') + +# Reserve 10% of the available registers for return values, and the other 90% +# for parameters +num_ret_pred = int(0.1 * num_pred) +num_ret_16bit = int(0.1 * num_16bit) +num_ret_32bit = int(0.1 * num_32bit) +num_ret_64bit = int(0.1 * num_64bit) +num_param_pred = num_pred - num_ret_pred +num_param_16bit = num_16bit - num_ret_16bit +num_param_32bit = num_32bit - num_ret_32bit +num_param_64bit = num_64bit - num_ret_64bit + +param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] +ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] +param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] +ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] +param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] +ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)] +param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] +ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] + +param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) 
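# A worked example of the split above (illustrative only, not lines from
# the patch): with 128 registers per class, as in the checked-in
# PTXRegisterInfo.td, int(0.1 * 128) = 12 registers (P0-P11) are reserved
# for return values and the remaining 116 (P12-P127) are used for
# parameters.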
+ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) +param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) +ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) +param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) +ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) +param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) +ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) + +td_file.write(''' +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. +// +//===----------------------------------------------------------------------===// + +// PTX Formal Parameter Calling Convention +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; + +// PTX Return Value Calling Convention +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; +''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, + ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) + + +td_file.close() diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index f28257999d1b..d1dda3716c4a 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -1,16 +1,13 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) -tablegen(PPCGenInstrNames.inc -gen-instr-enums) -tablegen(PPCGenRegisterNames.inc -gen-register-enums) tablegen(PPCGenAsmWriter.inc -gen-asm-writer) tablegen(PPCGenCodeEmitter.inc -gen-emitter) tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PPCGenRegisterInfo.inc -gen-register-desc) -tablegen(PPCGenInstrInfo.inc -gen-instr-desc) +tablegen(PPCGenRegisterInfo.inc -gen-register-info) +tablegen(PPCGenInstrInfo.inc -gen-instr-info) tablegen(PPCGenDAGISel.inc -gen-dag-isel) tablegen(PPCGenCallingConv.inc -gen-callingconv) -tablegen(PPCGenSubtarget.inc -gen-subtarget) +tablegen(PPCGenSubtargetInfo.inc -gen-subtarget) add_llvm_target(PowerPCCodeGen PPCAsmBackend.cpp @@ -23,7 +20,6 @@ add_llvm_target(PowerPCCodeGen PPCISelLowering.cpp PPCFrameLowering.cpp PPCJITInfo.cpp - PPCMCAsmInfo.cpp PPCMCCodeEmitter.cpp PPCMCInstLower.cpp PPCPredicates.cpp @@ -35,3 +31,4 @@ add_llvm_target(PowerPCCodeGen add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index adfa0aa6306b..d022a4496e84 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -19,14 +19,12 @@ namespace llvm { class MCOperand; -class TargetMachine; class PPCInstPrinter : public MCInstPrinter { // 0 -> AIX, 1 -> Darwin. 
unsigned SyntaxVariant; public: - PPCInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI, - unsigned syntaxVariant) + PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant) : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..a1b81662115a --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMPowerPCDesc + PPCMCTargetDesc.cpp + PPCMCAsmInfo.cpp + ) diff --git a/lib/Target/PowerPC/MCTargetDesc/Makefile b/lib/Target/PowerPC/MCTargetDesc/Makefile new file mode 100644 index 000000000000..9db66622cced --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/PowerPC/TargetDesc/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMPowerPCDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 2d5c8809ba9f..b6dca835b18d 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -15,6 +15,10 @@ using namespace llvm; PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { + if (is64Bit) + PointerSize = 8; + IsLittleEndian = false; + PCSymbol = "."; CommentString = ";"; ExceptionsType = ExceptionHandling::DwarfCFI; diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 96ae6fbba0e4..96ae6fbba0e4 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp new file mode 100644 index 000000000000..02b887f4d5dc --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -0,0 +1,70 @@ +//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PowerPC specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "PPCMCTargetDesc.h" +#include "PPCMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "PPCGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "PPCGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "PPCGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createPPCMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitPPCMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializePowerPCMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo); +} + + +static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitPPCMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializePowerPCMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target, + createPPCMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target, + createPPCMCSubtargetInfo); +} + +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + bool isPPC64 = TheTriple.getArch() == Triple::ppc64; + if (TheTriple.isOSDarwin()) + return new PPCMCAsmInfoDarwin(isPPC64); + return new PPCLinuxMCAsmInfo(isPPC64); + +} + +extern "C" void LLVMInitializePowerPCMCAsmInfo() { + RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo); + RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo); +} diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h new file mode 100644 index 000000000000..cee235097a0a --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -0,0 +1,41 @@ +//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PowerPC specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCMCTARGETDESC_H +#define PPCMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target ThePPC32Target; +extern Target ThePPC64Target; + +} // End llvm namespace + +// Defines symbolic names for PowerPC registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "PPCGenRegisterInfo.inc" + +// Defines symbolic names for the PowerPC instructions. +// +#define GET_INSTRINFO_ENUM +#include "PPCGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "PPCGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index 030defe212c0..1617b26ca4a5 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -12,13 +12,12 @@ LIBRARYNAME = LLVMPowerPCCodeGen TARGET = PPC # Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ +BUILT_SOURCES = PPCGenRegisterInfo.inc \ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ - PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ - PPCGenSubtarget.inc PPCGenCallingConv.inc \ + PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ PPCGenMCCodeEmitter.inc -DIRS = InstPrinter TargetInfo +DIRS = InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 92672b5b172b..7191dd105f3c 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -15,6 +15,7 @@ #ifndef LLVM_TARGET_POWERPC_H #define LLVM_TARGET_POWERPC_H +#include "MCTargetDesc/PPCMCTargetDesc.h" #include <string> // GCC #defines PPC on Linux but we use it as our namespace name @@ -31,6 +32,8 @@ namespace llvm { class MCInst; class MCCodeEmitter; class MCContext; + class MCInstrInfo; + class MCSubtargetInfo; class TargetMachine; class TargetAsmBackend; @@ -38,16 +41,14 @@ namespace llvm { FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &MCE); - MCCodeEmitter *createPPCMCCodeEmitter(const Target &, TargetMachine &TM, + MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, MCContext &Ctx); TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); - extern Target ThePPC32Target; - extern Target ThePPC64Target; - namespace PPCII { /// Target Operand Flag enum. @@ -81,13 +82,4 @@ namespace llvm { } // end namespace llvm; -// Defines symbolic names for PowerPC registers. This defines a mapping from -// register name to register number. -// -#include "PPCGenRegisterNames.inc" - -// Defines symbolic names for the PowerPC instructions. 
-// -#include "PPCGenInstrNames.inc" - #endif diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp index f562a3f4f9e8..4b8cbb711833 100644 --- a/lib/Target/PowerPC/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/PPCAsmBackend.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; @@ -23,6 +24,11 @@ public: PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) {} }; class PPCAsmBackend : public TargetAsmBackend { diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index b795db9594ff..9de2200296e8 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -680,10 +680,9 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, - TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { - return new PPCInstPrinter(TM, MAI, SyntaxVariant); + return new PPCInstPrinter(MAI, SyntaxVariant); } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 74ecff5af620..cddc9d858adf 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -73,12 +73,12 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, } Opcode = ~Opcode; - const TargetInstrDesc &TID = TII.get(Opcode); + const MCInstrDesc &MCID = TII.get(Opcode); - isLoad = TID.mayLoad(); - isStore = TID.mayStore(); + isLoad = MCID.mayLoad(); + isStore = MCID.mayStore(); - uint64_t TSFlags = TID.TSFlags; + uint64_t TSFlags = MCID.TSFlags; isFirst = TSFlags & PPCII::PPC970_First; isSingle = TSFlags & PPCII::PPC970_Single; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 511bb223cada..2176c02c8503 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -610,6 +610,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + bool isPPC64 = (PtrVT == MVT::i64); + if (isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. 
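The isPPC64 guards added in the hunks below protect SETNE/SETEQ fast paths
that materialize an i32 comparison result through the carry bit, an ADDIC
feeding a SUBFE or ADDZE; the trick relies on 32-bit carry semantics, so
ppc64 now takes the generic expansion instead. A hand-written sketch of the
(op != 0) case, for illustration only:

static unsigned setne_zero_sketch(unsigned op) {
  unsigned ad = op + ~0u;             // ADDIC op, -1: records CA = (op != 0)
  unsigned ca = (ad < op) ? 1u : 0u;  // the carry bit ADDIC latches
  return op + ~ad + ca;               // SUBFE: evaluates to (op != 0)
}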
@@ -624,6 +627,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } case ISD::SETNE: { + if (isPPC64) break; SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U)), 0); @@ -647,6 +651,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { switch (CC) { default: break; case ISD::SETEQ: + if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(1)), 0); return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, @@ -655,6 +660,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { getI32Imm(0)), 0), Op.getValue(1)); case ISD::SETNE: { + if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U)); @@ -996,22 +1002,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + bool isPPC64 = (PtrVT == MVT::i64); // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc - if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) - if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) - if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) - if (N1C->isNullValue() && N3C->isNullValue() && - N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && - // FIXME: Implement this optzn for PPC64. - N->getValueType(0) == MVT::i32) { - SDNode *Tmp = - CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, - N->getOperand(0), getI32Imm(~0U)); - return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, - SDValue(Tmp, 0), N->getOperand(0), - SDValue(Tmp, 1)); - } + if (!isPPC64) + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) + if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) + if (N1C->isNullValue() && N3C->isNullValue() && + N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && + // FIXME: Implement this optzn for PPC64. 
+ N->getValueType(0) == MVT::i32) { + SDNode *Tmp = + CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, + N->getOperand(0), getI32Imm(~0U)); + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, + SDValue(Tmp, 0), N->getOperand(0), + SDValue(Tmp, 1)); + } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); unsigned BROpc = getPredicateForSetCC(CC); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index dbb184c1f6bb..9741a3902af7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -125,10 +125,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); @@ -215,10 +217,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::VASTART , MVT::Other, Custom); // VAARG is custom lowered with the 32-bit SVR4 ABI. - if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI() - && !TM.getSubtarget<PPCSubtarget>().isPPC64()) + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI() + && !TM.getSubtarget<PPCSubtarget>().isPPC64()) { setOperationAction(ISD::VAARG, MVT::Other, Custom); - else + setOperationAction(ISD::VAARG, MVT::i64, Custom); + } else setOperationAction(ISD::VAARG, MVT::Other, Expand); // Use the default implementation. 
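The hunk below replaces the llvm_unreachable stub with a real va_arg
expansion for the 32-bit SVR4 ABI. For reference, a sketch of the va_list
record it walks; the field offsets are inferred from the constants in the
code (fpr byte at +1, overflow pointer at +4, save-area pointer at +8, FPR
slots 32 bytes into the save area), and the struct name is illustrative:

struct SVR4VaListSketch {
  unsigned char gpr;        // index of the next general-purpose arg register
  unsigned char fpr;        // index of the next floating-point arg register
  unsigned short reserved;
  void *overflow_arg_area;  // arguments past the eighth register land here
  void *reg_save_area;      // GPR slots at +0, FPR slots at +32
};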
@@ -1262,9 +1265,107 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + DebugLoc dl = Node->getDebugLoc(); + + assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); + + // gpr_index + SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + VAListPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = GprIndex.getValue(1); + + if (VT == MVT::i64) { + // Check if GprIndex is even + SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, + DAG.getConstant(0, MVT::i32), ISD::SETNE); + SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, + DAG.getConstant(1, MVT::i32)); + // Align GprIndex to be even if it isn't + GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, + GprIndex); + } + + // fpr index is 1 byte after gpr + SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(1, MVT::i32)); + + // fpr + SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, + FprPtr, MachinePointerInfo(SV), MVT::i8, + false, false, 0); + InChain = FprIndex.getValue(1); + + SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(8, MVT::i32)); + + SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, + DAG.getConstant(4, MVT::i32)); - llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!"); - return SDValue(); // Not reached + // areas + SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = OverflowArea.getValue(1); + + SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, + MachinePointerInfo(), false, false, 0); + InChain = RegSaveArea.getValue(1); + + // select overflow_area if index > 8 + SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(8, MVT::i32), ISD::SETLT); + + // adjustment constant gpr_index * 4/8 + SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + // OurReg = RegSaveArea + RegConstant + SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, + RegConstant); + + // Floating types are 32 bytes into RegSaveArea + if (VT.isFloatingPoint()) + OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, + DAG.getConstant(32, MVT::i32)); + + // increase {f,g}pr_index by 1 (or 2 if VT is i64) + SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, + VT.isInteger() ? GprIndex : FprIndex, + DAG.getConstant(VT == MVT::i64 ? 2 : 1, + MVT::i32)); + + InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, + VT.isInteger() ? 
VAListPtr : FprPtr, + MachinePointerInfo(SV), + MVT::i8, false, false, 0); + + // determine if we should load from reg_save_area or overflow_area + SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); + + // increase overflow_area by 4/8 if gpr/fpr > 8 + SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, + DAG.getConstant(VT.isInteger() ? 4 : 8, + MVT::i32)); + + OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, + OverflowAreaPlusN); + + InChain = DAG.getTruncStore(InChain, dl, OverflowArea, + OverflowAreaPtr, + MachinePointerInfo(), + MVT::i32, false, false, 0); + + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); } SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, @@ -1870,7 +1971,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + unsigned VReg; + if (isPPC64) + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + else + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(), @@ -1889,7 +1994,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // to memory. ArgVal will be address of the beginning of // the object. if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + unsigned VReg; + if (isPPC64) + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + else + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); @@ -2902,6 +3011,12 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); + // Set CR6 to true if this is a vararg call. + if (isVarArg) { + SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0); + RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); + } + // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; @@ -2911,13 +3026,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - // Set CR6 to true if this is a vararg call. 
- if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0); - Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); - InFlag = Chain.getValue(1); - } - if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); @@ -4422,11 +4530,27 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const { + const TargetMachine &TM = getTargetMachine(); DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::VAARG: { + if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() + || TM.getSubtarget<PPCSubtarget>().isPPC64()) + return; + + EVT VT = N->getValueType(0); + + if (VT == MVT::i64) { + SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); + + Results.push_back(NewNode); + Results.push_back(NewNode.getValue(1)); + } + return; + } case ISD::FP_ROUND_INREG: { assert(N->getValueType(0) == MVT::ppcf128); assert(N->getOperand(0).getValueType() == MVT::ppcf128); @@ -4676,7 +4800,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, .addReg(TmpReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); - BuildMI(BB, dl, TII->get(PPC::STWCX)) + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 53b049135e24..143444fdc22b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -12,22 +12,26 @@ //===----------------------------------------------------------------------===// #include "PPCInstrInfo.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCPredicates.h" -#include "PPCGenInstrInfo.inc" #include "PPCTargetMachine.h" #include "PPCHazardRecognizers.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/ADT/STLExtras.h" + +#define GET_INSTRINFO_CTOR +#include "PPCGenInstrInfo.inc" namespace llvm { extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. @@ -37,8 +41,8 @@ extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. using namespace llvm; PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) - : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), - RI(*TM.getSubtargetImpl(), *this) {} + : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), + TM(tm), RI(*TM.getSubtargetImpl(), *this) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. @@ -120,7 +124,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // destination register as well. 
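  // (In the assert below, the operand-constraint query moves from the old
  // TOI:: namespace to MCOI::, mirroring the TargetInstrDesc -> MCInstrDesc
  // renames in PPCHazardRecognizers.cpp above; the TIED_TO constraint value
  // itself is unchanged.)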
if (Reg0 == Reg1) { // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && + assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) && "Expecting a two-address instruction!"); Reg2IsKill = false; ChangeReg0 = true; @@ -315,12 +319,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else llvm_unreachable("Impossible reg-to-reg copy"); - const TargetInstrDesc &TID = get(Opc); - if (TID.getNumOperands() == 3) - BuildMI(MBB, I, DL, TID, DestReg) + const MCInstrDesc &MCID = get(Opc); + if (MCID.getNumOperands() == 3) + BuildMI(MBB, I, DL, MCID, DestReg) .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); else - BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } bool diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index b5249ae03769..90bacc96c87e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -18,6 +18,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "PPCRegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "PPCGenInstrInfo.inc" + namespace llvm { /// PPCII - This namespace holds all of the PowerPC target-specific @@ -61,7 +64,7 @@ enum PPC970_Unit { } // end namespace PPCII -class PPCInstrInfo : public TargetInstrInfoImpl { +class PPCInstrInfo : public PPCGenInstrInfo { PPCTargetMachine &TM; const PPCRegisterInfo RI; diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 78383e0603bd..4590f0045641 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -87,7 +87,7 @@ asm( // FIXME: could shrink frame // Set up a proper stack frame // FIXME Layout - // PowerPC64 ABI linkage - 24 bytes + // PowerPC32 ABI linkage - 24 bytes // parameters - 32 bytes // 13 double registers - 104 bytes // 8 int registers - 32 bytes @@ -205,11 +205,27 @@ void PPC32CompilationCallback() { #if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ defined(__ppc64__) +#ifdef __ELF__ +asm( + ".text\n" + ".align 2\n" + ".globl PPC64CompilationCallback\n" + ".section \".opd\",\"aw\"\n" + ".align 3\n" +"PPC64CompilationCallback:\n" + ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n" + ".size PPC64CompilationCallback,24\n" + ".previous\n" + ".align 4\n" + ".type PPC64CompilationCallback,@function\n" +".L.PPC64CompilationCallback:\n" +#else asm( ".text\n" ".align 2\n" ".globl _PPC64CompilationCallback\n" "_PPC64CompilationCallback:\n" +#endif // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the // FIXME: need to save v[0-19] for altivec? // Set up a proper stack frame @@ -218,49 +234,55 @@ asm( // parameters - 64 bytes // 13 double registers - 104 bytes // 8 int registers - 64 bytes - "mflr r0\n" - "std r0, 16(r1)\n" - "stdu r1, -280(r1)\n" + "mflr 0\n" + "std 0, 16(1)\n" + "stdu 1, -280(1)\n" // Save all int arg registers - "std r10, 272(r1)\n" "std r9, 264(r1)\n" - "std r8, 256(r1)\n" "std r7, 248(r1)\n" - "std r6, 240(r1)\n" "std r5, 232(r1)\n" - "std r4, 224(r1)\n" "std r3, 216(r1)\n" + "std 10, 272(1)\n" "std 9, 264(1)\n" + "std 8, 256(1)\n" "std 7, 248(1)\n" + "std 6, 240(1)\n" "std 5, 232(1)\n" + "std 4, 224(1)\n" "std 3, 216(1)\n" // Save all call-clobbered FP regs. 
- "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n" - "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n" - "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n" - "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n" - "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n" - "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n" - "stfd f1, 112(r1)\n" + "stfd 13, 208(1)\n" "stfd 12, 200(1)\n" + "stfd 11, 192(1)\n" "stfd 10, 184(1)\n" + "stfd 9, 176(1)\n" "stfd 8, 168(1)\n" + "stfd 7, 160(1)\n" "stfd 6, 152(1)\n" + "stfd 5, 144(1)\n" "stfd 4, 136(1)\n" + "stfd 3, 128(1)\n" "stfd 2, 120(1)\n" + "stfd 1, 112(1)\n" // Arguments to Compilation Callback: // r3 - our lr (address of the call instruction in stub plus 4) // r4 - stub's lr (address of instruction that called the stub plus 4) // r5 - is64Bit - always 1. - "mr r3, r0\n" - "ld r2, 280(r1)\n" // stub's frame - "ld r4, 16(r2)\n" // stub's lr - "li r5, 1\n" // 1 == 64 bit + "mr 3, 0\n" // return address (still in r0) + "ld 5, 280(1)\n" // stub's frame + "ld 4, 16(5)\n" // stub's lr + "li 5, 1\n" // 1 == 64 bit +#ifdef __ELF__ + "bl PPCCompilationCallbackC\n" + "nop\n" +#else "bl _PPCCompilationCallbackC\n" - "mtctr r3\n" +#endif + "mtctr 3\n" // Restore all int arg registers - "ld r10, 272(r1)\n" "ld r9, 264(r1)\n" - "ld r8, 256(r1)\n" "ld r7, 248(r1)\n" - "ld r6, 240(r1)\n" "ld r5, 232(r1)\n" - "ld r4, 224(r1)\n" "ld r3, 216(r1)\n" + "ld 10, 272(1)\n" "ld 9, 264(1)\n" + "ld 8, 256(1)\n" "ld 7, 248(1)\n" + "ld 6, 240(1)\n" "ld 5, 232(1)\n" + "ld 4, 224(1)\n" "ld 3, 216(1)\n" // Restore all FP arg registers - "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n" - "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n" - "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n" - "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n" - "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n" - "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n" - "lfd f1, 112(r1)\n" + "lfd 13, 208(1)\n" "lfd 12, 200(1)\n" + "lfd 11, 192(1)\n" "lfd 10, 184(1)\n" + "lfd 9, 176(1)\n" "lfd 8, 168(1)\n" + "lfd 7, 160(1)\n" "lfd 6, 152(1)\n" + "lfd 5, 144(1)\n" "lfd 4, 136(1)\n" + "lfd 3, 128(1)\n" "lfd 2, 120(1)\n" + "lfd 1, 112(1)\n" // Pop 3 frames off the stack and branch to target - "ld r1, 280(r1)\n" - "ld r2, 16(r1)\n" - "mtlr r2\n" + "ld 1, 280(1)\n" + "ld 0, 16(1)\n" + "mtlr 0\n" + // XXX: any special TOC handling in the ELF case for JIT? 
"bctr\n" ); #else diff --git a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp index 65c2c82c51a7..cf73d861fa4d 100644 --- a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp @@ -28,12 +28,10 @@ namespace { class PPCMCCodeEmitter : public MCCodeEmitter { PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - MCContext &Ctx; public: - PPCMCCodeEmitter(TargetMachine &tm, MCContext &ctx) - : TM(tm), Ctx(ctx) { + PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) { } ~PPCMCCodeEmitter() {} @@ -79,9 +77,10 @@ public: } // end anonymous namespace -MCCodeEmitter *llvm::createPPCMCCodeEmitter(const Target &, TargetMachine &TM, +MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, MCContext &Ctx) { - return new PPCMCCodeEmitter(TM, Ctx); + return new PPCMCCodeEmitter(MCII, STI, Ctx); } unsigned PPCMCCodeEmitter:: diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 3374e9b0b631..9c2428b92e65 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -44,6 +44,9 @@ #include "llvm/ADT/STLExtras.h" #include <cstdlib> +#define GET_REGINFO_TARGET_DESC +#include "PPCGenRegisterInfo.inc" + // FIXME (64-bit): Eventually enable by default. namespace llvm { cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", @@ -110,8 +113,7 @@ unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) { PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) - : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), - Subtarget(ST), TII(tii) { + : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -504,6 +506,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; unsigned Reg = findScratchRegister(II, RS, RC, SPAdj); unsigned SrcReg = MI.getOperand(0).getReg(); + bool LP64 = Subtarget.isPPC64(); // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg. @@ -520,7 +523,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, .addImm(0) .addImm(31); - addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? 
PPC::STW8 : PPC::STW)) .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), FrameIndex); @@ -709,5 +712,3 @@ int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour); } - -#include "PPCGenRegisterInfo.inc" diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 48c25625ea9b..33fe5ebcf4cd 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -16,9 +16,11 @@ #define POWERPC32_REGISTERINFO_H #include "PPC.h" -#include "PPCGenRegisterInfo.h.inc" #include <map> +#define GET_REGINFO_HEADER +#include "PPCGenRegisterInfo.inc" + namespace llvm { class PPCSubtarget; class TargetInstrInfo; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 3c0190199a82..1acdf4eb853b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -276,15 +276,13 @@ def RM: SPR<512, "**ROUNDING MODE**">; /// Register classes // Allocate volatiles first // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 -def GPRC : RegisterClass<"PPC", [i32], 32, - [R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, - R30, R29, R28, R27, R26, R25, R24, R23, R22, R21, R20, R19, R18, R17, - R16, R15, R14, R13, R31, R0, R1, LR]>; +def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), + (sequence "R%u", 30, 13), + R31, R0, R1, LR)>; -def G8RC : RegisterClass<"PPC", [i64], 64, - [X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, - X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18, X17, - X16, X15, X14, X31, X13, X0, X1, LR8]>; +def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), + (sequence "X%u", 30, 14), + X31, X13, X0, X1, LR8)>; // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the @@ -293,41 +291,36 @@ def G8RC : RegisterClass<"PPC", [i64], 64, // previous stack frame. By allocating non-volatiles in reverse order we make // sure that the Floating-point register save area is always as small as // possible because there aren't any unused spill slots. 
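// In the dag-set notation used below, (sequence "R%u", 2, 12) expands to
// R2 up through R12 and (sequence "R%u", 30, 13) counts down from R30 to
// R13, so the rewritten classes preserve exactly the allocation order the
// old explicit register lists spelled out.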
-def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23, - F22, F21, F20, F19, F18, F17, F16, F15, F14]>; -def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23, - F22, F21, F20, F19, F18, F17, F16, F15, F14]>; +def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13), + (sequence "F%u", 31, 14))>; +def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, - [V2, V3, V4, V5, V0, V1, - V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, - V29, V28, V27, V26, V25, V24, V23, V22, V21, V20]>; + (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11, + V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, + V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; def CRBITRC : RegisterClass<"PPC", [i32], 32, - [CR0LT, CR0GT, CR0EQ, CR0UN, - CR1LT, CR1GT, CR1EQ, CR1UN, - CR2LT, CR2GT, CR2EQ, CR2UN, - CR3LT, CR3GT, CR3EQ, CR3UN, - CR4LT, CR4GT, CR4EQ, CR4UN, - CR5LT, CR5GT, CR5EQ, CR5UN, - CR6LT, CR6GT, CR6EQ, CR6UN, - CR7LT, CR7GT, CR7EQ, CR7UN - ]> + (add CR0LT, CR0GT, CR0EQ, CR0UN, + CR1LT, CR1GT, CR1EQ, CR1UN, + CR2LT, CR2GT, CR2EQ, CR2UN, + CR3LT, CR3GT, CR3EQ, CR3UN, + CR4LT, CR4GT, CR4EQ, CR4UN, + CR5LT, CR5GT, CR5EQ, CR5UN, + CR6LT, CR6GT, CR6EQ, CR6UN, + CR7LT, CR7GT, CR7EQ, CR7UN)> { let CopyCost = -1; } -def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, - CR3, CR4]> -{ +def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, + CR7, CR2, CR3, CR4)> { let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)]; } -def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; -def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>; -def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>; -def CARRYRC : RegisterClass<"PPC", [i32], 32, [CARRY]> { +def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>; +def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>; +def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; +def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> { let CopyCost = -1; } diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 5f3aa2328f9e..5ea9b0f6596c 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the PPC specific subclass of TargetSubtarget. +// This file implements the PPC specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// @@ -15,8 +15,13 @@ #include "PPC.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" -#include "PPCGenSubtarget.inc" +#include "llvm/Target/TargetRegistry.h" #include <cstdlib> + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "PPCGenSubtargetInfo.inc" + using namespace llvm; #if defined(__APPLE__) @@ -57,9 +62,10 @@ static const char *GetCurrentPowerPCCPU() { #endif -PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, - bool is64Bit) - : StackAlignment(16) +PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) + : PPCGenSubtargetInfo(TT, CPU, FS) + , StackAlignment(16) , DarwinDirective(PPC::DIR_NONE) , IsGigaProcessor(false) , Has64BitSupport(false) @@ -73,13 +79,19 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, , TargetTriple(TT) { // Determine default and user specified characteristics - std::string CPU = "generic"; + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "generic"; #if defined(__APPLE__) - CPU = GetCurrentPowerPCCPU(); + if (CPUName == "generic") + CPUName = GetCurrentPowerPCCPU(); #endif // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(CPUName, FS); + + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUName); // If we are generating code for ppc64, verify that options make sense. if (is64Bit) { diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 8fd1a447692d..e028de6b09de 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -7,23 +7,26 @@ // //===----------------------------------------------------------------------===// // -// This file declares the PowerPC specific subclass of TargetSubtarget. +// This file declares the PowerPC specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/ADT/Triple.h" -#include "llvm/Target/TargetInstrItineraries.h" -#include "llvm/Target/TargetSubtarget.h" - #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "PPCGenSubtargetInfo.inc" + // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC namespace llvm { +class StringRef; namespace PPC { // -m directive values. @@ -43,7 +46,7 @@ namespace PPC { class GlobalValue; class TargetMachine; -class PPCSubtarget : public TargetSubtarget { +class PPCSubtarget : public PPCGenSubtargetInfo { protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. @@ -73,13 +76,12 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. /// - PPCSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); + PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
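  /// (Note the argument order in the new signature below: CPU first, then
  /// FS, matching the ParseSubtargetFeatures(CPUName, FS) call added to the
  /// constructor in PPCSubtarget.cpp above; tblgen now generates the
  /// StringRef-based definition.)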
- std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); - + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. @@ -104,7 +106,7 @@ public: // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" - : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128-n32"; + : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; } /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index d27e54e56699..e0ea5adba751 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "PPC.h" -#include "PPCMCAsmInfo.h" #include "PPCTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/MC/MCStreamer.h" @@ -21,15 +20,6 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { - Triple TheTriple(TT); - bool isPPC64 = TheTriple.getArch() == Triple::ppc64; - if (TheTriple.isOSDarwin()) - return new PPCMCAsmInfoDarwin(isPPC64); - return new PPCLinuxMCAsmInfo(isPPC64); - -} - // This is duplicated code. Refactor this. static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCContext &Ctx, TargetAsmBackend &TAB, @@ -48,9 +38,6 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); - RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo); - RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo); - // Register the MC Code Emitter TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); @@ -67,9 +54,10 @@ extern "C" void LLVMInitializePowerPCTarget() { PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, is64Bit), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), @@ -88,14 +76,16 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : PPCTargetMachine(T, TT, FS, false) { + : PPCTargetMachine(T, TT, CPU, FS, false) { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : PPCTargetMachine(T, TT, FS, true) { + : PPCTargetMachine(T, TT, CPU, FS, true) { } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 2d2498943a2d..baf07e3498f8 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -41,7 +41,8 @@ class PPCTargetMachine : public LLVMTargetMachine { public: PPCTargetMachine(const Target &T, const 
std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameLowering *getFrameLowering() const { @@ -77,7 +78,7 @@ public: class PPC32TargetMachine : public PPCTargetMachine { public: PPC32TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; /// PPC64TargetMachine - PowerPC 64-bit target machine. @@ -85,7 +86,7 @@ public: class PPC64TargetMachine : public PPCTargetMachine { public: PPC64TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; } // end namespace llvm diff --git a/lib/Target/README.txt b/lib/Target/README.txt index fcec368a213b..4cc95340890d 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -870,11 +870,6 @@ rshift_gt (unsigned int a) bar (); } -void neg_eq_cst(unsigned int a) { -if (-a == 123) -bar(); -} - All should simplify to a single comparison. All of these are currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -1767,7 +1762,6 @@ case it choses instead to keep the max operation obvious. //===---------------------------------------------------------------------===// -Switch lowering generates less than ideal code for the following switch: define void @a(i32 %x) nounwind { entry: switch i32 %x, label %if.end [ @@ -1788,19 +1782,15 @@ declare void @foo() Generated code on x86-64 (other platforms give similar results): a: cmpl $5, %edi - ja .LBB0_2 - movl %edi, %eax - movl $47, %ecx - btq %rax, %rcx - jb .LBB0_3 + ja LBB2_2 + cmpl $4, %edi + jne LBB2_3 .LBB0_2: ret .LBB0_3: jmp foo # TAILCALL -The movl+movl+btq+jb could be simplified to a cmpl+jne. 
- -Or, if we wanted to be really clever, we could simplify the whole thing to +If we wanted to be really clever, we could simplify the whole thing to something like the following, which eliminates a branch: xorl $1, %edi cmpl $4, %edi diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 6839234a4700..c77ded4b435e 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -1,13 +1,10 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td) -tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SparcGenRegisterNames.inc -gen-register-enums) -tablegen(SparcGenRegisterInfo.inc -gen-register-desc) -tablegen(SparcGenInstrNames.inc -gen-instr-enums) -tablegen(SparcGenInstrInfo.inc -gen-instr-desc) +tablegen(SparcGenRegisterInfo.inc -gen-register-info) +tablegen(SparcGenInstrInfo.inc -gen-instr-info) tablegen(SparcGenAsmWriter.inc -gen-asm-writer) tablegen(SparcGenDAGISel.inc -gen-dag-isel) -tablegen(SparcGenSubtarget.inc -gen-subtarget) +tablegen(SparcGenSubtargetInfo.inc -gen-subtarget) tablegen(SparcGenCallingConv.inc -gen-callingconv) add_llvm_target(SparcCodeGen @@ -18,7 +15,6 @@ add_llvm_target(SparcCodeGen SparcISelDAGToDAG.cpp SparcISelLowering.cpp SparcFrameLowering.cpp - SparcMCAsmInfo.cpp SparcRegisterInfo.cpp SparcSubtarget.cpp SparcTargetMachine.cpp @@ -26,3 +22,4 @@ add_llvm_target(SparcCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 4b12852ef873..dab35e5e4e6f 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -298,7 +298,7 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB, return false; if (candidate->getOpcode() == SP::UNIMP) return true; - const TargetInstrDesc &prevdesc = (--candidate)->getDesc(); + const MCInstrDesc &prevdesc = (--candidate)->getDesc(); return prevdesc.hasDelaySlot(); } diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..1e8c02979887 --- /dev/null +++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMSparcDesc + SparcMCTargetDesc.cpp + SparcMCAsmInfo.cpp + ) diff --git a/lib/Target/Sparc/MCTargetDesc/Makefile b/lib/Target/Sparc/MCTargetDesc/Makefile new file mode 100644 index 000000000000..abcbe2da18ec --- /dev/null +++ b/lib/Target/Sparc/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Sparc/TargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMSparcDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index d37d6d231305..6a7e0902354e 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -12,9 +12,16 @@ //===----------------------------------------------------------------------===// #include "SparcMCAsmInfo.h" +#include "llvm/ADT/Triple.h" + using namespace llvm; SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { + IsLittleEndian = false; + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::sparcv9) + PointerSize = 8; + Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = 0; // .xword is only supported by V9. diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index 0cb6827d2771..0cb6827d2771 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp new file mode 100644 index 000000000000..cb92a2bfd417 --- /dev/null +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -0,0 +1,57 @@ +//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sparc specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "SparcMCTargetDesc.h" +#include "SparcMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "SparcGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SparcGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SparcGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createSparcMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSparcMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeSparcMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); +} + +static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitSparcMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeSparcMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget, + createSparcMCSubtargetInfo); +} + +extern "C" void LLVMInitializeSparcMCAsmInfo() { + RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget); + RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target); +} diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h new file mode 100644 index 000000000000..2fd9e3f4cbd3 --- /dev/null +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -0,0 +1,41 @@ +//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file provides Sparc specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCMCTARGETDESC_H +#define SPARCMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheSparcTarget; +extern Target TheSparcV9Target; + +} // End llvm namespace + +// Defines symbolic names for Sparc registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "SparcGenRegisterInfo.inc" + +// Defines symbolic names for the Sparc instructions. +// +#define GET_INSTRINFO_ENUM +#include "SparcGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SparcGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index 27942c56fb3a..4b81ada956f2 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -12,12 +12,11 @@ LIBRARYNAME = LLVMSparcCodeGen TARGET = Sparc # Make sure that tblgen is run, first thing. -BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ - SparcGenRegisterInfo.inc SparcGenInstrNames.inc \ - SparcGenInstrInfo.inc SparcGenAsmWriter.inc \ - SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc +BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \ + SparcGenAsmWriter.inc SparcGenDAGISel.inc \ + SparcGenSubtargetInfo.inc SparcGenCallingConv.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index a37920d80308..7b2c6141dbf8 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -15,6 +15,7 @@ #ifndef TARGET_SPARC_H #define TARGET_SPARC_H +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include <cassert> @@ -28,21 +29,8 @@ namespace llvm { FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); FunctionPass *createSparcFPMoverPass(TargetMachine &TM); - extern Target TheSparcTarget; - extern Target TheSparcV9Target; - } // end namespace llvm; -// Defines symbolic names for Sparc registers. This defines a mapping from -// register name to register number. -// -#include "SparcGenRegisterNames.inc" - -// Defines symbolic names for the Sparc instructions. -// -#include "SparcGenInstrNames.inc" - - namespace llvm { // Enums corresponding to Sparc condition codes, both icc's and fcc's. These // values must be kept in sync with the ones in the .td file. 
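With the MCTargetDesc registration above (and its PowerPC counterpart
earlier in this patch), MC-layer clients can obtain instruction tables
without pulling in the code generator. A minimal sketch, assuming the
registration entry points have run; the triple string is a placeholder:

#include "MCTargetDesc/SparcMCTargetDesc.h"  // SP:: opcode enum
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Target/TargetRegistry.h"

void querySparcOpcodes() {
  std::string Error;
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget("sparc-unknown-linux-gnu", Error);
  if (!T) return;
  const llvm::MCInstrInfo *MII = T->createMCInstrInfo();
  // Same query DelaySlotFiller makes through the TargetInstrInfo above.
  bool HasDelay = MII->get(SP::UNIMP).hasDelaySlot();
  (void)HasDelay;
  delete MII;
}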
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 0b4612df4e43..6f30d3fd6c35 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1,4 +1,3 @@ - //===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===// // // The LLVM Compiler Infrastructure @@ -755,9 +754,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTLZ , MVT::i32, Expand); @@ -1265,26 +1266,6 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } -std::vector<unsigned> SparcTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { - default: break; - case 'r': - return make_vector<unsigned>(SP::L0, SP::L1, SP::L2, SP::L3, - SP::L4, SP::L5, SP::L6, SP::L7, - SP::I0, SP::I1, SP::I2, SP::I3, - SP::I4, SP::I5, - SP::O0, SP::O1, SP::O2, SP::O3, - SP::O4, SP::O5, SP::O7, 0); - } - - return std::vector<unsigned>(); -} - bool SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Sparc target isn't yet aware of offsets. 
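The two added setOperationAction(ISD::FMA, ..., Expand) lines above deserve a note: ISD::FMA nodes are produced for the llvm.fma intrinsic, and a target with no fused multiply-add instruction must mark them Expand so legalization rewrites them (typically into a libm fma libcall) before instruction selection, which would otherwise have no pattern for them. Sketched inside a hypothetical target's TargetLowering constructor body:

// Inside FooTargetLowering's constructor (hypothetical target):
// no FMA hardware, so legalize the node away before ISel sees it.
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f64, Expand);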
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 9ea6e16e3ac1..8a1886a856e0 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -65,9 +65,6 @@ namespace llvm { ConstraintType getConstraintType(const std::string &Constraint) const; std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index afa3c1f88f96..4e3ddf839985 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -12,19 +12,23 @@ //===----------------------------------------------------------------------===// #include "SparcInstrInfo.h" -#include "SparcSubtarget.h" #include "Sparc.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" +#include "SparcMachineFunctionInfo.h" +#include "SparcSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" + +#define GET_INSTRINFO_CTOR #include "SparcGenInstrInfo.inc" -#include "SparcMachineFunctionInfo.h" + using namespace llvm; SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) - : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts)), + : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(ST, *this), Subtarget(ST) { } diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index b2d24f52503b..eda64efb7a03 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -17,6 +17,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "SparcRegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "SparcGenInstrInfo.inc" + namespace llvm { /// SPII - This namespace holds all of the target specific flags that @@ -31,7 +34,7 @@ namespace SPII { }; } -class SparcInstrInfo : public TargetInstrInfoImpl { +class SparcInstrInfo : public SparcGenInstrInfo { const SparcRegisterInfo RI; const SparcSubtarget& Subtarget; public: diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 9fcf028fa60e..0acdd2c55d6b 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -23,12 +23,15 @@ #include "llvm/Type.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" + +#define GET_REGINFO_TARGET_DESC +#include "SparcGenRegisterInfo.inc" + using namespace llvm; SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii) - : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), - Subtarget(st), TII(tii) { + : SparcGenRegisterInfo(), Subtarget(st), TII(tii) { } const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) @@ -135,6 +138,3 @@ int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0); } - -#include "SparcGenRegisterInfo.inc" - diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 56c8068228f0..ec9e63a686bc 100644 --- 
a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -15,7 +15,9 @@ #define SPARCREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "SparcGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "SparcGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index 0729818e85b8..cf928293c169 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -139,23 +139,21 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>; // FIXME: the register order should be defined in terms of the preferred // allocation order... // -def IntRegs : RegisterClass<"SP", [i32], 32, [L0, L1, L2, L3, L4, L5, L6, L7, - I0, I1, I2, I3, I4, I5, - O0, O1, O2, O3, O4, O5, O7, - G1, - // Non-allocatable regs: - G2, G3, G4, // FIXME: OK for use only in - // applications, not libraries. - O6, // stack ptr - I6, // frame ptr - I7, // return address - G0, // constant zero - G5, G6, G7 // reserved for kernel - ]>; +def IntRegs : RegisterClass<"SP", [i32], 32, + (add L0, L1, L2, L3, L4, L5, L6, + L7, I0, I1, I2, I3, I4, I5, + O0, O1, O2, O3, O4, O5, O7, + G1, + // Non-allocatable regs: + G2, G3, G4, // FIXME: OK for use only in + // applications, not libraries. + O6, // stack ptr + I6, // frame ptr + I7, // return address + G0, // constant zero + G5, G6, G7 // reserved for kernel + )>; -def FPRegs : RegisterClass<"SP", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8, - F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, - F23, F24, F25, F26, F27, F28, F29, F30, F31]>; +def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>; -def DFPRegs : RegisterClass<"SP", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15]>; +def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>; diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index ce11af1fa842..de647e8221a2 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -7,28 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file implements the SPARC specific subclass of TargetSubtarget. +// This file implements the SPARC specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "SparcSubtarget.h" -#include "SparcGenSubtarget.inc" +#include "Sparc.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "SparcGenSubtargetInfo.inc" + using namespace llvm; -SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS, - bool is64Bit) : +SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) : + SparcGenSubtargetInfo(TT, CPU, FS), IsV9(false), V8DeprecatedInsts(false), IsVIS(false), Is64Bit(is64Bit) { // Determine default and user specified characteristics - const char *CPU = "v8"; - if (is64Bit) { - CPU = "v9"; - IsV9 = true; + std::string CPUName = CPU; + if (CPUName.empty()) { + if (is64Bit) + CPUName = "v9"; + else + CPUName = "v8"; } + IsV9 = CPUName == "v9"; // Parse features string. 
- ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(CPUName, FS); } diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index cec0ab422bc2..00a04c3bea57 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -7,26 +7,31 @@ // //===----------------------------------------------------------------------===// // -// This file declares the SPARC specific subclass of TargetSubtarget. +// This file declares the SPARC specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef SPARC_SUBTARGET_H #define SPARC_SUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "SparcGenSubtargetInfo.inc" + namespace llvm { +class StringRef; -class SparcSubtarget : public TargetSubtarget { +class SparcSubtarget : public SparcGenSubtargetInfo { bool IsV9; bool V8DeprecatedInsts; bool IsVIS; bool Is64Bit; public: - SparcSubtarget(const std::string &TT, const std::string &FS, bool is64bit); + SparcSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64bit); bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } @@ -34,8 +39,7 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool is64Bit() const { return Is64Bit; } std::string getDataLayout() const { diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index b84eab568d29..cbe6d8754efd 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "Sparc.h" -#include "SparcMCAsmInfo.h" #include "SparcTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" @@ -21,18 +20,15 @@ extern "C" void LLVMInitializeSparcTarget() { // Register the target. 
RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget); RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target); - - RegisterAsmInfo<SparcELFMCAsmInfo> A(TheSparcTarget); - RegisterAsmInfo<SparcELFMCAsmInfo> B(TheSparcV9Target); - } /// SparcTargetMachine ctor - Create an ILP32 architecture model /// SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64bit) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, is64bit), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), FrameLowering(Subtarget) { @@ -56,12 +52,14 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : SparcTargetMachine(T, TT, FS, false) { + : SparcTargetMachine(T, TT, CPU, FS, false) { } SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : SparcTargetMachine(T, TT, FS, true) { + : SparcTargetMachine(T, TT, CPU, FS, true) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index c4bb6bd776d4..799fc497f4ae 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -34,7 +34,8 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcFrameLowering FrameLowering; public: SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64bit); + const std::string &CPU, const std::string &FS, + bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -62,7 +63,7 @@ public: class SparcV8TargetMachine : public SparcTargetMachine { public: SparcV8TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; /// SparcV9TargetMachine - Sparc 64-bit target machine @@ -70,7 +71,7 @@ public: class SparcV9TargetMachine : public SparcTargetMachine { public: SparcV9TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); }; } // end namespace llvm diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 1f5d3552ae7e..f4bdbd8cd173 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -1,14 +1,11 @@ set(LLVM_TARGET_DEFINITIONS SystemZ.td) -tablegen(SystemZGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(SystemZGenRegisterNames.inc -gen-register-enums) -tablegen(SystemZGenRegisterInfo.inc -gen-register-desc) -tablegen(SystemZGenInstrNames.inc -gen-instr-enums) -tablegen(SystemZGenInstrInfo.inc -gen-instr-desc) +tablegen(SystemZGenRegisterInfo.inc -gen-register-info) +tablegen(SystemZGenInstrInfo.inc -gen-instr-info) tablegen(SystemZGenAsmWriter.inc -gen-asm-writer) tablegen(SystemZGenDAGISel.inc -gen-dag-isel) tablegen(SystemZGenCallingConv.inc -gen-callingconv) -tablegen(SystemZGenSubtarget.inc -gen-subtarget) +tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget) add_llvm_target(SystemZCodeGen SystemZAsmPrinter.cpp @@ -16,7 +13,6 @@ add_llvm_target(SystemZCodeGen SystemZISelLowering.cpp SystemZInstrInfo.cpp SystemZFrameLowering.cpp - SystemZMCAsmInfo.cpp SystemZRegisterInfo.cpp 
SystemZSubtarget.cpp SystemZTargetMachine.cpp @@ -24,3 +20,4 @@ add_llvm_target(SystemZCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..2ac90164721f --- /dev/null +++ b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMSystemZDesc + SystemZMCTargetDesc.cpp + SystemZMCAsmInfo.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile new file mode 100644 index 000000000000..08f1a9d51fb5 --- /dev/null +++ b/lib/Target/SystemZ/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMSystemZDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 2dc7e7bd29bb..8540546b62d3 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -18,6 +18,8 @@ using namespace llvm; SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { + IsLittleEndian = false; + PointerSize = 8; PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; PCSymbol = "."; diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h index a6a27e2f4b6d..a6a27e2f4b6d 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp new file mode 100644 index 000000000000..5a826a6ef887 --- /dev/null +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -0,0 +1,58 @@ +//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides SystemZ specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCTargetDesc.h" +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SystemZGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SystemZGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createSystemZMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSystemZMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeSystemZMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, + createSystemZMCInstrInfo); +} + +static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitSystemZMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeSystemZMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, + createSystemZMCSubtargetInfo); +} + +extern "C" void LLVMInitializeSystemZMCAsmInfo() { + RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget); +} diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h new file mode 100644 index 000000000000..e2ad5afd6e57 --- /dev/null +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides SystemZ specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZMCTARGETDESC_H +#define SYSTEMZMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheSystemZTarget; + +} // End llvm namespace + +// Defines symbolic names for SystemZ registers. +// This defines a mapping from register name to register number. +#define GET_REGINFO_ENUM +#include "SystemZGenRegisterInfo.inc" + +// Defines symbolic names for the SystemZ instructions. +#define GET_INSTRINFO_ENUM +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SystemZGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index 6930e14c061e..6356491debeb 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -12,12 +12,11 @@ LIBRARYNAME = LLVMSystemZCodeGen TARGET = SystemZ # Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \ - SystemZGenRegisterInfo.inc SystemZGenInstrNames.inc \ - SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \ - SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc +BUILT_SOURCES = SystemZGenRegisterInfo.inc SystemZGenInstrInfo.inc \ + SystemZGenAsmWriter.inc SystemZGenDAGISel.inc \ + SystemZGenSubtargetInfo.inc SystemZGenCallingConv.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index ea5240a10c9a..88960b9cc601 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -15,6 +15,7 @@ #ifndef LLVM_TARGET_SystemZ_H #define LLVM_TARGET_SystemZ_H +#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -47,15 +48,5 @@ namespace llvm { FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); - extern Target TheSystemZTarget; - } // end namespace llvm; - -// Defines symbolic names for SystemZ registers. -// This defines a mapping from register name to register number. -#include "SystemZGenRegisterNames.inc" - -// Defines symbolic names for the SystemZ instructions. -#include "SystemZGenInstrNames.inc" - #endif diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index af85df53b059..871c2972a8c4 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -142,6 +142,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); // We have only 64-bit bitconverts setOperationAction(ISD::BITCAST, MVT::f32, Expand); diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index 2f2ef08dece1..ab45ec5984e3 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -108,11 +108,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { MachineInstr *MI = MIB; MachineFunction &MF = *MI->getParent()->getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); unsigned Flags = 0; - if (TID.mayLoad()) + if (MCID.mayLoad()) Flags |= MachineMemOperand::MOLoad; - if (TID.mayStore()) + if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo( diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index be5280323c34..99e2730609e8 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -16,17 +16,21 @@ #include "SystemZInstrInfo.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" -#include "SystemZGenInstrInfo.inc" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" + +#define GET_INSTRINFO_CTOR +#include "SystemZGenInstrInfo.inc" + using namespace llvm; 
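The TargetInstrDesc -> MCInstrDesc renames in the SystemZ hunks above are part of the same MC migration: the per-opcode descriptor now lives in the MC layer, but MachineInstr::getDesc() keeps the same query surface. The addFrameReference change is representative, with descriptor flags driving memory-operand construction. A sketch of that pattern as a free helper (memOperandFlags is a hypothetical name):

static unsigned memOperandFlags(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();  // was TargetInstrDesc
  unsigned Flags = 0;
  if (MCID.mayLoad())
    Flags |= MachineMemOperand::MOLoad;
  if (MCID.mayStore())
    Flags |= MachineMemOperand::MOStore;
  return Flags;
}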
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) - : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)), + : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN), RI(tm, *this), TM(tm) { } @@ -199,13 +203,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { } bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. - if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } @@ -343,7 +347,7 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } -const TargetInstrDesc& +const MCInstrDesc& SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const { switch (CC) { default: @@ -408,7 +412,7 @@ SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const { } } -const TargetInstrDesc& +const MCInstrDesc& SystemZInstrInfo::getLongDispOpc(unsigned Opc) const { switch (Opc) { default: diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 6cb720010207..6a31e9496365 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -19,6 +19,9 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/Target/TargetInstrInfo.h" +#define GET_INSTRINFO_HEADER +#include "SystemZGenInstrInfo.inc" + namespace llvm { class SystemZTargetMachine; @@ -47,7 +50,7 @@ namespace SystemZII { }; } -class SystemZInstrInfo : public TargetInstrInfoImpl { +class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; SystemZTargetMachine &TM; public: @@ -94,10 +97,10 @@ public: SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const; SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const; - const TargetInstrDesc& getBrCond(SystemZCC::CondCodes CC) const; - const TargetInstrDesc& getLongDispOpc(unsigned Opc) const; + const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const; + const MCInstrDesc& getLongDispOpc(unsigned Opc) const; - const TargetInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const { + const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const { if (Offset < 0 || Offset >= 4096) return getLongDispOpc(Opc); else diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index ed62cfff08aa..59692e883366 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -25,12 +25,15 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" + +#define GET_REGINFO_TARGET_DESC +#include "SystemZGenRegisterInfo.inc" + using namespace llvm; SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii) - : SystemZGenRegisterInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN), - TM(tm), TII(tii) { + : SystemZGenRegisterInfo(), TM(tm), TII(tii) { } const unsigned* @@ -51,10 +54,20 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const BitVector Reserved(getNumRegs()); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (TFI->hasFP(MF)) + if (TFI->hasFP(MF)) { + // R11D is 
the frame pointer. Reserve all aliases. Reserved.set(SystemZ::R11D); + Reserved.set(SystemZ::R11W); + Reserved.set(SystemZ::R10P); + Reserved.set(SystemZ::R10Q); + } + Reserved.set(SystemZ::R14D); Reserved.set(SystemZ::R15D); + Reserved.set(SystemZ::R14W); + Reserved.set(SystemZ::R15W); + Reserved.set(SystemZ::R14P); + Reserved.set(SystemZ::R14Q); return Reserved; } @@ -143,6 +156,3 @@ int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { assert(0 && "What is the dwarf register number"); return -1; } - - -#include "SystemZGenRegisterInfo.inc" diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index cd8f20fee617..2e262e1acc30 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -15,7 +15,9 @@ #define SystemZREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "SystemZGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "SystemZGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 9313ffdb4a0b..a24cbcf4ccd8 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -161,318 +161,45 @@ def F15L : FPRL<15, "f15", [F15S]>; // Status register def PSW : SystemZReg<"psw">; -/// Register classes -def GR32 : RegisterClass<"SystemZ", [i32], 32, - // Volatile registers - [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W, - // Frame pointer, sometimes allocable - R11W, - // Volatile, but not allocable - R14W, R15W]> -{ - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_REG32[] = { - SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, - SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, SystemZ::R11W, - SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, - SystemZ::R6W, SystemZ::R14W, SystemZ::R13W - }; - static const unsigned SystemZ_REG32_nofp[] = { - SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, - SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, /* No R11W */ - SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, - SystemZ::R6W, SystemZ::R14W, SystemZ::R13W - }; - GR32Class::iterator - GR32Class::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG32_nofp; - else - return SystemZ_REG32; - } - GR32Class::iterator - GR32Class::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned)); - else - return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned)); - } - }]; -} +/// Register classes. +/// Allocate the callee-saved R6-R12 backwards. That way they can be saved +/// together with R14 and R15 in one prolog instruction. +def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5), + (sequence "R%uW", 15, 6))>; /// Registers used to generate address. Everything except R0. 
-def ADDR32 : RegisterClass<"SystemZ", [i32], 32, - // Volatile registers - [R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W, - // Frame pointer, sometimes allocable - R11W, - // Volatile, but not allocable - R14W, R15W]> -{ - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_ADDR32[] = { - SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, - SystemZ::R5W, /* No R0W */ SystemZ::R12W, SystemZ::R11W, - SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, - SystemZ::R6W, SystemZ::R14W, SystemZ::R13W - }; - static const unsigned SystemZ_ADDR32_nofp[] = { - SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, - SystemZ::R5W, /* No R0W */ SystemZ::R12W, /* No R11W */ - SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, - SystemZ::R6W, SystemZ::R14W, SystemZ::R13W - }; - ADDR32Class::iterator - ADDR32Class::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_ADDR32_nofp; - else - return SystemZ_ADDR32; - } - ADDR32Class::iterator - ADDR32Class::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned)); - else - return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned)); - } - }]; -} +def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>; -def GR64 : RegisterClass<"SystemZ", [i64], 64, - // Volatile registers - [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D, - // Frame pointer, sometimes allocable - R11D, - // Volatile, but not allocable - R14D, R15D]> -{ +def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5), + (sequence "R%uD", 15, 6))> { let SubRegClasses = [(GR32 subreg_32bit)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_REG64[] = { - SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, - SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, SystemZ::R11D, - SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, - SystemZ::R6D, SystemZ::R14D, SystemZ::R13D - }; - static const unsigned SystemZ_REG64_nofp[] = { - SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, - SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, /* No R11D */ - SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, - SystemZ::R6D, SystemZ::R14D, SystemZ::R13D - }; - GR64Class::iterator - GR64Class::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG64_nofp; - else - return SystemZ_REG64; - } - GR64Class::iterator - GR64Class::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned)); - else - return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned)); - } - }]; } -def ADDR64 : RegisterClass<"SystemZ", 
[i64], 64, - // Volatile registers - [R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D, - // Frame pointer, sometimes allocable - R11D, - // Volatile, but not allocable - R14D, R15D]> -{ +def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> { let SubRegClasses = [(ADDR32 subreg_32bit)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_ADDR64[] = { - SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, - SystemZ::R5D, /* No R0D */ SystemZ::R12D, SystemZ::R11D, - SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, - SystemZ::R6D, SystemZ::R14D, SystemZ::R13D - }; - static const unsigned SystemZ_ADDR64_nofp[] = { - SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, - SystemZ::R5D, /* No R0D */ SystemZ::R12D, /* No R11D */ - SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, - SystemZ::R6D, SystemZ::R14D, SystemZ::R13D - }; - ADDR64Class::iterator - ADDR64Class::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_ADDR64_nofp; - else - return SystemZ_ADDR64; - } - ADDR64Class::iterator - ADDR64Class::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned)); - else - return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned)); - } - }]; } // Even-odd register pairs -def GR64P : RegisterClass<"SystemZ", [v2i32], 64, - [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> -{ +def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P, + R12P, R10P, R8P, R6P, + R14P)> { let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_REG64P[] = { - SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, SystemZ::R10P, - SystemZ::R8P, SystemZ::R6P }; - static const unsigned SystemZ_REG64P_nofp[] = { - SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, /* NO R10P */ - SystemZ::R8P, SystemZ::R6P }; - GR64PClass::iterator - GR64PClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG64P_nofp; - else - return SystemZ_REG64P; - } - GR64PClass::iterator - GR64PClass::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned)); - else - return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned)); - } - }]; } -def GR128 : RegisterClass<"SystemZ", [v2i64], 128, - [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> -{ +def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q, + R12Q, R10Q, R8Q, R6Q, + R14Q)> { let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32), - (GR64 subreg_even, subreg_odd)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - 
iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_REG128[] = { - SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, SystemZ::R10Q, - SystemZ::R8Q, SystemZ::R6Q }; - static const unsigned SystemZ_REG128_nofp[] = { - SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, /* NO R10Q */ - SystemZ::R8Q, SystemZ::R6Q }; - GR128Class::iterator - GR128Class::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG128_nofp; - else - return SystemZ_REG128; - } - GR128Class::iterator - GR128Class::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - if (TFI->hasFP(MF)) - return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned)); - else - return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned)); - } - }]; + (GR64 subreg_even, subreg_odd)]; } -def FP32 : RegisterClass<"SystemZ", [f32], 32, - [F0S, F1S, F2S, F3S, F4S, F5S, F6S, F7S, - F8S, F9S, F10S, F11S, F12S, F13S, F14S, F15S]> { - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_REGFP32[] = { - SystemZ::F0S, SystemZ::F2S, SystemZ::F4S, SystemZ::F6S, - SystemZ::F1S, SystemZ::F3S, SystemZ::F5S, SystemZ::F7S, - SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, - SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S }; - FP32Class::iterator - FP32Class::allocation_order_begin(const MachineFunction &MF) const { - return SystemZ_REGFP32; - } - FP32Class::iterator - FP32Class::allocation_order_end(const MachineFunction &MF) const { - return SystemZ_REGFP32 + (sizeof(SystemZ_REGFP32) / sizeof(unsigned)); - } - }]; -} +def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>; -def FP64 : RegisterClass<"SystemZ", [f64], 64, - [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L, - F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> { +def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> { let SubRegClasses = [(FP32 subreg_32bit)]; - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned SystemZ_REGFP64[] = { - SystemZ::F0L, SystemZ::F2L, SystemZ::F4L, SystemZ::F6L, - SystemZ::F1L, SystemZ::F3L, SystemZ::F5L, SystemZ::F7L, - SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L, - SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L }; - FP64Class::iterator - FP64Class::allocation_order_begin(const MachineFunction &MF) const { - return SystemZ_REGFP64; - } - FP64Class::iterator - FP64Class::allocation_order_end(const MachineFunction &MF) const { - return SystemZ_REGFP64 + (sizeof(SystemZ_REGFP64) / sizeof(unsigned)); - } - }]; } // Status flags registers. -def CCR : RegisterClass<"SystemZ", [i64], 64, [PSW]> { +def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> { let CopyCost = -1; // Don't allow copying of status registers. 
} diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index a8b5e1f18679..b3ed06639758 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -7,25 +7,32 @@ // //===----------------------------------------------------------------------===// // -// This file implements the SystemZ specific subclass of TargetSubtarget. +// This file implements the SystemZ specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "SystemZSubtarget.h" #include "SystemZ.h" -#include "SystemZGenSubtarget.inc" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "SystemZGenSubtargetInfo.inc" using namespace llvm; SystemZSubtarget::SystemZSubtarget(const std::string &TT, + const std::string &CPU, const std::string &FS): - HasZ10Insts(false) { - std::string CPU = "z9"; + SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) { + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "z9"; // Parse features string. - ParseSubtargetFeatures(FS, CPU); + ParseSubtargetFeatures(CPUName, FS); } /// True if accessing the GV requires an extra load. diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 405d6e91b7ee..55cfd80002bc 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -7,33 +7,36 @@ // //===----------------------------------------------------------------------===// // -// This file declares the SystemZ specific subclass of TargetSubtarget. +// This file declares the SystemZ specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef LLVM_TARGET_SystemZ_SUBTARGET_H #define LLVM_TARGET_SystemZ_SUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" - +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "SystemZGenSubtargetInfo.inc" + namespace llvm { class GlobalValue; +class StringRef; class TargetMachine; -class SystemZSubtarget : public TargetSubtarget { +class SystemZSubtarget : public SystemZGenSubtargetInfo { bool HasZ10Insts; public: /// This constructor initializes the data members to match that /// of the specified triple. /// - SystemZSubtarget(const std::string &TT, const std::string &FS); + SystemZSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool isZ10() const { return HasZ10Insts; } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 160389942998..48298cc744e7 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#include "SystemZMCAsmInfo.h" #include "SystemZTargetMachine.h" #include "SystemZ.h" #include "llvm/PassManager.h" @@ -17,16 +16,16 @@ using namespace llvm; extern "C" void LLVMInitializeSystemZTarget() { // Register the target. 
RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); - RegisterAsmInfo<SystemZMCAsmInfo> Y(TheSystemZTarget); } /// SystemZTargetMachine ctor - Create an ILP64 architecture model /// SystemZTargetMachine::SystemZTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 524f83d13229..e40b556c0c3c 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -38,7 +38,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { SystemZFrameLowering FrameLowering; public: SystemZTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 0919fe42dc0e..a42ce548c895 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -97,10 +97,6 @@ unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructT return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element); } -void LLVMInvalidateStructLayout(LLVMTargetDataRef TD, LLVMTypeRef StructTy) { - unwrap(TD)->InvalidateStructLayoutInfo(unwrap<StructType>(StructTy)); -} - void LLVMDisposeTargetData(LLVMTargetDataRef TD) { delete unwrap(TD); } diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp index 6fa5420120f5..a97b0e868989 100644 --- a/lib/Target/TargetAsmInfo.cpp +++ b/lib/Target/TargetAsmInfo.cpp @@ -17,11 +17,7 @@ using namespace llvm; TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) { TLOF = &TM.getTargetLowering()->getObjFileLowering(); - const TargetData &TD = *TM.getTargetData(); - IsLittleEndian = TD.isLittleEndian(); - PointerSize = TD.getPointerSize(); - const TargetFrameLowering &TFI = *TM.getFrameLowering(); - StackDir = TFI.getStackGrowthDirection(); + TFI = TM.getFrameLowering(); TRI = TM.getRegisterInfo(); - TFI.getInitialFrameState(InitialFrameState); + TFI->getInitialFrameState(InitialFrameState); } diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 1990bc7b929c..17d022a339e6 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -42,6 +42,7 @@ char TargetData::ID = 0; //===----------------------------------------------------------------------===// StructLayout::StructLayout(const StructType *ST, const TargetData &TD) { + assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); StructAlignment = 0; StructSize = 0; NumElements = ST->getNumElements(); @@ -313,63 +314,21 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, namespace { -class StructLayoutMap : public AbstractTypeUser { +class StructLayoutMap { typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; LayoutInfoTy LayoutInfo; - void RemoveEntry(LayoutInfoTy::iterator I, bool WasAbstract) { - I->second->~StructLayout(); - free(I->second); - if (WasAbstract) - I->first->removeAbstractTypeUser(this); - LayoutInfo.erase(I); - } - - - /// refineAbstractType - The callback method invoked when an abstract type is - /// resolved to another type. 
An object must override this method to update - /// its internal state to reference NewType instead of OldType. - /// - virtual void refineAbstractType(const DerivedType *OldTy, - const Type *) { - LayoutInfoTy::iterator I = LayoutInfo.find(cast<const StructType>(OldTy)); - assert(I != LayoutInfo.end() && "Using type but not in map?"); - RemoveEntry(I, true); - } - - /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware - /// of is when a type makes the transition from being abstract (where it has - /// clients on its AbstractTypeUsers list) to concrete (where it does not). - /// This method notifies ATU's when this occurs for a type. - /// - virtual void typeBecameConcrete(const DerivedType *AbsTy) { - LayoutInfoTy::iterator I = LayoutInfo.find(cast<const StructType>(AbsTy)); - assert(I != LayoutInfo.end() && "Using type but not in map?"); - RemoveEntry(I, true); - } - public: virtual ~StructLayoutMap() { // Remove any layouts. - for (LayoutInfoTy::iterator - I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) { - const Type *Key = I->first; + for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end(); + I != E; ++I) { StructLayout *Value = I->second; - - if (Key->isAbstract()) - Key->removeAbstractTypeUser(this); - Value->~StructLayout(); free(Value); } } - void InvalidateEntry(const StructType *Ty) { - LayoutInfoTy::iterator I = LayoutInfo.find(Ty); - if (I == LayoutInfo.end()) return; - RemoveEntry(I, Ty->isAbstract()); - } - StructLayout *&operator[](const StructType *STy) { return LayoutInfo[STy]; } @@ -404,22 +363,9 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { new (L) StructLayout(Ty, *this); - if (Ty->isAbstract()) - Ty->addAbstractTypeUser(STM); - return L; } -/// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout -/// objects. If a TargetData object is alive when types are being refined and -/// removed, this method must be called whenever a StructType is removed to -/// avoid a dangling pointer in this cache. -void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { - if (!LayoutMap) return; // No cache. - - static_cast<StructLayoutMap*>(LayoutMap)->InvalidateEntry(Ty); -} - std::string TargetData::getStringRepresentation() const { std::string Result; raw_string_ostream OS(Result); @@ -570,7 +516,7 @@ unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { /// getIntPtrType - Return an unsigned integer type that is the same size or /// greater to the host pointer size. 
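The StructLayoutMap rewrite above is a consequence of the new type system: struct types can no longer be refined after creation, so the cache needs no AbstractTypeUser callbacks to drop entries when a type mutates. What remains is a plain DenseMap whose destructor tears down the placement-new'd layouts by hand, roughly (a sketch, not the exact file):

class StructLayoutMap {
  typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
  LayoutInfoTy LayoutInfo;
public:
  ~StructLayoutMap() {
    for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
         I != E; ++I) {
      // StructLayouts are malloc'd with a trailing array and constructed
      // with placement new, so destroy and free them explicitly.
      I->second->~StructLayout();
      free(I->second);
    }
  }
  StructLayout *&operator[](const StructType *STy) { return LayoutInfo[STy]; }
};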
-const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { +IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { return IntegerType::get(C, getPointerSizeInBits()); } diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index d4b76972e49a..d52ecb32cf75 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -12,44 +12,39 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" #include <cctype> using namespace llvm; //===----------------------------------------------------------------------===// -// TargetOperandInfo +// TargetInstrInfo //===----------------------------------------------------------------------===// -/// getRegClass - Get the register class for the operand, handling resolution -/// of "symbolic" pointer register classes etc. If this is not a register -/// operand, this returns null. -const TargetRegisterClass * -TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const { - if (isLookupPtrRegClass()) +TargetInstrInfo::~TargetInstrInfo() { +} + +const TargetRegisterClass* +TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + if (OpNum >= MCID.getNumOperands()) + return 0; + + short RegClass = MCID.OpInfo[OpNum].RegClass; + if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) return TRI->getPointerRegClass(RegClass); + // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) return 0; + // Otherwise just look it up normally. return TRI->getRegClass(RegClass); } -//===----------------------------------------------------------------------===// -// TargetInstrInfo -//===----------------------------------------------------------------------===// - -TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc, - unsigned numOpcodes) - : Descriptors(Desc), NumOpcodes(numOpcodes) { -} - -TargetInstrInfo::~TargetInstrInfo() { -} - unsigned TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const { @@ -135,13 +130,13 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. 
- if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 3343384791fb..703431b3806e 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -35,38 +35,39 @@ using namespace llvm; // Generic Code //===----------------------------------------------------------------------===// -TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) { - TextSection = 0; - DataSection = 0; - BSSSection = 0; - ReadOnlySection = 0; - StaticCtorSection = 0; - StaticDtorSection = 0; - LSDASection = 0; - - CommDirectiveSupportsAlignment = true; - DwarfAbbrevSection = 0; - DwarfInfoSection = 0; - DwarfLineSection = 0; - DwarfFrameSection = 0; - DwarfPubNamesSection = 0; - DwarfPubTypesSection = 0; - DwarfDebugInlineSection = 0; - DwarfStrSection = 0; - DwarfLocSection = 0; - DwarfARangesSection = 0; - DwarfRangesSection = 0; - DwarfMacroInfoSection = 0; - - IsFunctionEHFrameSymbolPrivate = true; - SupportsWeakOmittedEHFrame = true; +TargetLoweringObjectFile::TargetLoweringObjectFile() : + Ctx(0), + TextSection(0), + DataSection(0), + BSSSection(0), + ReadOnlySection(0), + StaticCtorSection(0), + StaticDtorSection(0), + LSDASection(0), + CompactUnwindSection(0), + DwarfAbbrevSection(0), + DwarfInfoSection(0), + DwarfLineSection(0), + DwarfFrameSection(0), + DwarfPubNamesSection(0), + DwarfPubTypesSection(0), + DwarfDebugInlineSection(0), + DwarfStrSection(0), + DwarfLocSection(0), + DwarfARangesSection(0), + DwarfRangesSection(0), + DwarfMacroInfoSection(0), + TLSExtraDataSection(0), + CommDirectiveSupportsAlignment(true), + SupportsWeakOmittedEHFrame(true), + IsFunctionEHFrameSymbolPrivate(true) { } TargetLoweringObjectFile::~TargetLoweringObjectFile() { } static bool isSuitableForBSS(const GlobalVariable *GV) { - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); // Must have zero initializer. if (!C->isNullValue()) @@ -168,7 +169,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, return SectionKind::getBSS(); } - Constant *C = GVar->getInitializer(); + const Constant *C = GVar->getInitializer(); // If the global is marked constant, we can put it into a mergable section, // a mergable string section, or general .data if it contains relocations. 
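isSuitableForBSS above now reads the initializer through a const Constant*, part of the const-correctness sweep in this patch; the classification itself is untouched. The rule is simply that .bss is zero-filled by the loader, so only an all-zero initializer qualifies. As a simplified predicate (looksBSSSuitable is a hypothetical name; the real function also honors flags such as -nozero-initialized-in-bss):

static bool looksBSSSuitable(const GlobalVariable *GV) {
  const Constant *C = GV->getInitializer();
  // Anything with a non-zero bit pattern must be emitted into .data,
  // since .bss contributes no bytes to the object file.
  return C->isNullValue();
}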
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 863b8114dc30..74a1f4e8da56 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -43,7 +43,7 @@ namespace llvm { Reloc::Model RelocationModel; CodeModel::Model CMModel; bool GuaranteedTailCallOpt; - unsigned StackAlignment; + unsigned StackAlignmentOverride; bool RealignStack; bool DisableJumpTables; bool StrongPHIElim; @@ -183,7 +183,7 @@ EnableGuaranteedTailCallOpt("tailcallopt", static cl::opt<unsigned, true> OverrideStackAlignment("stack-alignment", cl::desc("Override default stack alignment"), - cl::location(StackAlignment), + cl::location(StackAlignmentOverride), cl::init(0)); static cl::opt<bool, true> EnableRealignStack("realign-stack", @@ -216,8 +216,9 @@ FunctionSections("ffunction-sections", // TargetMachine Class // -TargetMachine::TargetMachine(const Target &T) - : TheTarget(T), AsmInfo(0), +TargetMachine::TargetMachine(const Target &T, + StringRef TT, StringRef CPU, StringRef FS) + : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0), MCRelaxAll(false), MCNoExecStack(false), MCSaveTempLabels(false), diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 1c3f2dda33c7..90a8f8d8fdcc 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -20,21 +20,11 @@ using namespace llvm; -TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, +TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, - const char *const *subregindexnames, - int CFSO, int CFDO, - const unsigned* subregs, const unsigned subregsize, - const unsigned* aliases, const unsigned aliasessize) - : SubregHash(subregs), SubregHashSize(subregsize), - AliasesHash(aliases), AliasesHashSize(aliasessize), - Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR), + const char *const *subregindexnames) + : InfoDesc(ID), SubRegIndexNames(subregindexnames), RegClassBegin(RCB), RegClassEnd(RCE) { - assert(isPhysicalRegister(NumRegs) && - "Target has too many physical registers!"); - - CallFrameSetupOpcode = CFSO; - CallFrameDestroyOpcode = CFDO; } TargetRegisterInfo::~TargetRegisterInfo() {} @@ -83,14 +73,14 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { /// registers for the specific register class. 
static void getAllocatableSetForRC(const MachineFunction &MF, const TargetRegisterClass *RC, BitVector &R){ - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - R.set(*I); + ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF); + for (unsigned i = 0; i != Order.size(); ++i) + R.set(Order[i]); } BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC) const { - BitVector Allocatable(NumRegs); + BitVector Allocatable(getNumRegs()); if (RC) { getAllocatableSetForRC(MF, RC, Allocatable); } else { diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtargetInfo.cpp index edb76f971533..59ffdea00ea6 100644 --- a/lib/Target/TargetSubtarget.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- TargetSubtarget.cpp - General Target Information -------------------==// +//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==// // // The LLVM Compiler Infrastructure // @@ -11,18 +11,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; //--------------------------------------------------------------------------- -// TargetSubtarget Class +// TargetSubtargetInfo Class // -TargetSubtarget::TargetSubtarget() {} +TargetSubtargetInfo::TargetSubtargetInfo() {} -TargetSubtarget::~TargetSubtarget() {} +TargetSubtargetInfo::~TargetSubtargetInfo() {} -bool TargetSubtarget::enablePostRAScheduler( +bool TargetSubtargetInfo::enablePostRAScheduler( CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c352bfcd8cce..d45dd352fbc4 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -15,9 +15,11 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -25,17 +27,15 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; namespace { struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { + MCSubtargetInfo &STI; MCAsmParser &Parser; - TargetMachine &TM; - -protected: - unsigned Is64Bit : 1; private: MCAsmParser &getParser() const { return Parser; } @@ -61,6 +61,11 @@ private: /// or %es:(%edi) in 32bit mode. bool isDstOp(X86Operand &Op); + bool is64BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) != 0; + } + /// @name Auto-generated Matcher Functions /// { @@ -70,12 +75,11 @@ private: /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM) - : TargetAsmParser(T), Parser(parser), TM(TM) { + X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) + : TargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. 
- setAvailableFeatures(ComputeAvailableFeatures( - &TM.getSubtarget<X86Subtarget>())); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); @@ -84,23 +88,6 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); }; - -class X86_32ATTAsmParser : public X86ATTAsmParser { -public: - X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) - : X86ATTAsmParser(T, Parser, TM) { - Is64Bit = false; - } -}; - -class X86_64ATTAsmParser : public X86ATTAsmParser { -public: - X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) - : X86ATTAsmParser(T, Parser, TM) { - Is64Bit = true; - } -}; - } // end anonymous namespace /// @name Auto-generated Match Functions @@ -155,7 +142,7 @@ struct X86Operand : public MCParsedAsmOperand { /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - virtual void dump(raw_ostream &OS) const {} + virtual void print(raw_ostream &OS) const {} StringRef getToken() const { assert(Kind == Token && "Invalid access!"); @@ -365,7 +352,7 @@ struct X86Operand : public MCParsedAsmOperand { } // end anonymous namespace. bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { - unsigned basereg = Is64Bit ? X86::RSI : X86::ESI; + unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; return (Op.isMem() && (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && @@ -375,7 +362,7 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { } bool X86ATTAsmParser::isDstOp(X86Operand &Op) { - unsigned basereg = Is64Bit ? X86::RDI : X86::EDI; + unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; return Op.isMem() && Op.Mem.SegReg == X86::ES && isa<MCConstantExpr>(Op.Mem.Disp) && @@ -406,7 +393,7 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: This should be done using Requires<In32BitMode> and // Requires<In64BitMode> so "eiz" usage in 64-bit instructions // can be also checked. - if (RegNo == X86::RIZ && !Is64Bit) + if (RegNo == X86::RIZ && !is64BitMode()) return Error(Tok.getLoc(), "riz register in 64-bit mode only"); // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 
@@ -710,23 +697,6 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } - // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq - if (PatchedName.startswith("vpclmul")) { - unsigned CLMULQuadWordSelect = StringSwitch<unsigned>( - PatchedName.slice(7, PatchedName.size() - 2)) - .Case("lqlq", 0x00) // src1[63:0], src2[63:0] - .Case("hqlq", 0x01) // src1[127:64], src2[63:0] - .Case("lqhq", 0x10) // src1[63:0], src2[127:64] - .Case("hqhq", 0x11) // src1[127:64], src2[127:64] - .Default(~0U); - if (CLMULQuadWordSelect != ~0U) { - ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, - getParser().getContext()); - assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); - PatchedName = "vpclmulqdq"; - } - } - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (ExtraImmOp) @@ -843,7 +813,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" if (Name.startswith("movs") && Operands.size() == 3 && (Name == "movsb" || Name == "movsw" || Name == "movsl" || - (Is64Bit && Name == "movsq"))) { + (is64BitMode() && Name == "movsq"))) { X86Operand &Op = *(X86Operand*)Operands.begin()[1]; X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; if (isSrcOp(Op) && isDstOp(Op2)) { @@ -856,7 +826,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" if (Name.startswith("lods") && Operands.size() == 3 && (Name == "lods" || Name == "lodsb" || Name == "lodsw" || - Name == "lodsl" || (Is64Bit && Name == "lodsq"))) { + Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) { X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); if (isSrcOp(*Op1) && Op2->isReg()) { @@ -886,7 +856,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" if (Name.startswith("stos") && Operands.size() == 3 && (Name == "stos" || Name == "stosb" || Name == "stosw" || - Name == "stosl" || (Is64Bit && Name == "stosq"))) { + Name == "stosl" || (is64BitMode() && Name == "stosq"))) { X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); if (isDstOp(*Op2) && Op1->isReg()) { @@ -1161,8 +1131,8 @@ extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. 
extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); - RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); + RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); + RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index b5fa94f12bc7..b112f9ff69bb 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -1,18 +1,15 @@ set(LLVM_TARGET_DEFINITIONS X86.td) -tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(X86GenRegisterNames.inc -gen-register-enums) -tablegen(X86GenRegisterInfo.inc -gen-register-desc) +tablegen(X86GenRegisterInfo.inc -gen-register-info) tablegen(X86GenDisassemblerTables.inc -gen-disassembler) -tablegen(X86GenInstrNames.inc -gen-instr-enums) -tablegen(X86GenInstrInfo.inc -gen-instr-desc) +tablegen(X86GenInstrInfo.inc -gen-instr-info) tablegen(X86GenAsmWriter.inc -gen-asm-writer) tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) tablegen(X86GenAsmMatcher.inc -gen-asm-matcher) tablegen(X86GenDAGISel.inc -gen-dag-isel) tablegen(X86GenFastISel.inc -gen-fast-isel) tablegen(X86GenCallingConv.inc -gen-callingconv) -tablegen(X86GenSubtarget.inc -gen-subtarget) +tablegen(X86GenSubtargetInfo.inc -gen-subtarget) tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) set(sources @@ -30,7 +27,6 @@ set(sources X86InstrInfo.cpp X86JITInfo.cpp X86MachObjectWriter.cpp - X86MCAsmInfo.cpp X86MCCodeEmitter.cpp X86MCInstLower.cpp X86RegisterInfo.cpp @@ -60,5 +56,6 @@ add_llvm_target(X86CodeGen ${sources}) add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) add_subdirectory(TargetInfo) add_subdirectory(Utils) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index d8a105e7e9d2..4a0d2ec727a9 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -26,7 +26,8 @@ #include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" -#include "X86GenRegisterNames.inc" +#define GET_REGINFO_ENUM +#include "X86GenRegisterInfo.inc" #include "X86GenEDInfo.inc" using namespace llvm; diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 68247d2f1a5b..c37d8797b39c 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,30 +15,23 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" -#include "X86Subtarget.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" #include <map> using namespace llvm; // Include the auto-generated portion of the assembly writer. #define GET_INSTRUCTION_NAME #define PRINT_ALIAS_INSTR -#include "X86GenRegisterNames.inc" #include "X86GenAsmWriter.inc" -#undef PRINT_ALIAS_INSTR -#undef GET_INSTRUCTION_NAME -X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) +X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) { - // Initialize the set of available features. 
- setAvailableFeatures(ComputeAvailableFeatures( - &TM.getSubtarget<X86Subtarget>())); } void X86ATTInstPrinter::printRegName(raw_ostream &OS, diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 5f939b61da21..5426e5cf38d9 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -19,19 +19,15 @@ namespace llvm { class MCOperand; -class X86Subtarget; -class TargetMachine; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI); + X86ATTInstPrinter(const MCAsmInfo &MAI); virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS); virtual StringRef getOpcodeName(unsigned Opcode) const; - // Methods used to print the alias of an instruction. - unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; // Autogenerated by tblgen, returns true if we successfully printed an // alias. bool printAliasInstr(const MCInst *MI, raw_ostream &OS); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index c642acc3b9a2..4e28dfe7fa81 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86InstComments.h" -#include "X86GenInstrNames.inc" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" #include "../Utils/X86ShuffleDecode.h" diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 5f581bab3906..506e26cbf7cd 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,13 +15,12 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" -#include "X86Subtarget.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" #include <cctype> using namespace llvm; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index c8030c3ecdac..e84a1940017d 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -20,11 +20,10 @@ namespace llvm { class MCOperand; -class TargetMachine; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + X86IntelInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..ca88f8ffd08c --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMX86Desc + X86MCTargetDesc.cpp + X86MCAsmInfo.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
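The new LLVMX86Desc library carries only the MC-level target descriptions (asm info, and further down, register/instruction/subtarget tables), so MC clients such as the assembler and disassembler can obtain them without linking the whole code generator. Registration goes through factory functions keyed by target, as X86MCTargetDesc.cpp does below; a toy version of that pattern, with invented names, might look like:

#include <iostream>
#include <map>
#include <string>

// Toy stand-in for an MC-level target description.
struct ToyMCAsmInfo {
  std::string Desc;
};

// Factory signature: build a description for a given target triple.
typedef ToyMCAsmInfo *(*AsmInfoCtorTy)(const std::string &Triple);

// The registry maps a target name to its factory. Real LLVM keys this off
// a Target object rather than a string.
static std::map<std::string, AsmInfoCtorTy> &getRegistry() {
  static std::map<std::string, AsmInfoCtorTy> Registry;
  return Registry;
}

static void registerAsmInfo(const std::string &Target, AsmInfoCtorTy Fn) {
  getRegistry()[Target] = Fn;
}

static ToyMCAsmInfo *createAsmInfo(const std::string &Target,
                                   const std::string &Triple) {
  std::map<std::string, AsmInfoCtorTy>::iterator I = getRegistry().find(Target);
  return I == getRegistry().end() ? 0 : I->second(Triple);
}

// A target-specific factory, analogous in spirit to createX86MCAsmInfo below.
static ToyMCAsmInfo *createToyX86AsmInfo(const std::string &Triple) {
  ToyMCAsmInfo *AI = new ToyMCAsmInfo();
  AI->Desc = "asm info for " + Triple;
  return AI;
}

int main() {
  registerAsmInfo("x86", createToyX86AsmInfo);   // done once at startup
  ToyMCAsmInfo *AI = createAsmInfo("x86", "x86_64-unknown-linux-gnu");
  if (AI) {
    std::cout << AI->Desc << '\n';
    delete AI;
  }
  return 0;
}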
diff --git a/lib/Target/X86/MCTargetDesc/Makefile b/lib/Target/X86/MCTargetDesc/Makefile new file mode 100644 index 000000000000..b19774ee379e --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/X86/MCTargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 2e1ec6317601..27031005bd09 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "X86MCAsmInfo.h" -#include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -45,14 +44,17 @@ static const char *const x86_asm_table[] = { "{flags}", "", "{dirflag}", "", "{fpsr}", "", + "{fpcr}", "", "{cc}", "cc", 0,0}; -X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { +X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { + bool is64Bit = T.getArch() == Triple::x86_64; + if (is64Bit) + PointerSize = 8; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; - - bool is64Bit = Triple.getArch() == Triple::x86_64; TextAlignFillValue = 0x90; @@ -74,22 +76,14 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { ExceptionsType = ExceptionHandling::DwarfCFI; } -const MCExpr * -X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const { - MCContext &Context = Streamer.getContext(); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); - const MCExpr *Four = MCConstantExpr::Create(4, Context); - return MCBinaryExpr::CreateAdd(Res, Four, Context); -} - X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) : X86MCAsmInfoDarwin(Triple) { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { + if (T.getArch() == Triple::x86_64) + PointerSize = 8; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; @@ -114,6 +108,17 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { Data64bitsDirective = 0; } +const MCExpr * +X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + MCContext &Context = Streamer.getContext(); + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); + const MCExpr *Four = MCConstantExpr::Create(4, Context); + return MCBinaryExpr::CreateAdd(Res, Four, Context); +} + const MCSection *X86ELFMCAsmInfo:: getNonexecutableStackSection(MCContext &Ctx) const { return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index 2cd4c8eb30ec..2cd4c8eb30ec 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp new file mode 100644 index 
000000000000..b77f37b03f19 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -0,0 +1,185 @@ +//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetDesc.h" +#include "X86MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Host.h" + +#define GET_REGINFO_MC_DESC +#include "X86GenRegisterInfo.inc" + +#define GET_INSTRINFO_MC_DESC +#include "X86GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "X86GenSubtargetInfo.inc" + +using namespace llvm; + + +std::string X86_MC::ParseX86Triple(StringRef TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::x86_64) + return "+64bit-mode"; + return "-64bit-mode"; +} + +/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the +/// specified arguments. If we can't run cpuid on the host, return true. +bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + #if defined(__GNUC__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #endif +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + #if defined(__GNUC__) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + __asm { + mov eax,value + cpuid + mov esi,rEAX + mov dword ptr [esi],eax + mov esi,rEBX + mov dword ptr [esi],ebx + mov esi,rECX + mov dword ptr [esi],ecx + mov esi,rEDX + mov dword ptr [esi],edx + } + return false; + #endif +#endif + return true; +} + +void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, + unsigned &Model) { + Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (Family == 6 || Family == 0xf) { + if (Family == 0xf) + // Examine extended family ID if family ID is F. + Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. 
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + std::string ArchFS = X86_MC::ParseX86Triple(TT); + if (!FS.empty()) { + if (!ArchFS.empty()) + ArchFS = ArchFS + "," + FS.str(); + else + ArchFS = FS; + } + + std::string CPUName = CPU; + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) + CPUName = sys::getHostCPUName(); +#else + CPUName = "generic"; +#endif + } + + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeX86MCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target, + X86_MC::createX86MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target, + X86_MC::createX86MCSubtargetInfo); +} + +static MCInstrInfo *createX86MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitX86MCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeX86MCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo); +} + +static MCRegisterInfo *createX86MCRegisterInfo() { + MCRegisterInfo *X = new MCRegisterInfo(); + InitX86MCRegisterInfo(X); + return X; +} + +extern "C" void LLVMInitializeX86MCRegInfo() { + TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo); +} + + +static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { + if (TheTriple.getArch() == Triple::x86_64) + return new X86_64MCAsmInfoDarwin(TheTriple); + else + return new X86MCAsmInfoDarwin(TheTriple); + } + + if (TheTriple.isOSWindows()) + return new X86MCAsmInfoCOFF(TheTriple); + + return new X86ELFMCAsmInfo(TheTriple); +} + +extern "C" void LLVMInitializeX86MCAsmInfo() { + // Register the target asm info. + RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo); + RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo); +} diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h new file mode 100644 index 000000000000..89ea22b31be2 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -0,0 +1,60 @@ +//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides X86 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef X86MCTARGETDESC_H +#define X86MCTARGETDESC_H + +#include <string> + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheX86_32Target, TheX86_64Target; + +namespace X86_MC { + std::string ParseX86Triple(StringRef TT); + + /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in + /// the specified arguments. If we can't run cpuid on the host, return true. 
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX); + + void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); + + /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance. + /// This is exposed so Asm parser, etc. do not need to go through + /// TargetRegistry. + MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS); +} + +} // End llvm namespace + + +// Defines symbolic names for X86 registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "X86GenRegisterInfo.inc" + +// Defines symbolic names for the X86 instructions. +// +#define GET_INSTRINFO_ENUM +#include "X86GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "X86GenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 12fb090d4dce..949661eb99e9 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -12,14 +12,13 @@ LIBRARYNAME = LLVMX86CodeGen TARGET = X86 # Make sure that tblgen is run, first thing. -BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ - X86GenRegisterInfo.inc X86GenInstrNames.inc \ - X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \ +BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \ + X86GenAsmWriter.inc X86GenAsmMatcher.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ - X86GenCallingConv.inc X86GenSubtarget.inc \ + X86GenCallingConv.inc X86GenSubtargetInfo.inc \ X86GenEDInfo.inc -DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils +DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 0ca436690040..ec52dfb3e7d1 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -15,6 +15,7 @@ #ifndef TARGET_X86_H #define TARGET_X86_H +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" @@ -22,10 +23,12 @@ namespace llvm { class FunctionPass; class JITCodeEmitter; +class MachineCodeEmitter; class MCCodeEmitter; class MCContext; +class MCInstrInfo; class MCObjectWriter; -class MachineCodeEmitter; +class MCSubtargetInfo; class Target; class TargetAsmBackend; class X86TargetMachine; @@ -57,10 +60,9 @@ FunctionPass *createSSEDomainFixPass(); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, - MCContext &Ctx); -MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, - MCContext &Ctx); +MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &); TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &); @@ -84,17 +86,6 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); -extern Target TheX86_32Target, TheX86_64Target; - } // End llvm namespace -// Defines symbolic names for X86 registers. This defines a mapping from -// register name to register number. -// -#include "X86GenRegisterNames.inc" - -// Defines symbolic names for the X86 instructions. 
-// -#include "X86GenInstrNames.inc" - #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7bb96766cceb..4ccb43fe18cc 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -17,6 +17,13 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// +// X86 Subtarget state. +// + +def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", + "64-bit mode (x86_64)">; + +//===----------------------------------------------------------------------===// // X86 Subtarget features. //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 4d7d96dcb36b..9b556a55efd9 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -194,6 +194,9 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) { // PUSH case X86::PUSHi8: return X86::PUSHi32; + case X86::PUSHi16: return X86::PUSHi32; + case X86::PUSH64i8: return X86::PUSH64i32; + case X86::PUSH64i16: return X86::PUSH64i32; } } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index c2d53c4dd26c..99b4479a9fc9 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -709,13 +709,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, //===----------------------------------------------------------------------===// static MCInstPrinter *createX86MCInstPrinter(const Target &T, - TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(TM, MAI); + return new X86ATTInstPrinter(MAI); if (SyntaxVariant == 1) - return new X86IntelInstPrinter(TM, MAI); + return new X86IntelInstPrinter(MAI); return 0; } diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 56351756e8dd..77b99056ae00 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -44,11 +44,11 @@ def RetCC_X86Common : CallingConv<[ // can only be used by ABI non-compliant code. This vector type is only // supported while using the AVX target feature. CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>, + CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>, // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>, + CCIfType<[x86mmx], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -91,10 +91,7 @@ def RetCC_X86_64_C : CallingConv<[ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>, CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, - // MMX vector types are always returned in XMM0 except for v1i64 which is - // returned in RAX. This disagrees with ABI documentation but is bug - // compatible with gcc. - CCIfType<[v1i64], CCAssignToReg<[RAX]>>, + // MMX vector types are always returned in XMM0. CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; @@ -102,11 +99,7 @@ def RetCC_X86_64_C : CallingConv<[ // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, - - // And FP in XMM0 only. 
- CCIfType<[f32], CCAssignToReg<[XMM0]>>, - CCIfType<[f64], CCAssignToReg<[XMM0]>>, + CCIfType<[x86mmx], CCBitConvertToType<i64>>, // Otherwise, everything is the same as 'normal' X86-64 C CC. CCDelegateTo<RetCC_X86_64_C> @@ -150,17 +143,11 @@ def CC_X86_64_C : CallingConv<[ // The 'nest' parameter, if any, is passed in R10. CCIfNest<CCAssignToReg<[R10]>>, - // The first 6 v1i64 vector arguments are passed in GPRs on Darwin. - CCIfType<[v1i64], - CCIfSubtarget<"isTargetDarwin()", - CCBitConvertToType<i64>>>, - // The first 6 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>, CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>, - // The first 8 MMX (except for v1i64) vector arguments are passed in XMM - // registers on Darwin. + // The first 8 MMX vector arguments are passed in XMM registers on Darwin. CCIfType<[x86mmx], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasXMMInt()", @@ -189,10 +176,7 @@ def CC_X86_64_C : CallingConv<[ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned. CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - CCAssignToStack<32, 32>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> + CCAssignToStack<32, 32>> ]>; // Calling convention used on Win64 @@ -210,7 +194,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, + CCIfType<[x86mmx], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], @@ -236,10 +220,7 @@ def CC_X86_Win64_C : CallingConv<[ // Long doubles get stack slots whose size and alignment depends on the // subtarget. - CCIfType<[f80], CCAssignToStack<0, 0>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> + CCIfType<[f80], CCAssignToStack<0, 0>> ]>; def CC_X86_64_GHC : CallingConv<[ @@ -273,8 +254,8 @@ def CC_X86_32_Common : CallingConv<[ CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>, - // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx - // registers if the call is not a vararg call. + // The first 3 __m64 vector arguments are passed in mmx registers if the + // call is not a vararg call. CCIfNotVarArg<CCIfType<[x86mmx], CCAssignToReg<[MM0, MM1, MM2]>>>, @@ -306,7 +287,7 @@ def CC_X86_32_Common : CallingConv<[ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area. - CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>; + CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>; def CC_X86_32_C : CallingConv<[ // Promote i8/i16 arguments to i32. 
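The calling-convention entries above are declarative, but they resolve in order at argument-assignment time: each CCIfType line either hands the value the next free register from its list or falls through until a stack rule matches. A rough operational sketch of that cascade for the 32-bit __m64 rules (the MM0-MM2 register list and the 8-byte/4-byte-aligned slot come from CC_X86_32_Common above; everything else is illustrative):

#include <iostream>
#include <string>
#include <vector>

// Hand out registers from a fixed list until they run out, then fall back
// to aligned slots in the parameter area, mirroring
// CCAssignToReg<[MM0, MM1, MM2]> followed by CCAssignToStack<8, 4>.
int main() {
  std::vector<std::string> Regs;
  Regs.push_back("MM0");
  Regs.push_back("MM1");
  Regs.push_back("MM2");
  unsigned NextReg = 0, StackOffset = 0;

  for (unsigned i = 0; i != 5; ++i) {
    std::cout << "__m64 arg" << i << " -> ";
    if (NextReg != Regs.size()) {
      std::cout << Regs[NextReg++] << '\n';      // CCAssignToReg
    } else {
      StackOffset = (StackOffset + 3) & ~3u;     // round up to 4-byte alignment
      std::cout << "stack+" << StackOffset << '\n'; // CCAssignToStack<8, 4>
      StackOffset += 8;                          // consume the 8-byte slot
    }
  }
  return 0;
}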
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 421e221d205c..4b11db7c0331 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -68,7 +68,7 @@ namespace { return "X86 Machine Code Emitter"; } - void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc); + void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -132,7 +132,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { MCE.StartMachineBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - const TargetInstrDesc &Desc = I->getDesc(); + const MCInstrDesc &Desc = I->getDesc(); emitInstruction(*I, &Desc); // MOVPC32r is basically a call plus a pop instruction. if (Desc.getOpcode() == X86::MOVPC32r) @@ -150,7 +150,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { /// size, and 3) use of X86-64 extended registers. static unsigned determineREX(const MachineInstr &MI) { unsigned REX = 0; - const TargetInstrDesc &Desc = MI.getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); // Pseudo instructions do not need REX prefix byte. if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) @@ -161,7 +161,7 @@ static unsigned determineREX(const MachineInstr &MI) { unsigned NumOps = Desc.getNumOperands(); if (NumOps) { bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; @@ -598,7 +598,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, template<class CodeEmitter> void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, - const TargetInstrDesc *Desc) { + const MCInstrDesc *Desc) { DEBUG(dbgs() << MI); // If this is a pseudo instruction, lower it. @@ -708,9 +708,9 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, // If this is a two-address instruction, skip one of the register operands. unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1) ++CurOp; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index f1b9972530c6..21e163a30054 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -15,6 +15,7 @@ #include "X86.h" #include "X86InstrBuilder.h" +#include "X86ISelLowering.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -1392,7 +1393,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { assert(DI->getAddress() && "Null address should be checked earlier!"); if (!X86SelectAddress(DI->getAddress(), AM)) return false; - const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). 
@@ -1493,7 +1494,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { return false; // Fast-isel doesn't know about callee-pop yet. - if (Subtarget->IsCalleePop(isVarArg, CC)) + if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, + GuaranteedTailCallOpt)) return false; // Check whether the function can return without sret-demotion. @@ -1628,7 +1630,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { unsigned NumBytes = CCInfo.getNextStackOffset(); // Issue CALLSEQ_START - unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) .addImm(NumBytes); @@ -1801,7 +1803,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { MIB.addReg(RegArgs[i]); // Issue CALLSEQ_END - unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); unsigned NumBytesCallee = 0; if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet)) NumBytesCallee = 4; @@ -1846,16 +1848,19 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // stack, but where we prefer to use the value in xmm registers, copy it // out as F80 and use a truncate to move it from fp stack reg to xmm reg. if ((RVLocs[i].getLocReg() == X86::ST0 || - RVLocs[i].getLocReg() == X86::ST1) && - isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) { - CopyVT = MVT::f80; - CopyReg = createResultReg(X86::RFP80RegisterClass); + RVLocs[i].getLocReg() == X86::ST1)) { + if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) { + CopyVT = MVT::f80; + CopyReg = createResultReg(X86::RFP80RegisterClass); + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL), + CopyReg); + } else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CopyReg).addReg(RVLocs[i].getLocReg()); + UsedRegs.push_back(RVLocs[i].getLocReg()); } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - CopyReg).addReg(RVLocs[i].getLocReg()); - UsedRegs.push_back(RVLocs[i].getLocReg()); - if (CopyVT != RVLocs[i].getValVT()) { // Round the F80 the right size, which also moves to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 325d0611817d..6eed6abd43e2 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -126,10 +127,45 @@ namespace { void bundleCFG(MachineFunction &MF); MachineBasicBlock *MBB; // Current basic block + + // The hardware keeps track of how many FP registers are live, so we have + // to model that exactly. Usually, each live register corresponds to an + // FP<n> register, but when dealing with calls, returns, and inline + // assembly, it is sometimes necessary to have live scratch registers. unsigned Stack[8]; // FP<n> Registers in each stack slot... - unsigned RegMap[8]; // Track which stack slot contains each register unsigned StackTop; // The current top of the FP stack. 
+ enum { + NumFPRegs = 16 // Including scratch pseudo-registers. + }; + + // For each live FP<n> register, point to its Stack[] entry. + // The first entries correspond to FP0-FP6, the rest are scratch registers + // used when we need slightly different live registers than what the + // register allocator thinks. + unsigned RegMap[NumFPRegs]; + + // Pending fixed registers - Inline assembly needs FP registers to appear + // in fixed stack slot positions. This is handled by copying FP registers + // to ST registers before the instruction, and copying back after the + // instruction. + // + // This is modeled with pending ST registers. NumPendingSTs is the number + // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP + // register that holds the ST value. The ST registers are not moved into + // place until immediately before the instruction that needs them. + // + // It can happen that we need an ST register to be live when no FP register + // holds the value: + // + // %ST0 = COPY %FP4<kill> + // + // When that happens, we allocate a scratch FP register to hold the ST + // value. That means every register in PendingST must be live. + + unsigned NumPendingSTs; + unsigned char PendingST[8]; + // Set up our stack model to match the incoming registers to MBB. void setupBlockStack(); @@ -142,13 +178,15 @@ namespace { dbgs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } + for (unsigned i = 0; i != NumPendingSTs; ++i) + dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]); dbgs() << "\n"; } /// getSlot - Return the stack slot number a particular register number is /// in. unsigned getSlot(unsigned RegNo) const { - assert(RegNo < 8 && "Regno out of range!"); + assert(RegNo < NumFPRegs && "Regno out of range!"); return RegMap[RegNo]; } @@ -160,12 +198,17 @@ namespace { /// getScratchReg - Return an FP register that is not currently in use. unsigned getScratchReg() { - for (int i = 7; i >= 0; --i) + for (int i = NumFPRegs - 1; i >= 8; --i) if (!isLive(i)) return i; llvm_unreachable("Ran out of scratch FP registers"); } + /// isScratchReg - Returns true if RegNo is a scratch FP register. + bool isScratchReg(unsigned RegNo) { + return RegNo > 8 && RegNo < NumFPRegs; + } + /// getStackEntry - Return the X86::FP<n> register in register ST(i). unsigned getStackEntry(unsigned STi) const { if (STi >= StackTop) @@ -181,7 +224,7 @@ namespace { // pushReg - Push the specified FP<n> register onto the stack. void pushReg(unsigned Reg) { - assert(Reg < 8 && "Register number out of range!"); + assert(Reg < NumFPRegs && "Register number out of range!"); if (StackTop >= 8) report_fatal_error("Stack overflow!"); Stack[StackTop] = Reg; @@ -236,7 +279,7 @@ namespace { /// Adjust the live registers to be the set in Mask. void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); - /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is + /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is /// st(0), FP reg FixStack[1] is st(1) etc. void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, MachineBasicBlock::iterator I); @@ -251,7 +294,14 @@ namespace { void handleCondMovFP(MachineBasicBlock::iterator &I); void handleSpecialFP(MachineBasicBlock::iterator &I); - bool translateCopy(MachineInstr*); + // Check if a COPY instruction is using FP registers. 
+ bool isFPCopy(MachineInstr *MI) { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + return X86::RFP80RegClass.contains(DstReg) || + X86::RFP80RegClass.contains(SrcReg); + } }; char FPS::ID = 0; } @@ -341,6 +391,7 @@ void FPS::bundleCFG(MachineFunction &MF) { bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; MBB = &BB; + NumPendingSTs = 0; setupBlockStack(); @@ -352,7 +403,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; - if (MI->isCopy() && translateCopy(MI)) + if (MI->isCopy() && isFPCopy(MI)) FPInstClass = X86II::SpecialFP; if (FPInstClass == X86II::NotFP) @@ -833,7 +884,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { // Kill registers by popping. if (Kills && I != MBB->begin()) { MachineBasicBlock::iterator I2 = llvm::prior(I); - for (;;) { + while (StackTop) { unsigned KReg = getStackEntry(0); if (!(Kills & (1 << KReg))) break; @@ -881,7 +932,8 @@ void FPS::shuffleStackTop(const unsigned char *FixStack, continue; // (Reg st0) (OldReg st0) = (Reg OldReg st0) moveToTop(Reg, I); - moveToTop(OldReg, I); + if (FixCount > 0) + moveToTop(OldReg, I); } DEBUG(dumpStack()); } @@ -1239,141 +1291,307 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; switch (MI->getOpcode()) { default: llvm_unreachable("Unknown SpecialFP instruction!"); - case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! - case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! - case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! - assert(StackTop == 0 && "Stack should be empty after a call!"); - pushReg(getFPReg(MI->getOperand(0))); - break; - case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! - case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! - case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! - // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. - // The pattern we expect is: - // CALL - // FP1 = FpGET_ST0 - // FP4 = FpGET_ST1 - // - // At this point, we've pushed FP1 on the top of stack, so it should be - // present if it isn't dead. If it was dead, we already emitted a pop to - // remove it from the stack and StackTop = 0. - - // Push FP4 as top of stack next. - pushReg(getFPReg(MI->getOperand(0))); + case TargetOpcode::COPY: { + // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP. + const MachineOperand &MO1 = MI->getOperand(1); + const MachineOperand &MO0 = MI->getOperand(0); + unsigned DstST = MO0.getReg() - X86::ST0; + unsigned SrcST = MO1.getReg() - X86::ST0; + bool KillsSrc = MI->killsRegister(MO1.getReg()); + + // ST = COPY FP. Set up a pending ST register. + if (DstST < 8) { + unsigned SrcFP = getFPReg(MO1); + assert(isLive(SrcFP) && "Cannot copy dead register"); + assert(!MO0.isDead() && "Cannot copy to dead ST register"); + + // Unallocated STs are marked as the nonexistent FP255. + while (NumPendingSTs <= DstST) + PendingST[NumPendingSTs++] = NumFPRegs; + + // STi could still be live from a previous inline asm. + if (isScratchReg(PendingST[DstST])) { + DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST]) + << '\n'); + freeStackSlotBefore(MI, PendingST[DstST]); + } - // If StackTop was 0 before we pushed our operand, then ST(0) must have been - // dead. 
In this case, the ST(1) value is the only thing that is live, so - // it should be on the TOS (after the pop that was emitted) and is. Just - // continue in this case. - if (StackTop == 1) + // When the source is killed, allocate a scratch FP register. + if (KillsSrc) { + unsigned Slot = getSlot(SrcFP); + unsigned SR = getScratchReg(); + PendingST[DstST] = SR; + Stack[Slot] = SR; + RegMap[SR] = Slot; + } else + PendingST[DstST] = SrcFP; break; - - // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top - // elements so that our accounting is correct. - unsigned RegOnTop = getStackEntry(0); - unsigned RegNo = getStackEntry(1); - - // Swap the slots the regs are in. - std::swap(RegMap[RegNo], RegMap[RegOnTop]); - - // Swap stack slot contents. - if (RegMap[RegOnTop] >= StackTop) - report_fatal_error("Access past stack top!"); - std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); - break; - } - case X86::FpSET_ST0_32: - case X86::FpSET_ST0_64: - case X86::FpSET_ST0_80: { - // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm - // arguments that use an st constraint. We expect a sequence of - // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM - unsigned Op0 = getFPReg(MI->getOperand(0)); - - if (!MI->killsRegister(X86::FP0 + Op0)) { - // Duplicate Op0 into a temporary on the stack top. - duplicateToTop(Op0, getScratchReg(), I); - } else { - // Op0 is killed, so just swap it into position. - moveToTop(Op0, I); } - --StackTop; // "Forget" we have something on the top of stack! - break; - } - case X86::FpSET_ST1_32: - case X86::FpSET_ST1_64: - case X86::FpSET_ST1_80: { - // Set up st(1) for inline asm. We are assuming that st(0) has already been - // set up by FpSET_ST0, and our StackTop is off by one because of it. - unsigned Op0 = getFPReg(MI->getOperand(0)); - // Restore the actual StackTop from before Fp_SET_ST0. - // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we - // are not enforcing the constraint. - ++StackTop; - unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). - if (!MI->killsRegister(X86::FP0 + Op0)) { - duplicateToTop(Op0, getScratchReg(), I); - moveToTop(RegOnTop, I); - } else if (getSTReg(Op0) != X86::ST1) { - // We have the wrong value at st(1). Shuffle! Untested! - moveToTop(getStackEntry(1), I); - moveToTop(Op0, I); - moveToTop(RegOnTop, I); + + // FP = COPY ST. Extract fixed stack value. + // Any instruction defining ST registers must have assigned them to a + // scratch register. + if (SrcST < 8) { + unsigned DstFP = getFPReg(MO0); + assert(!isLive(DstFP) && "Cannot copy ST to live FP register"); + assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register"); + unsigned SrcFP = PendingST[SrcST]; + assert(isScratchReg(SrcFP) && "Expected ST in a scratch register"); + assert(isLive(SrcFP) && "Scratch holding ST is dead"); + + // DstFP steals the stack slot from SrcFP. + unsigned Slot = getSlot(SrcFP); + Stack[Slot] = DstFP; + RegMap[DstFP] = Slot; + + // Always treat the ST as killed. + PendingST[SrcST] = NumFPRegs; + while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs) + --NumPendingSTs; + break; } - assert(StackTop >= 2 && "Too few live registers"); - StackTop -= 2; // "Forget" both st(0) and st(1). 
- break; - } - case X86::MOV_Fp3232: - case X86::MOV_Fp3264: - case X86::MOV_Fp6432: - case X86::MOV_Fp6464: - case X86::MOV_Fp3280: - case X86::MOV_Fp6480: - case X86::MOV_Fp8032: - case X86::MOV_Fp8064: - case X86::MOV_Fp8080: { - const MachineOperand &MO1 = MI->getOperand(1); - unsigned SrcReg = getFPReg(MO1); - const MachineOperand &MO0 = MI->getOperand(0); - unsigned DestReg = getFPReg(MO0); - if (MI->killsRegister(X86::FP0+SrcReg)) { + // FP <- FP copy. + unsigned DstFP = getFPReg(MO0); + unsigned SrcFP = getFPReg(MO1); + assert(isLive(SrcFP) && "Cannot copy dead register"); + if (KillsSrc) { // If the input operand is killed, we can just change the owner of the // incoming stack slot into the result. - unsigned Slot = getSlot(SrcReg); - assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); - Stack[Slot] = DestReg; - RegMap[DestReg] = Slot; - + unsigned Slot = getSlot(SrcFP); + Stack[Slot] = DstFP; + RegMap[DstFP] = Slot; } else { - // For FMOV we just duplicate the specified value to a new stack slot. + // For COPY we just duplicate the specified value to a new stack slot. // This could be made better, but would require substantial changes. - duplicateToTop(SrcReg, DestReg, I); + duplicateToTop(SrcFP, DstFP, I); } + break; + } + + case X86::FpPOP_RETVAL: { + // The FpPOP_RETVAL instruction is used after calls that return a value on + // the floating point stack. We cannot model this with ST defs since CALL + // instructions have fixed clobber lists. This instruction is interpreted + // to mean that there is one more live register on the stack than we + // thought. + // + // This means that StackTop does not match the hardware stack between a + // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions + // between CALL and FpPOP_RETVAL as long as they don't overflow the + // hardware stack. + unsigned DstFP = getFPReg(MI->getOperand(0)); + + // Move existing stack elements up to reflect reality. + assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL"); + if (StackTop) { + std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1); + for (unsigned i = 0; i != NumFPRegs; ++i) + ++RegMap[i]; } + ++StackTop; + + // DstFP is the new bottom of the stack. + Stack[0] = DstFP; + RegMap[DstFP] = 0; + + // DstFP will be killed by processBasicBlock if this was a dead def. break; + } + case TargetOpcode::INLINEASM: { // The inline asm MachineInstr currently only *uses* FP registers for the // 'f' constraint. These should be turned into the current ST(x) register - // in the machine instr. Also, any kills should be explicitly popped after - // the inline asm. - unsigned Kills = 0; + // in the machine instr. + // + // There are special rules for x87 inline assembly. The compiler must know + // exactly how many registers are popped and pushed implicitly by the asm. + // Otherwise it is not possible to restore the stack state after the inline + // asm. + // + // There are 3 kinds of input operands: + // + // 1. Popped inputs. These must appear at the stack top in ST0-STn. A + // popped input operand must be in a fixed stack slot, and it is either + // tied to an output operand, or in the clobber list. The MI has ST use + // and def operands for these inputs. + // + // 2. Fixed inputs. These inputs appear in fixed stack slots, but are + // preserved by the inline asm. The fixed stack slots must be STn-STm + // following the popped inputs. A fixed input operand cannot be tied to + // an output or appear in the clobber list. 
The MI has ST use operands + // and no defs for these inputs. + // + // 3. Preserved inputs. These inputs use the "f" constraint which is + // represented as an FP register. The inline asm won't change these + // stack slots. + // + // Outputs must be in ST registers, FP outputs are not allowed. Clobbered + // registers do not count as output operands. The inline asm changes the + // stack as if it popped all the popped inputs and then pushed all the + // output operands. + + // Scan the assembly for ST registers used, defined and clobbered. We can + // only tell clobbers from defs by looking at the asm descriptor. + unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0; + unsigned NumOps = 0; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands(); + i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) { + unsigned Flags = MI->getOperand(i).getImm(); + NumOps = InlineAsm::getNumOperandRegisters(Flags); + if (NumOps != 1) + continue; + const MachineOperand &MO = MI->getOperand(i + 1); + if (!MO.isReg()) + continue; + unsigned STReg = MO.getReg() - X86::ST0; + if (STReg >= 8) + continue; + + switch (InlineAsm::getKind(Flags)) { + case InlineAsm::Kind_RegUse: + STUses |= (1u << STReg); + break; + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: + STDefs |= (1u << STReg); + if (MO.isDead()) + STDeadDefs |= (1u << STReg); + break; + case InlineAsm::Kind_Clobber: + STClobbers |= (1u << STReg); + break; + default: + break; + } + } + + if (STUses && !isMask_32(STUses)) + MI->emitError("fixed input regs must be last on the x87 stack"); + unsigned NumSTUses = CountTrailingOnes_32(STUses); + + // Defs must be contiguous from the stack top. ST0-STn. + if (STDefs && !isMask_32(STDefs)) { + MI->emitError("output regs must be last on the x87 stack"); + STDefs = NextPowerOf2(STDefs) - 1; + } + unsigned NumSTDefs = CountTrailingOnes_32(STDefs); + + // So must the clobbered stack slots. ST0-STm, m >= n. + if (STClobbers && !isMask_32(STDefs | STClobbers)) + MI->emitError("clobbers must be last on the x87 stack"); + + // Popped inputs are the ones that are also clobbered or defined. + unsigned STPopped = STUses & (STDefs | STClobbers); + if (STPopped && !isMask_32(STPopped)) + MI->emitError("implicitly popped regs must be last on the x87 stack"); + unsigned NumSTPopped = CountTrailingOnes_32(STPopped); + + DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops " + << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n"); + + // Scan the instruction for FP uses corresponding to "f" constraints. + // Collect FP registers to kill after the instruction. + // Always kill all the scratch regs. + unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; + unsigned FPUsed = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) continue; - assert(Op.isUse() && "Only handle inline asm uses right now"); - + if (!Op.isUse()) + MI->emitError("illegal \"f\" output constraint"); unsigned FPReg = getFPReg(Op); - Op.setReg(getSTReg(FPReg)); - + FPUsed |= 1U << FPReg; + // If we kill this operand, make sure to pop it from the stack after the // asm. We just remember it for now, and pop them all off at the end in // a batch. 
if (Op.isKill()) - Kills |= 1U << FPReg; + FPKills |= 1U << FPReg; + } + + // The popped inputs will be killed by the instruction, so duplicate them + // if the FP register needs to be live after the instruction, or if it is + // used in the instruction itself. We effectively treat the popped inputs + // as early clobbers. + for (unsigned i = 0; i < NumSTPopped; ++i) { + if ((FPKills & ~FPUsed) & (1u << PendingST[i])) + continue; + unsigned SR = getScratchReg(); + duplicateToTop(PendingST[i], SR, I); + DEBUG(dbgs() << "Duplicating ST" << i << " in FP" + << unsigned(PendingST[i]) << " to avoid clobbering it.\n"); + PendingST[i] = SR; + } + + // Make sure we have a unique live register for every fixed use. Some of + // them could be undef uses, and we need to emit LD_F0 instructions. + for (unsigned i = 0; i < NumSTUses; ++i) { + if (i < NumPendingSTs && PendingST[i] < NumFPRegs) { + // Check for shared assignments. + for (unsigned j = 0; j < i; ++j) { + if (PendingST[j] != PendingST[i]) + continue; + // STi and STj are in the same register, create a copy. + unsigned SR = getScratchReg(); + duplicateToTop(PendingST[i], SR, I); + DEBUG(dbgs() << "Duplicating ST" << i << " in FP" + << unsigned(PendingST[i]) + << " to avoid collision with ST" << j << '\n'); + PendingST[i] = SR; + } + continue; + } + unsigned SR = getScratchReg(); + DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n'); + BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0)); + pushReg(SR); + PendingST[i] = SR; + if (NumPendingSTs == i) + ++NumPendingSTs; + } + assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned"); + + // Now we can rearrange the live registers to match what was requested. + shuffleStackTop(PendingST, NumPendingSTs, I); + DEBUG({dbgs() << "Before asm: "; dumpStack();}); + + // With the stack layout fixed, rewrite the FP registers. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) + continue; + unsigned FPReg = getFPReg(Op); + Op.setReg(getSTReg(FPReg)); + } + + // Simulate the inline asm popping its inputs and pushing its outputs. + StackTop -= NumSTPopped; + + // Hold the fixed output registers in scratch FP registers. They will be + // transferred to real FP registers by copies. + NumPendingSTs = 0; + for (unsigned i = 0; i < NumSTDefs; ++i) { + unsigned SR = getScratchReg(); + pushReg(SR); + FPKills &= ~(1u << SR); + } + for (unsigned i = 0; i < NumSTDefs; ++i) + PendingST[NumPendingSTs++] = getStackEntry(i); + DEBUG({dbgs() << "After asm: "; dumpStack();}); + + // If any of the ST defs were dead, pop them immediately. Our caller only + // handles dead FP defs. + MachineBasicBlock::iterator InsertPt = MI; + for (unsigned i = 0; STDefs & (1u << i); ++i) { + if (!(STDeadDefs & (1u << i))) + continue; + freeStackSlotAfter(InsertPt, PendingST[i]); + PendingST[i] = NumFPRegs; } + while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs) + --NumPendingSTs; // If this asm kills any FP registers (is the last use of them) we must // explicitly emit pop instructions for them. Do this now after the asm has @@ -1382,16 +1600,16 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // // Note: this might be a non-optimal pop sequence. We might be able to do // better by trying to pop in stack order or something.
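The bitmask discipline in the preceding hunks is easier to see in isolation. Below is a minimal standalone sketch (not part of the patch): isMask and countTrailingOnes stand in for llvm::isMask_32 and llvm::CountTrailingOnes_32, and the example masks (an asm that uses ST0-ST2, defines ST0-ST1, and clobbers ST2) are invented for illustration.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Stand-in for llvm::isMask_32: true when V has the form 0...01...1.
  static bool isMask(uint32_t V) { return V && ((V + 1) & V) == 0; }

  // Stand-in for llvm::CountTrailingOnes_32.
  static unsigned countTrailingOnes(uint32_t V) {
    unsigned N = 0;
    for (; V & 1; V >>= 1) ++N;
    return N;
  }

  int main() {
    uint32_t STUses = 0x7, STDefs = 0x3, STClobbers = 0x4;

    // Fixed inputs, outputs and clobbers must each be contiguous from ST0.
    assert(isMask(STUses) && isMask(STDefs) && isMask(STDefs | STClobbers));

    // Popped inputs are the uses that are also defined or clobbered.
    uint32_t STPopped = STUses & (STDefs | STClobbers);
    assert(isMask(STPopped));

    printf("uses %u fixed regs, pops %u, defines %u\n",
           countTrailingOnes(STUses), countTrailingOnes(STPopped),
           countTrailingOnes(STDefs));
    return 0;
  }

With these masks the asm behaves like three pops followed by two pushes, which is exactly the StackTop adjustment the pass simulates above.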
- MachineBasicBlock::iterator InsertPt = MI; - while (Kills) { - unsigned FPReg = CountTrailingZeros_32(Kills); - freeStackSlotAfter(InsertPt, FPReg); - Kills &= ~(1U << FPReg); + while (FPKills) { + unsigned FPReg = CountTrailingZeros_32(FPKills); + if (isLive(FPReg)) + freeStackSlotAfter(InsertPt, FPReg); + FPKills &= ~(1U << FPReg); } // Don't delete the inline asm! return; } - + case X86::RET: case X86::RETI: // If RET has an FP register use operand, pass the first one in ST(0) and @@ -1489,33 +1707,3 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } else --I; } - -// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. -bool FPS::translateCopy(MachineInstr *MI) { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - - if (DstReg == X86::ST0) { - MI->setDesc(TII->get(X86::FpSET_ST0_80)); - MI->RemoveOperand(0); - return true; - } - if (DstReg == X86::ST1) { - MI->setDesc(TII->get(X86::FpSET_ST1_80)); - MI->RemoveOperand(0); - return true; - } - if (SrcReg == X86::ST0) { - MI->setDesc(TII->get(X86::FpGET_ST0_80)); - return true; - } - if (SrcReg == X86::ST1) { - MI->setDesc(TII->get(X86::FpGET_ST1_80)); - return true; - } - if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { - MI->setDesc(TII->get(X86::MOV_Fp8080)); - return true; - } - return false; -} diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 2e95300160d8..ed45a9a4c1c0 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====// +//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====// // // The LLVM Compiler Infrastructure // @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -160,8 +161,10 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) : (Is64Bit ? X86::POP64r : X86::POP32r); - BuildMI(MBB, MBBI, DL, TII.get(Opc)) + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc)) .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)); + if (isSub) + MI->setFlag(MachineInstr::FrameSetup); Offset -= ThisVal; continue; } @@ -171,6 +174,8 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(ThisVal); + if (isSub) + MI->setFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. Offset -= ThisVal; } @@ -409,7 +414,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) - .addImm(-TailCallReturnAddrDelta); + .addImm(-TailCallReturnAddrDelta) + .setMIFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } @@ -447,7 +453,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? 
X86::PUSH64r : X86::PUSH32r)) - .addReg(FramePtr, RegState::Kill); + .addReg(FramePtr, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. @@ -474,7 +481,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Update EBP with the new base value... BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) - .addReg(StackPtr); + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. @@ -642,7 +650,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { } void X86FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { + MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); @@ -919,7 +927,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // X86RegisterInfo::emitPrologue will handle spilling of frame register. continue; CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); } X86FI->setCalleeSavedFrameSize(CalleeFrameSize); @@ -1021,3 +1030,181 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, FrameIdx = 0; } } + +/// permuteEncode - Create the permutation encoding used with frameless +/// stacks. It is passed the number of registers to be saved and an array of the +/// registers saved. +static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) { + // The saved registers are numbered from 1 to 6. In order to encode the order + // in which they were saved, we re-number them according to their place in the + // register order: each register is re-numbered to the zero-based count of + // lower-numbered registers that have not been saved yet. E.g., if we have + // registers {6, 2, 4, 5} saved in that order: + // + // Orig Re-Num + // ---- ------ + // 6 5 + // 2 1 + // 4 2 + // 5 2 + // + bool Used[7] = { false, false, false, false, false, false, false }; + uint32_t RenumRegs[6]; + for (unsigned I = 0; I < SavedCount; ++I) { + uint32_t Renum = 0; + for (unsigned U = 1; U < 7; ++U) { + if (U == Registers[I]) + break; + if (!Used[U]) + ++Renum; + } + + Used[Registers[I]] = true; + RenumRegs[I] = Renum; + } + + // Take the renumbered values and encode them into a 10-bit number.
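A standalone worked example may make the renumbering and the 10-bit packing below concrete. This sketch re-runs the loop above on the {6, 2, 4, 5} save order and then applies the SavedCount == 4 case of the switch; it is an illustration, not code from the patch. (Note that the case 6 and case 5 bodies below are intentionally identical: once five registers are placed, the sixth position is determined.)

  #include <cstdint>
  #include <cstdio>

  int main() {
    unsigned Registers[4] = {6, 2, 4, 5}; // save order from the comment above
    bool Used[7] = {};
    uint32_t Renum[4];

    // Each register's renumber is the zero-based count of still-unsaved
    // registers with a smaller number, matching the loop in permuteEncode.
    for (unsigned I = 0; I < 4; ++I) {
      uint32_t R = 0;
      for (unsigned U = 1; U < Registers[I]; ++U)
        if (!Used[U])
          ++R;
      Used[Registers[I]] = true;
      Renum[I] = R;
    }
    printf("renumbered: %u %u %u %u\n", Renum[0], Renum[1], Renum[2], Renum[3]);
    // prints: renumbered: 5 1 2 2

    // Factorial-base packing, the SavedCount == 4 case of the switch below:
    // 6*5*4*3 = 360 distinct save orders, which fits in 10 bits.
    uint32_t Enc = 60 * Renum[0] + 12 * Renum[1] + 3 * Renum[2] + Renum[3];
    printf("encoding: %u\n", Enc); // 60*5 + 12*1 + 3*2 + 2 = 320
    return 0;
  }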
+ uint32_t permutationEncoding = 0; + switch (SavedCount) { + case 6: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 5: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 4: + permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1] + + 3 * RenumRegs[2] + RenumRegs[3]; + break; + case 3: + permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1] + + RenumRegs[2]; + break; + case 2: + permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1]; + break; + case 1: + permutationEncoding |= RenumRegs[0]; + break; + } + + return permutationEncoding; +} + +uint32_t X86FrameLowering:: +getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, + int DataAlignmentFactor, bool IsEH) const { + uint32_t Encoding = 0; + int CFAOffset = 0; + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; + unsigned SavedRegIdx = 0; + int FramePointerReg = -1; + + for (ArrayRef<MCCFIInstruction>::const_iterator + I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { + const MCCFIInstruction &Inst = *I; + MCSymbol *Label = Inst.getLabel(); + + // Ignore invalid labels. + if (Label && !Label->isDefined()) continue; + + unsigned Operation = Inst.getOperation(); + if (Operation != MCCFIInstruction::Move && + Operation != MCCFIInstruction::RelMove) + // FIXME: We can't handle this frame just yet. + return 0; + + const MachineLocation &Dst = Inst.getDestination(); + const MachineLocation &Src = Inst.getSource(); + const bool IsRelative = (Operation == MCCFIInstruction::RelMove); + + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + if (Src.getReg() != MachineLocation::VirtualFP) { + // DW_CFA_def_cfa + assert(FramePointerReg == -1 &&"Defining more than one frame pointer?"); + if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP && + TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP) + // The frame pointer isn't EBP/RBP. Cannot make unwind information + // compact. + return 0; + FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH); + } // else DW_CFA_def_cfa_offset + + if (IsRelative) + CFAOffset += Src.getOffset(); + else + CFAOffset -= Src.getOffset(); + + continue; + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + // DW_CFA_def_cfa_register + assert(FramePointerReg == -1 && "Defining more than one frame pointer?"); + + if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP && + TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP) + // The frame pointer isn't EBP/RBP. Cannot make unwind information + // compact. + return 0; + + FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH); + if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg)) + return 0; + + SavedRegs[0] = 0; + SavedRegIdx = 0; + continue; + } + + unsigned Reg = Src.getReg(); + int Offset = Dst.getOffset(); + if (IsRelative) + Offset -= CFAOffset; + Offset /= DataAlignmentFactor; + + if (Offset < 0) { + // FIXME: Handle? + // DW_CFA_offset_extended_sf + return 0; + } else if (Reg < 64) { + // DW_CFA_offset + Reg + if (SavedRegIdx >= 6) return 0; + int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH); + if (CURegNum == -1) return 0; + SavedRegs[SavedRegIdx++] = CURegNum; + } else { + // FIXME: Handle? + // DW_CFA_offset_extended + return 0; + } + } + + // Bail if there are too many registers to encode. 
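For orientation, here is a standalone sketch of how the final 32-bit word is assembled in the frameless case handled below. All values (a 64-byte stack, four saved registers, the permutation 320 from the previous example) are invented, and the bit layout is simply my reading of the code that follows, not an official format description.

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Encoding = 0;
    uint32_t CFAOffset = 64 / 4; // 64-byte stack, encoded in 4-byte units
    unsigned SavedRegIdx = 4;    // number of saved registers
    uint32_t Permutation = 320;  // 10-bit result of permuteEncode

    Encoding |= (CFAOffset & 0xFF) << 16;  // size byte
    Encoding |= 0x02000000;                // frameless unwind, small stack
    Encoding |= (SavedRegIdx & 0x7) << 10; // register count
    Encoding |= Permutation;               // save-order permutation

    printf("compact unwind word: 0x%08x\n", Encoding); // 0x02101140
    return 0;
  }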
+ if (SavedRegIdx > 6) return 0; + + // Check if the offset is too big. + CFAOffset /= 4; + if ((CFAOffset & 0xFF) != CFAOffset) + return 0; + Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding. + + if (FramePointerReg != -1) { + Encoding |= 0x01000000; // EBP/RBP Unwind Frame + for (unsigned I = 0; I != SavedRegIdx; ++I) { + unsigned Reg = SavedRegs[I]; + if (Reg == unsigned(FramePointerReg)) continue; + Encoding |= (Reg & 0x7) << (I * 3); // Register encoding + } + } else { + Encoding |= 0x02000000; // Frameless unwind with small stack + Encoding |= (SavedRegIdx & 0x7) << 10; + Encoding |= permuteEncode(SavedRegIdx, SavedRegs); + } + + return Encoding; +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index d71108cd0586..14c31ed47cf1 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -15,6 +15,7 @@ #define X86_FRAMELOWERING_H #include "X86Subtarget.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -58,6 +59,9 @@ public: void getInitialFrameState(std::vector<MachineMove> &Moves) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + + uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, + int DataAlignmentFactor, bool IsEH) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 1fcc274e0f85..2b0f283bec75 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -23,6 +23,7 @@ #include "llvm/Intrinsics.h" #include "llvm/Support/CFG.h" #include "llvm/Type.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -191,6 +192,7 @@ namespace { SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); + bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); @@ -546,6 +548,34 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); } +static bool isDispSafeForFrameIndex(int64_t Val) { + // On 64-bit platforms, we can run into an issue where a frame index + // includes a displacement that, when added to the explicit displacement, + // will overflow the displacement field. Assuming that the frame index + // displacement fits into a 31-bit integer (which is only slightly more + // aggressive than the current fundamental assumption that it fits into + // a 32-bit integer), a 31-bit disp should always be safe. + return isInt<31>(Val); +} + +bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, + X86ISelAddressMode &AM) { + int64_t Val = AM.Disp + Offset; + CodeModel::Model M = TM.getCodeModel(); + if (Subtarget->is64Bit()) { + if (!X86::isOffsetSuitableForCodeModel(Val, M, + AM.hasSymbolicDisplacement())) + return true; + // In addition to the checks required for a register base, check that + // we do not try to use an unsafe Disp with a frame index. 
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && + !isDispSafeForFrameIndex(Val)) + return true; + } + AM.Disp = Val; + return false; + +} bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); @@ -595,18 +625,22 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // must allow RIP. !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { - int64_t Offset = AM.Disp + G->getOffset(); - if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; + X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); - AM.Disp = Offset; AM.SymbolFlags = G->getTargetFlags(); + if (FoldOffsetIntoAddress(G->getOffset(), AM)) { + AM = Backup; + return true; + } } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { - int64_t Offset = AM.Disp + CP->getOffset(); - if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; + X86ISelAddressMode Backup = AM; AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); - AM.Disp = Offset; AM.SymbolFlags = CP->getTargetFlags(); + if (FoldOffsetIntoAddress(CP->getOffset(), AM)) { + AM = Backup; + return true; + } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { AM.ES = S->getSymbol(); AM.SymbolFlags = S->getTargetFlags(); @@ -688,7 +722,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { - bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); DEBUG({ dbgs() << "MatchAddress: "; @@ -698,8 +731,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (Depth > 5) return MatchAddressBase(N, AM); - CodeModel::Model M = TM.getCodeModel(); - // If this is already a %rip relative address, we can only merge immediates // into it. Instead of handling this in every case, we handle it here. // RIP relative addressing: %rip + 32-bit displacement! @@ -709,14 +740,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // consistency. 
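The 31-bit bound used by isDispSafeForFrameIndex above is about headroom: a folded displacement can still fit the 32-bit field yet leave no room for the offset that frame-index elimination adds later. A standalone sketch with invented values, where isIntN stands in for llvm::isInt<N>:

  #include <cstdint>
  #include <cstdio>

  // Stand-in for llvm::isInt<N>: does X fit in an N-bit signed integer?
  static bool isIntN(unsigned N, int64_t X) {
    return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
  }

  int main() {
    int64_t Disp = 0x3FFFFFFF;   // current displacement, fits in 31 bits
    int64_t Offset = 0x40000000; // constant the matcher wants to fold
    int64_t Val = Disp + Offset; // 0x7FFFFFFF

    // Still a legal 32-bit displacement for a register base...
    printf("isInt<32>: %d\n", isIntN(32, Val)); // 1
    // ...but rejected for a frame-index base, which needs the extra bit of
    // headroom for the offset added later by frame-index elimination.
    printf("isInt<31>: %d\n", isIntN(31, Val)); // 0
    return 0;
  }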
if (!AM.ES && AM.JT != -1) return true; - if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { - int64_t Val = AM.Disp + Cst->getSExtValue(); - if (X86::isOffsetSuitableForCodeModel(Val, M, - AM.hasSymbolicDisplacement())) { - AM.Disp = Val; + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) + if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM)) return false; - } - } return true; } @@ -724,12 +750,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, default: break; case ISD::Constant: { uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, - AM.hasSymbolicDisplacement())) { - AM.Disp += Val; + if (!FoldOffsetIntoAddress(Val, AM)) return false; - } break; } @@ -745,8 +767,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::FrameIndex: - if (AM.BaseType == X86ISelAddressMode::RegBase - && AM.Base_Reg.getNode() == 0) { + if (AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() == 0 && + (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); return false; @@ -775,16 +798,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); - uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(Disp, M, - AM.hasSymbolicDisplacement())) - AM.Disp = Disp; - else - AM.IndexReg = ShVal; - } else { - AM.IndexReg = ShVal; + uint64_t Disp = AddVal->getSExtValue() << Val; + if (!FoldOffsetIntoAddress(Disp, AM)) + return false; } + + AM.IndexReg = ShVal; return false; } break; @@ -818,13 +837,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, Reg = MulVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); - uint64_t Disp = AM.Disp + AddVal->getSExtValue() * - CN->getZExtValue(); - if (!is64Bit || - X86::isOffsetSuitableForCodeModel(Disp, M, - AM.hasSymbolicDisplacement())) - AM.Disp = Disp; - else + uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); + if (FoldOffsetIntoAddress(Disp, AM)) Reg = N.getNode()->getOperand(0); } else { Reg = N.getNode()->getOperand(0); @@ -949,19 +963,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (CurDAG->isBaseWithConstantOffset(N)) { X86ISelAddressMode Backup = AM; ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); - uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && - // Address could not have picked a GV address for the displacement. - AM.GV == NULL && - // On x86-64, the resultant disp must fit in 32-bits. 
- (!is64Bit || - X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, - AM.hasSymbolicDisplacement()))) { - AM.Disp += Offset; + !FoldOffsetIntoAddress(CN->getSExtValue(), AM)) return false; - } AM = Backup; } break; @@ -1351,7 +1357,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { bool isInc = false, isDec = false, isSub = false, isCN = false; ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN) { + if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) { isCN = true; int64_t CNVal = CN->getSExtValue(); if (CNVal == 1) @@ -1371,6 +1377,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Val = Val.getOperand(1); } + DebugLoc dl = Node->getDebugLoc(); unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { default: return 0; @@ -1462,7 +1469,6 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { break; } - DebugLoc dl = Node->getDebugLoc(); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -1579,7 +1585,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { bool isCN = false; ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN) { + if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { isCN = true; Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); } @@ -1612,16 +1618,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Opc = AtomicOpcTbl[Op][I32]; break; case MVT::i64: + Opc = AtomicOpcTbl[Op][I64]; if (isCN) { if (immSext8(Val.getNode())) Opc = AtomicOpcTbl[Op][SextConstantI64]; else if (i64immSExt32(Val.getNode())) Opc = AtomicOpcTbl[Op][ConstantI64]; - } else - Opc = AtomicOpcTbl[Op][I64]; + } break; } + assert(Opc != 0 && "Invalid arith lock transform!"); + DebugLoc dl = Node->getDebugLoc(); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 294a6a74cc77..5096d9ae2edf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Setup Windows compiler runtime calls. setLibcallName(RTLIB::SDIV_I64, "_alldiv"); setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); + setLibcallName(RTLIB::SREM_I64, "_allrem"); + setLibcallName(RTLIB::UREM_I64, "_aullrem"); + setLibcallName(RTLIB::MUL_I64, "_allmul"); setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2"); setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); } @@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS } + // We don't support FMA. + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + // Long double always uses X87. 
if (!UseSoftFloat) { addRegisterClass(MVT::f80, X86::RFP80RegisterClass); @@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f80 , Expand); setOperationAction(ISD::FCOS , MVT::f80 , Expand); } + + setOperationAction(ISD::FMA, MVT::f80, Expand); } // Always use a library call for pow. @@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); - setOperationAction(ISD::LOAD, MVT::v8i32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); setOperationAction(ISD::LOAD, MVT::v4i64, Legal); @@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); - // Custom lower build_vector, vector_shuffle, scalar_to_vector, - // insert_vector_elt extract_subvector and extract_vector_elt for - // 256-bit types. - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-256-bit vectors - if (!isPowerOf2_32(MVT(VT).getVectorNumElements()) - || (MVT(VT).getSizeInBits() < 256)) - continue; - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - } - // Custom-lower insert_subvector and extract_subvector based on - // the result type. + // Custom lower several nodes for 256-bit types. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-256-bit vectors - if (!isPowerOf2_32(MVT(VT).getVectorNumElements())) + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; + EVT VT = SVT; + + // Extract subvector is special because the value type + // (result) is 128-bit but the source is 256-bit wide. + if (VT.is128BitVector()) + setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom); + + // Do not attempt to custom lower other non-256-bit vectors + if (!VT.is256BitVector()) continue; - if (MVT(VT).getSizeInBits() == 128) { - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - } - else if (MVT(VT).getSizeInBits() == 256) { - setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); - } + setOperationAction(ISD::BUILD_VECTOR, SVT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom); } // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. - // Don't promote loads because we need them for VPERM vector index versions. 
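The promotion loop that follows rewrites 256-bit AND, OR, XOR, LOAD and SELECT to operate on v4i64. As a standalone illustration of why that is sound for bitwise operations (lanes may be reinterpreted freely around a bit-parallel operation), here is an invented fabs-via-mask example that models the bitcasts with memcpy:

  #include <cstdint>
  #include <cstring>
  #include <cstdio>

  int main() {
    // Eight negative floats standing in for one 256-bit v8f32 value.
    float V[8] = {-1, -2, -3, -4, -5, -6, -7, -8};

    // Bitcast v8f32 -> v4i64, AND each i64 lane, bitcast back. Clearing
    // the sign bit of both packed floats per lane computes fabs.
    uint64_t Lanes[4], Mask = 0x7FFFFFFF7FFFFFFFull;
    std::memcpy(Lanes, V, sizeof Lanes);
    for (int i = 0; i < 4; ++i)
      Lanes[i] &= Mask;
    std::memcpy(V, Lanes, sizeof V);

    printf("%g ... %g\n", V[0], V[7]); // 1 ... 8
    return 0;
  }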
+ for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) { + MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; + EVT VT = SVT; - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; - VT++) { - if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements()) - || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256)) + // Do not attempt to promote non-256-bit vectors + if (!VT.is256BitVector()) continue; - setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64); - setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64); - setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64); - //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); - //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64); - setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64); + + setOperationAction(ISD::AND, SVT, Promote); + AddPromotedToType (ISD::AND, SVT, MVT::v4i64); + setOperationAction(ISD::OR, SVT, Promote); + AddPromotedToType (ISD::OR, SVT, MVT::v4i64); + setOperationAction(ISD::XOR, SVT, Promote); + AddPromotedToType (ISD::XOR, SVT, MVT::v4i64); + setOperationAction(ISD::LOAD, SVT, Promote); + AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64); + setOperationAction(ISD::SELECT, SVT, Promote); + AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64); } } + // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion + // of this type with custom code. + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) { + setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom); + } + // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -1511,20 +1517,15 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is a call to a function that returns an fp value on the floating // point stack, we must guarantee that the value is popped from the stack, so // a CopyFromReg is not good enough - the copy instruction may be eliminated - // if the return value is not used. We use the FpGET_ST0 instructions + // if the return value is not used. We use the FpPOP_RETVAL instruction // instead. if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) { // If we prefer to use the value in xmm registers, copy it out as f80 and // use a truncate to move it from fp stack reg to xmm reg. if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; - bool isST0 = VA.getLocReg() == X86::ST0; - unsigned Opc = 0; - if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32; - if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; - if (CopyVT == MVT::f80) Opc = isST0 ?
X86::FpGET_ST0_80:X86::FpGET_ST1_80; SDValue Ops[] = { Chain, InFlag }; - Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue, - Ops, 2), 1); + Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT, + MVT::Other, MVT::Glue, Ops, 2), 1); Val = Chain.getValue(0); // Round the f80 to the right size, which also moves it to the appropriate @@ -1898,7 +1899,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } // Some CCs need callee pop. - if (Subtarget->IsCalleePop(isVarArg, CallConv)) { + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. @@ -2271,6 +2272,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const GlobalValue *GV = G->getGlobal(); if (!GV->hasDLLImportLinkage()) { unsigned char OpFlags = 0; + bool ExtraLoad = false; + unsigned WrapperKind = ISD::DELETED_NODE; // On ELF targets, in both X86-64 and X86-32 mode, direct calls to // external symbols must go through the PLT in PIC mode. If the symbol // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. OpFlags = X86II::MO_DARWIN_STUB; + } else if (Subtarget->isPICStyleRIPRel() && + isa<Function>(GV) && + cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) { + // If the function is marked as non-lazy, generate an indirect call + // which loads from the GOT directly. This avoids runtime overhead + // at the cost of eager binding (and one extra byte of encoding). + OpFlags = X86II::MO_GOTPCREL; + WrapperKind = X86ISD::WrapperRIP; + ExtraLoad = true; } Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), G->getOffset(), OpFlags); + + // Add a wrapper if needed. + if (WrapperKind != ISD::DELETED_NODE) + Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee); + // Add extra indirection if needed. + if (ExtraLoad) + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(), + false, false, 0); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { unsigned char OpFlags = 0; @@ -2363,7 +2384,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the CALLSEQ_END node. unsigned NumBytesForCalleeToPush; - if (Subtarget->IsCalleePop(isVarArg, CallConv)) + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) // If this is a call to a struct-return function, the callee @@ -2485,6 +2506,10 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if (!FINode) return false; FI = FINode->getIndex(); + } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { + FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); + FI = FINode->getIndex(); + Bytes = Flags.getByValSize(); } else return false; @@ -2536,6 +2561,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; + // An stdcall caller is expected to clean up its arguments; the callee + // isn't going to do that. + if (!CCMatch && CallerCC==CallingConv::X86_StdCall) + return false; + // Do not sibcall optimize vararg calls unless all arguments are passed via // registers.
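Both X86::isCalleePop call sites above consult the same decision table, which this patch defines a few hunks below. A standalone sketch of that table as I read it, with an illustrative enum rather than the LLVM CallingConv types:

  #include <cstdio>

  enum CC { CDecl, StdCall, FastCall, ThisCall, Fast, GHC };

  static bool isCalleePop(CC Conv, bool Is64Bit, bool IsVarArg,
                          bool TailCallOpt) {
    if (IsVarArg)
      return false; // vararg frames are always cleaned up by the caller
    switch (Conv) {
    case StdCall:
    case FastCall:
    case ThisCall:
      return !Is64Bit; // callee-cleanup conventions exist only on x86-32
    case Fast:
    case GHC:
      return TailCallOpt; // callee pops only under -tailcallopt
    default:
      return false;
    }
  }

  int main() {
    printf("stdcall, 32-bit:      %d\n", isCalleePop(StdCall, false, false, false)); // 1
    printf("stdcall, 64-bit:      %d\n", isCalleePop(StdCall, true, false, false));  // 0
    printf("fastcc + tailcallopt: %d\n", isCalleePop(Fast, false, false, true));     // 1
    printf("fastcc, vararg:       %d\n", isCalleePop(Fast, false, true, true));      // 0
    return 0;
  }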
if (isVarArg && !Outs.empty()) { @@ -2672,11 +2702,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } - // An stdcall caller is expected to clean up its arguments; the callee - // isn't going to do that. - if (!CCMatch && CallerCC==CallingConv::X86_StdCall) - return false; - return true; } @@ -2856,6 +2881,29 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, return false; } +/// isCalleePop - Determines whether the callee is required to pop its +/// own arguments. Callee pop is necessary to support tail calls. +bool X86::isCalleePop(CallingConv::ID CallingConv, + bool is64Bit, bool IsVarArg, bool TailCallOpt) { + if (IsVarArg) + return false; + + switch (CallingConv) { + default: + return false; + case CallingConv::X86_StdCall: + return !is64Bit; + case CallingConv::X86_FastCall: + return !is64Bit; + case CallingConv::X86_ThisCall: + return !is64Bit; + case CallingConv::Fast: + return TailCallOpt; + case CallingConv::GHC: + return TailCallOpt; + } +} + /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 /// specific condition code, returning the condition code and the LHS/RHS of the /// comparison to make. @@ -3790,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, } /// getOnesVector - Returns a vector of specified type with all bits set. -/// +/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to +/// their original type, ensuring they get CSE'd. static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); + assert((VT.is128BitVector() || VT.is256BitVector()) + && "Expected a 128-bit or 256-bit vector type"); - // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest - // type. This ensures they get CSE'd. SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); + SDValue Vec; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + if (VT.is256BitVector()) { + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + } else + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } - /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { @@ -4417,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return ConcatVectors(Lower, Upper, DAG); } - // All zero's are handled with pxor in SSE2 and above, xorps in SSE1. - // All one's are handled with pcmpeqd. In AVX, zero's are handled with - // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd - // is present, so AllOnes is ignored. + // All zero's: + // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX) + // All one's: + // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX) if (ISD::isBuildVectorAllZeros(Op.getNode()) || - (Op.getValueType().getSizeInBits() != 256 && - ISD::isBuildVectorAllOnes(Op.getNode()))) { - // Canonicalize this to <4 x i32> (SSE) to + ISD::isBuildVectorAllOnes(Op.getNode())) { + // Canonicalize this to <4 x i32> or <8 x i32> (SSE) to // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are // eliminated on x86-32 hosts.
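Further down, the new LowerSIGN_EXTEND_INREG lowers vector sign_extend_inreg as a shift-left/arithmetic-shift-right pair (pslli then psrai, with ShAmt equal to the lane width minus the extended width; v2i64 gets no SRA stage since SSE2 lacks a 64-bit arithmetic shift). A scalar model of the trick, assuming two's-complement arithmetic shifts:

  #include <cstdint>
  #include <cstdio>

  // Sign-extend the low FromBits of a 32-bit lane: shift left so the
  // value's sign bit lands in the lane's sign bit, then shift right
  // arithmetically. This mirrors the pslld/psrad pair per lane.
  static int32_t signExtendInReg(uint32_t Lane, unsigned FromBits) {
    unsigned ShAmt = 32 - FromBits; // the ShAmt computed in the lowering
    return (int32_t)(Lane << ShAmt) >> ShAmt;
  }

  int main() {
    printf("%d\n", signExtendInReg(0xFF, 8));    // -1
    printf("%d\n", signExtendInReg(0x7F, 8));    // 127
    printf("%d\n", signExtendInReg(0x8000, 16)); // -32768
    return 0;
  }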
- if (Op.getValueType() == MVT::v4i32) + if (Op.getValueType() == MVT::v4i32 || + Op.getValueType() == MVT::v8i32) return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) @@ -8874,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Lower SHL with variable shift amount. - // Cannot lower SHL without SSE4.1 or later. - if (!Subtarget->hasSSE41()) return SDValue(); + // Cannot lower SHL without SSE2 or later. + if (!Subtarget->hasSSE2()) return SDValue(); if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, @@ -9022,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return Sum; } +SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{ + DebugLoc dl = Op.getDebugLoc(); + SDNode* Node = Op.getNode(); + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + + if (Subtarget->hasSSE2() && VT.isVector()) { + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32); + + unsigned SHLIntrinsicsID = 0; + unsigned SRAIntrinsicsID = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v2i64: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q; + SRAIntrinsicsID = 0; + break; + } + case MVT::v4i32: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d; + SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d; + break; + } + case MVT::v8i16: { + SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w; + SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w; + break; + } + } + + SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SHLIntrinsicsID, MVT::i32), + Node->getOperand(0), ShAmt); + + // In case of 1 bit sext, no need to shr + if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1; + + if (SRAIntrinsicsID) { + Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(SRAIntrinsicsID, MVT::i32), + Tmp1, ShAmt); + } + return Tmp1; + } + + return SDValue(); +} + + SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ DebugLoc dl = Op.getDebugLoc(); - if (!Subtarget->hasSSE2()) { + // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. + // There isn't any reason to disable it if the target processor supports it. + if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, - Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32); + SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ops[] = { DAG.getRegister(X86::ESP, MVT::i32), // Base DAG.getTargetConstant(1, MVT::i8), // Scale @@ -9183,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); @@ -9281,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::SIGN_EXTEND_INREG: case ISD::ADDC: case ISD::ADDE: case ISD::SUBC: @@ -9415,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; - case X86ISD::PANDN: return "X86ISD::PANDN"; + case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; @@ -11766,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Want to form PANDN nodes, in the hopes of then easily combining them with - // OR and AND nodes to form PBLEND/PSIGN. + // Want to form ANDNP nodes: + // 1) In the hopes of then easily combining them with OR and AND nodes + // to form PBLEND/PSIGN. + // 2) To match ANDN packed intrinsics EVT VT = N->getValueType(0); - if (VT != MVT::v2i64) + if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); SDValue N0 = N->getOperand(0); @@ -11779,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, // Check LHS for vnot if (N0.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) - return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1); // Check RHS for vnot if (N1.getOpcode() == ISD::XOR && ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) - return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0); + return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0); return SDValue(); } @@ -11810,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (Subtarget->hasSSSE3()) { if (VT == MVT::v2i64) { // Canonicalize pandn to RHS - if (N0.getOpcode() == X86ISD::PANDN) + if (N0.getOpcode() == X86ISD::ANDNP) std::swap(N0, N1); // or (and (m, x), (pandn m, y)) - if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) { + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { SDValue Mask = N1.getOperand(0); SDValue X = N1.getOperand(1); SDValue Y; @@ -11822,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (N0.getOperand(1) == Mask) Y = N0.getOperand(0); - // Check to see if the mask appeared in both the AND and PANDN and + // Check to see if the mask appeared in both the AND and ANDNP and if (!Y.getNode()) return SDValue(); @@ -12166,8 +12276,8 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, 
const X86TargetLowering *XTLI) { - DebugLoc dl = N->getDebugLoc(); +static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, + const X86TargetLowering *XTLI) { SDValue Op0 = N->getOperand(0); // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have // a 32-bit target where SSE doesn't support i64->FP operations. @@ -12178,7 +12288,8 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86T ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !XTLI->getSubtarget()->is64Bit() && !DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG); + SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), + Ld->getChain(), Op0, DAG); DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1)); return FILDChain; } @@ -12549,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { case 'y': case 'x': case 'Y': + case 'l': return C_RegisterClass; case 'a': case 'b': @@ -12832,60 +12944,6 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -std::vector<unsigned> X86TargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() == 1) { - // FIXME: not handling fp-stack yet! - switch (Constraint[0]) { // GCC X86 Constraint Letters - default: break; // Unknown constraint letter - case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. - if (Subtarget->is64Bit()) { - if (VT == MVT::i32) - return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, - X86::ESI, X86::EDI, X86::R8D, X86::R9D, - X86::R10D,X86::R11D,X86::R12D, - X86::R13D,X86::R14D,X86::R15D, - X86::EBP, X86::ESP, 0); - else if (VT == MVT::i16) - return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, - X86::SI, X86::DI, X86::R8W,X86::R9W, - X86::R10W,X86::R11W,X86::R12W, - X86::R13W,X86::R14W,X86::R15W, - X86::BP, X86::SP, 0); - else if (VT == MVT::i8) - return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, - X86::SIL, X86::DIL, X86::R8B,X86::R9B, - X86::R10B,X86::R11B,X86::R12B, - X86::R13B,X86::R14B,X86::R15B, - X86::BPL, X86::SPL, 0); - - else if (VT == MVT::i64) - return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, - X86::RSI, X86::RDI, X86::R8, X86::R9, - X86::R10, X86::R11, X86::R12, - X86::R13, X86::R14, X86::R15, - X86::RBP, X86::RSP, 0); - - break; - } - // 32-bit fallthrough - case 'Q': // Q_REGS - if (VT == MVT::i32) - return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); - else if (VT == MVT::i16) - return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0); - else if (VT == MVT::i8) - return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0); - else if (VT == MVT::i64) - return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0); - break; - } - } - - return std::vector<unsigned>(); -} - std::pair<unsigned, const TargetRegisterClass*> X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -12895,9 +12953,35 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC Constraint Letters switch (Constraint[0]) { default: break; + // TODO: Slight differences here in allocation order and leaving + // RIP in the class. Do they matter any more here than they do + // in the normal allocation? 
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. + if (Subtarget->is64Bit()) { + if (VT == MVT::i32 || VT == MVT::f32) + return std::make_pair(0U, X86::GR32RegisterClass); + else if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16RegisterClass); + else if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, X86::GR8RegisterClass); + else if (VT == MVT::i64 || VT == MVT::f64) + return std::make_pair(0U, X86::GR64RegisterClass); + break; + } + // 32-bit fallthrough + case 'Q': // Q_REGS + if (VT == MVT::i32 || VT == MVT::f32) + return std::make_pair(0U, X86::GR32_ABCDRegisterClass); + else if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16_ABCDRegisterClass); + else if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass); + else if (VT == MVT::i64) + return std::make_pair(0U, X86::GR64_ABCDRegisterClass); + break; case 'r': // GENERAL_REGS case 'l': // INDEX_REGS - if (VT == MVT::i8) + if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8RegisterClass); if (VT == MVT::i16) return std::make_pair(0U, X86::GR16RegisterClass); @@ -12905,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, X86::GR32RegisterClass); return std::make_pair(0U, X86::GR64RegisterClass); case 'R': // LEGACY_REGS - if (VT == MVT::i8) + if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, X86::GR8_NOREXRegisterClass); if (VT == MVT::i16) return std::make_pair(0U, X86::GR16_NOREXRegisterClass); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d61a1252304a..b6036782b865 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -169,8 +169,8 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - /// PANDN - and with not'd value. - PANDN, + /// ANDNP - Bitwise Logical AND NOT of Packed FP values. + ANDNP, /// PSIGNB/W/D - Copy integer sign. PSIGNB, PSIGNW, PSIGND, @@ -466,6 +466,12 @@ namespace llvm { /// fit into displacement field of the instruction. bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement = true); + + + /// isCalleePop - Determines whether the callee is required to pop its + /// own arguments. Callee pop is necessary to support tail calls. 
+ bool isCalleePop(CallingConv::ID CallingConv, + bool is64Bit, bool IsVarArg, bool TailCallOpt); } //===--------------------------------------------------------------------===// @@ -590,10 +596,6 @@ namespace llvm { virtual ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - virtual const char *LowerXConstraint(EVT ConstraintVT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops @@ -823,6 +825,7 @@ namespace llvm { SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 1ea8071053e9..0245e5c09644 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -150,11 +150,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { MachineInstr *MI = MIB; MachineFunction &MF = *MI->getParent()->getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - const TargetInstrDesc &TID = MI->getDesc(); + const MCInstrDesc &MCID = MI->getDesc(); unsigned Flags = 0; - if (TID.mayLoad()) + if (MCID.mayLoad()) Flags |= MachineMemOperand::MOLoad; - if (TID.mayStore()) + if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset), diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 33534cd82bdb..adcc747eb4b8 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1368,6 +1368,11 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), // (shl x, 1) ==> (add x, x) +// Note that if x is undef (immediate or otherwise), we could theoretically +// end up with the two uses of x getting different values, producing a result +// where the least significant bit is not 0. However, the probability of this +// happening is considered low enough that this is officially not a +// "real problem". def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index b506f5e0b81a..7cb870fabd62 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -112,31 +112,8 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // a pattern) and the FPI instruction should have emission info (e.g. opcode // encoding and asm printing info). -// Pseudo Instructions for FP stack return values. -def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0) -def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0) -def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0) - -// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when -// there are two values live out on the stack from a call or inlineasm. This -// magic is handled by the stackifier. 
It is not valid to emit FpGET_ST1* and -// then FpGET_ST0*. In addition, it is invalid for any FP-using operations to -// occur between them. -def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1) -def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1) -def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1) - -let Defs = [ST0] in { -def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR -def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR -def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR -} - -let Defs = [ST1] in { -def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR -def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR -def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR -} +// Pseudo Instruction for FP stack return values. +def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FpIf32, FpIf64 - Floating Point Pseudo Instruction template. // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1. @@ -147,19 +124,6 @@ class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> : class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> : FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>; -// Register copies. Just copies, the shortening ones do not truncate. -let neverHasSideEffects = 1 in { - def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; - def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; - def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; - def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; - def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; -} - // Factoring for arithmetic. 
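FpPOP_RETVAL above replaces the entire FpGET_ST0*/FpGET_ST1* family. The stackifier hunk earlier in this patch handles it by shifting every tracked slot up one position and making the hidden return value the new stack bottom (anything computed between the call and FpPOP_RETVAL sits physically above it). A standalone model of that bookkeeping, with an invented stack of two registers pushed after the call:

  #include <algorithm>
  #include <cstdio>

  int main() {
    const unsigned NumFPRegs = 8;
    unsigned Stack[8] = {3, 5}; // slot 0 is the stack bottom
    unsigned RegMap[8] = {};    // FP register -> stack slot
    RegMap[3] = 0; RegMap[5] = 1;
    unsigned StackTop = 2;      // two values pushed since the call
    unsigned DstFP = 1;         // FP reg receiving the call's return value

    // Shift every tracked slot up one position...
    std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
    for (unsigned i = 0; i != NumFPRegs; ++i)
      ++RegMap[i];
    ++StackTop;

    // ...and the hidden return value becomes the new stack bottom.
    Stack[0] = DstFP;
    RegMap[DstFP] = 0;

    for (unsigned s = 0; s != StackTop; ++s)
      printf("slot %u -> FP%u\n", s, Stack[s]); // FP1, FP3, FP5
    return 0;
  }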
multiclass FPBinary_rr<SDNode OpNode> { // Register op register -> register diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 7daa26492274..6d89bcc29e7b 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -460,6 +460,11 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, Requires<[HasCLMUL]>; + +class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>; // FMA3 Instruction Templates diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7c9a9f7e8c50..b00109c9fa4d 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -46,8 +46,8 @@ def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86pandn : SDNode<"X86ISD::PANDN", - SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, +def X86andnp : SDNode<"X86ISD::ANDNP", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psignb : SDNode<"X86ISD::PSIGNB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, @@ -168,11 +168,13 @@ def ssmem : Operand<v4f32> { let PrintMethod = "printf32mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def sdmem : Operand<v2f64> { let PrintMethod = "printf64mem"; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } //===----------------------------------------------------------------------===// @@ -301,6 +303,7 @@ def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; // 256-bit bitconvert pattern fragments def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; +def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e2016eb2d6fb..55b5835f52a7 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -13,7 +13,6 @@ #include "X86InstrInfo.h" #include "X86.h" -#include "X86GenInstrInfo.inc" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" @@ -36,6 +35,9 @@ #include "llvm/MC/MCAsmInfo.h" #include <limits> +#define GET_INSTRINFO_CTOR +#include "X86GenInstrInfo.inc" + using namespace llvm; static cl::opt<bool> @@ -52,7 +54,12 @@ ReMatPICStubLoad("remat-pic-stub-load", cl::init(false), cl::Hidden); X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) - : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), + : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::ADJCALLSTACKDOWN64 + : X86::ADJCALLSTACKDOWN32), + (tm.getSubtarget<X86Subtarget>().is64Bit() + ? 
X86::ADJCALLSTACKUP64 + : X86::ADJCALLSTACKUP32)), TM(tm), RI(tm, *this) { enum { TB_NOT_REVERSABLE = 1U << 31, @@ -293,12 +300,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, { X86::MUL32r, X86::MUL32m, 1, 0 }, { X86::MUL64r, X86::MUL64m, 1, 0 }, @@ -403,10 +415,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOV8rr, X86::MOV8rm, 0 }, { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 }, { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 16 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, @@ -417,6 +432,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, @@ -779,6 +796,9 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOVAPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: return true; @@ -800,6 +820,9 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOVAPSmr: case X86::MOVAPDmr: case X86::MOVDQAmr: + case X86::VMOVAPSYmr: + case X86::VMOVAPDYmr: + case X86::VMOVDQAYmr: case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: @@ -918,6 +941,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVUPSrm: case X86::MOVAPDrm: case X86::MOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: case X86::FsMOVAPSrm: @@ -1689,13 +1716,13 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; // Conditional branch is a special case. 
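(Editor's aside on the folding tables extended above: each row pairs a register-form opcode with its memory-form twin plus the minimum alignment the folded operand needs, which is why the new VMOVAPSY entries carry a 32. A minimal standalone sketch of that lookup, using a std::map and invented names rather than LLVM's static arrays:

#include <map>

// FoldEntry mirrors the shape of a table row: a register-form opcode maps to
// its memory-form twin plus the minimum alignment (in bytes) the folded
// memory operand must have; 0 means no requirement.
struct FoldEntry { unsigned MemOpcode; unsigned MinAlign; };

// Returns true and sets MemOpcode when folding is legal for KnownAlign.
bool lookupFold(const std::map<unsigned, FoldEntry> &Table, unsigned RegOpcode,
                unsigned KnownAlign, unsigned &MemOpcode) {
  std::map<unsigned, FoldEntry>::const_iterator It = Table.find(RegOpcode);
  if (It == Table.end())
    return false;                        // no memory form registered
  if (KnownAlign < It->second.MinAlign)
    return false;                        // e.g. an aligned YMM form needs 32
  MemOpcode = It->second.MemOpcode;
  return true;
}

This is only an illustration of the idea; the diff's tables also pack load/store/unfold flags into the same entries.)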
- if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } @@ -1789,7 +1816,6 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, .addMBB(UnCondBrIter->getOperand(0).getMBB()); BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) .addMBB(TargetBB); - MBB.addSuccessor(TargetBB); OldInst->eraseFromParent(); UnCondBrIter->eraseFromParent(); @@ -1968,6 +1994,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = X86::MOV8rr; } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = X86::MOVAPSrr; + else if (X86::VR256RegClass.contains(DestReg, SrcReg)) + Opc = X86::VMOVAPSYrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else @@ -2057,6 +2085,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::MOVAPSrm : X86::MOVAPSmr; else return load ? X86::MOVUPSrm : X86::MOVUPSmr; + case 32: + assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); + // If stack is realigned we can use aligned stores. + if (isStackAligned) + return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr; + else + return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr; } } @@ -2083,7 +2118,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); - bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) @@ -2115,7 +2151,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); @@ -2224,7 +2261,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; // FIXME: AsmPrinter doesn't know how to handle // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. @@ -2273,7 +2310,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); + unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize(); if (Size < RCSize) { // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. 
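(Editor's aside on the 32-byte spill case added in this hunk: getLoadStoreRegOpcode now picks the aligned YMM move only when the stack slot is known to be sufficiently aligned, or the stack can be realigned, and otherwise falls back to the unaligned form. A minimal sketch of that dispatch, with illustrative enumerators standing in for the generated X86::* opcodes:

// Illustrative stand-ins for the generated opcode enums; not LLVM's values.
enum Opcode { MOVAPSrm, MOVUPSrm, VMOVAPSYrm, VMOVUPSYrm };

// Mirrors the shape of the new 32-byte case: prefer the aligned vector move
// when the stack slot's alignment is guaranteed.
Opcode selectVectorLoadOpcode(unsigned RegSizeInBytes, bool StackIsAligned) {
  switch (RegSizeInBytes) {
  case 16: return StackIsAligned ? MOVAPSrm   : MOVUPSrm;   // XMM reload
  case 32: return StackIsAligned ? VMOVAPSYrm : VMOVUPSYrm; // YMM reload
  default: return MOVUPSrm; // the sketch only models the vector cases
  }
}

The same choice is made for stores, which is why storeRegToStackSlot and loadRegFromStackSlot now consult the frame lowering's stack alignment directly.)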
@@ -2542,7 +2579,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, unsigned Opc = MI->getOpcode(); unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires @@ -2588,9 +2625,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, return false; UnfoldStore &= FoldedStore; - const TargetInstrDesc &TID = get(Opc); - const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.getRegClass(&RI); + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) @@ -2632,7 +2668,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, } // Emit the data processing instruction. - MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); + MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); MachineInstrBuilder MIB(DataMI); if (FoldedStore) @@ -2685,7 +2721,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); + const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI); std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs = MF.extractStoreMemRefs(MI->memoperands_begin(), @@ -2710,9 +2746,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned Index = I->second.second & 0xf; bool FoldedLoad = I->second.second & (1 << 4); bool FoldedStore = I->second.second & (1 << 5); - const TargetInstrDesc &TID = get(Opc); - const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); - unsigned NumDefs = TID.NumDefs; + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); + unsigned NumDefs = MCID.NumDefs; std::vector<SDValue> AddrOps; std::vector<SDValue> BeforeOps; std::vector<SDValue> AfterOps; @@ -2756,13 +2792,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the data processing instruction. 
std::vector<EVT> VTs; const TargetRegisterClass *DstRC = 0; - if (TID.getNumDefs() > 0) { - DstRC = TID.OpInfo[0].getRegClass(&RI); + if (MCID.getNumDefs() > 0) { + DstRC = getRegClass(MCID, 0, &RI); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) + if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs()) VTs.push_back(VT); } if (Load) @@ -2845,6 +2881,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::VMOVDQUYrm: break; } switch (Opc2) { @@ -2867,6 +2908,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::VMOVDQUYrm: break; } @@ -3045,6 +3091,13 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, + // AVX 256-bit support + { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, + { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, + { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, + { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, + { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, + { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, }; // FIXME: Some shuffle and unpack instructions have equivalents in different diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index d8950230d83d..5f2eba34ac45 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -19,6 +19,9 @@ #include "X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" +#define GET_INSTRINFO_HEADER +#include "X86GenInstrInfo.inc" + namespace llvm { class X86RegisterInfo; class X86TargetMachine; @@ -611,7 +614,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) { isLeaMem(MI, Op); } -class X86InstrInfo : public TargetInstrInfoImpl { +class X86InstrInfo : public X86GenInstrInfo { X86TargetMachine &TM; const X86RegisterInfo RI; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8cab80824688..7eb07b0a97bd 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -251,6 +251,7 @@ class X86MemOperand<string printMethod> : Operand<iPTR> { let ParserMatchClass = X86MemAsmOperand; } +let OperandType = "OPERAND_MEMORY" in { def opaque32mem : X86MemOperand<"printopaquemem">; def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; @@ -267,6 +268,7 @@ def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; def f256mem : X86MemOperand<"printf256mem">; +} // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. @@ -274,6 +276,7 @@ def i8mem_NOREX : Operand<i64> { let PrintMethod = "printi8mem"; let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } // GPRs available for tailcall. 
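(Editor's aside on the ReplaceableInstrs rows added above: each row lists the packed-single, packed-double and packed-integer encodings of one and the same operation, so the execution-domain fixup pass can swap forms without changing semantics. A hedged sketch of the lookup, with invented opcode constants, not LLVM's generated values:

// Invented stand-ins for three encodings of the same 256-bit register move.
enum { VMOVAPSYrr_ = 0, VMOVAPDYrr_ = 1, VMOVDQAYrr_ = 2 };

static const unsigned Replaceable[][3] = {
  // { SSEPackedSingle, SSEPackedDouble, SSEPackedInt }
  { VMOVAPSYrr_, VMOVAPDYrr_, VMOVDQAYrr_ },
};

// Return the equivalent opcode in the requested domain (0, 1 or 2), or the
// original opcode when no row mentions it.
unsigned equivalentInDomain(unsigned Opcode, unsigned Domain) {
  for (unsigned r = 0; r != sizeof(Replaceable) / sizeof(Replaceable[0]); ++r)
    for (unsigned i = 0; i != 3; ++i)
      if (Replaceable[r][i] == Opcode)
        return Replaceable[r][Domain];
  return Opcode;
}

As the diff's own FIXME notes, some shuffle and unpack instructions have cross-domain equivalents that this table does not yet capture.)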
@@ -287,6 +290,7 @@ def i32mem_TC : Operand<i32> { let PrintMethod = "printi32mem"; let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } // Special i64mem for addresses of load folding tail calls. These are not @@ -297,9 +301,11 @@ def i64mem_TC : Operand<i64> { let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, i32imm, i8imm); let ParserMatchClass = X86MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } -let ParserMatchClass = X86AbsMemAsmOperand, +let OperandType = "OPERAND_PCREL", + ParserMatchClass = X86AbsMemAsmOperand, PrintMethod = "print_pcrel_imm" in { def i32imm_pcrel : Operand<i32>; def i16imm_pcrel : Operand<i16>; @@ -317,6 +323,7 @@ def brtarget8 : Operand<OtherVT>; def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; + let OperandType = "OPERAND_IMMEDIATE"; } class ImmSExtAsmOperandClass : AsmOperandClass { @@ -363,15 +370,18 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { // 16-bits but only 8 bits are significant. def i16i8imm : Operand<i16> { let ParserMatchClass = ImmSExti16i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 32-bits but only 8 bits are significant. def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExti32i8AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64> { let ParserMatchClass = ImmSExti64i32AsmOperand; + let OperandType = "OPERAND_IMMEDIATE"; } // 64-bits but only 32 bits are significant, and those bits are treated as being @@ -438,8 +448,10 @@ def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; -def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate; -def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate; +def In32BitMode : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<"!Mode64Bit">; +def In64BitMode : Predicate<"Subtarget->is64Bit()">, + AssemblerPredicate<"Mode64Bit">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; @@ -669,7 +681,7 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; } let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), +def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", []>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{q}\t$imm", []>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b64c03a9b597..fe11d776804c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -512,6 +512,26 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W; +let Predicates = [HasAVX] in { + def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), + (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), + (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), 
addr:$src)>; + + def : Pat<(f32 (sint_to_fp GR32:$src)), + (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; + def : Pat<(f32 (sint_to_fp GR64:$src)), + (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>; + def : Pat<(f64 (sint_to_fp GR32:$src)), + (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; + def : Pat<(f64 (sint_to_fp GR64:$src)), + (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>; +} + defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, @@ -1473,83 +1493,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, - SDNode OpNode, int HasPat = 0, - list<list<dag>> Pattern = []> { + SDNode OpNode> { let Pattern = []<dag> in { defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - VEX_4V; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]), 0>, - OpSize, VEX_4V; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, - !if(HasPat, Pattern[0], // rr - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, - VR128:$src2)))]), - !if(HasPat, Pattern[2], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, TB; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB; defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, - !if(HasPat, Pattern[1], // rr - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 - VR128:$src2))))]), - !if(HasPat, Pattern[3], // rm - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))])>, - TB, OpSize; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB, OpSize; } } /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// -multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> { +multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, + SDNode OpNode> { defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V; + !strconcat(OpcodeStr, "ps"), f256mem, + 
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, VEX_4V; defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V; + !strconcat(OpcodeStr, "pd"), f256mem, + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (bc_v4i64 (v4f64 VR256:$src2))))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and">; -defm VOR : sse12_fp_packed_logical_y<0x56, "or">; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">; -let isCommutable = 0 in - defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">; +defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; +defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; +defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; defm AND : sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in - defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ - // single r+r - [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))], - // double r+r - [], - // single r+m - [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))], - // double r+m - []]>; + defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions @@ -1991,11 +1996,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "movnti\t{$src, $dst|$dst, $src}", + "movnti{l}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, TB, Requires<[HasSSE2]>; def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "movnti\t{$src, $dst|$dst, $src}", + "movnti{q}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, TB, Requires<[HasSSE2]>; } @@ -2006,13 +2011,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), // Prefetch intrinsic. def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), - "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>; def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), - "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>; def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), - "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>; def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), - "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, @@ -2037,7 +2042,10 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", } // The same as done above but for AVX. The 128-bit versions are the -// same, but re-encoded. The 256-bit does not support PI version. 
+// same, but re-encoded. The 256-bit does not support PI version, and +// doesn't need it because on sandy bridge the register is set to zero +// at the rename stage without using any execution unit, so SET0PSY +// and SET0PDY can be used for vector int instructions without penalty // FIXME: Change encoding to pseudo! This is blocked right now by the x86 // JIT implementation; it does not expand the instructions below like // X86MCInstLower does. @@ -2052,8 +2060,8 @@ def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; let ExeDomain = SSEPackedInt in -def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; +def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllZerosV))]>; } def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; @@ -2063,6 +2071,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar as the "$src1 = $dst" +// constraint but without the tied operands. +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>, + Requires<[HasAVX, OptForSpeed]>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Load/Store XCSR register //===----------------------------------------------------------------------===// @@ -2959,6 +2976,22 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; } +// These are the correct encodings of the instructions so that we know how to +// read correct assembly, even though we continue to emit the wrong ones for +// compatibility with Darwin's buggy assembler. +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOV64toSDrr FR64:$dst, GR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOVSDto64rr GR64:$dst, FR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; +def : InstAlias<"movq\t{$src, $dst|$dst, $src}", + (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; + //===---------------------------------------------------------------------===// // SSE2 - Move Quadword //===---------------------------------------------------------------------===// @@ -3589,6 +3622,16 @@ let Predicates = [HasSSE2] in def : Pat<(fextend (loadf32 addr:$src)), (CVTSS2SDrm addr:$src)>; +// FIXME: According to the intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar as the "$src1 = $dst" +// constraint but without the tied operands.
+let Predicates = [HasAVX] in + def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), + addr:$src)>; + // bit_convert let Predicates = [HasXMMInt] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; @@ -3625,6 +3668,19 @@ let Predicates = [HasXMMInt] in { let Predicates = [HasAVX] in { def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>; + def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>; } // Move scalar to XMM zero-extended @@ -3807,6 +3863,8 @@ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; // Use movaps / movups for SSE integer load / store (one byte shorter). +// The instructions selected below are then converted to MOVDQA/MOVDQU +// during the SSE domain pass. let Predicates = [HasSSE1] in { def : Pat<(alignedloadv4i32 addr:$src), (MOVAPSrm addr:$src)>; @@ -3835,8 +3893,9 @@ let Predicates = [HasSSE1] in { (MOVUPSmr addr:$dst, VR128:$src)>; } -// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter). +// Use vmovaps/vmovups for AVX integer load/store. let Predicates = [HasAVX] in { + // 128-bit load/store def : Pat<(alignedloadv4i32 addr:$src), (VMOVAPSrm addr:$src)>; def : Pat<(loadv4i32 addr:$src), @@ -3862,6 +3921,24 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (VMOVUPSmr addr:$dst, VR128:$src)>; + + // 256-bit load/store + def : Pat<(alignedloadv4i64 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv4i64 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedloadv8i32 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv8i32 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v4i64 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v8i32 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; } //===----------------------------------------------------------------------===// @@ -5160,33 +5237,52 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // CLMUL Instructions //===----------------------------------------------------------------------===// -// Only the AVX version of CLMUL instructions are described here. 
- // Carry-less Multiplication instructions -def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +let Constraints = "$src1 = $dst" in { +def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>; + +def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>; +} + +// AVX carry-less Multiplication instructions +def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -// Assembler Only -multiclass avx_vpclmul<string asm> { - def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; - - def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; -} -defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">; -defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">; -defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">; -defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">; + +multiclass pclmul_alias<string asm, int immop> { + def : InstAlias<!strconcat("pclmul", asm, + "dq {$src, $dst|$dst, $src}"), + (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>; + + def : InstAlias<!strconcat("pclmul", asm, + "dq {$src, $dst|$dst, $src}"), + (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>; + + def : InstAlias<!strconcat("vpclmul", asm, + "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), + (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>; + + def : InstAlias<!strconcat("vpclmul", asm, + "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), + (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>; +} +defm : pclmul_alias<"hqhq", 0x11>; +defm : pclmul_alias<"hqlq", 0x01>; +defm : pclmul_alias<"lqhq", 0x10>; +defm : pclmul_alias<"lqlq", 0x00>; //===----------------------------------------------------------------------===// // AVX Instructions diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index f73cff39e86d..31de878343ef 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -411,6 +411,8 @@ let Uses = [RDX, RAX, RCX] in let Defs = [RAX, RDI], Uses = [RDX, RDI] in def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7; +def : InstAlias<"xstorerng", (XSTORE)>; + let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in { def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7; def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 55aceba9f270..ce8ef495c001 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -18,26 +18,32 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; namespace { class X86MCCodeEmitter : public MCCodeEmitter { 
X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - const TargetInstrInfo &TII; + const MCInstrInfo &MCII; + const MCSubtargetInfo &STI; MCContext &Ctx; - bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) - : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { - Is64BitMode = is64Bit; + X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : MCII(mcii), STI(sti), Ctx(ctx) { } ~X86MCCodeEmitter() {} + bool is64BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) != 0; + } + static unsigned GetX86RegNum(const MCOperand &MO) { return X86RegisterInfo::getX86RegNum(MO.getReg()); } @@ -111,7 +117,7 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const TargetInstrDesc &Desc, + const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const; void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte, @@ -119,23 +125,17 @@ public: raw_ostream &OS) const; void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, - const MCInst &MI, const TargetInstrDesc &Desc, + const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const; }; } // end anonymous namespace -MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, - TargetMachine &TM, - MCContext &Ctx) { - return new X86MCCodeEmitter(TM, Ctx, false); -} - -MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, - TargetMachine &TM, - MCContext &Ctx) { - return new X86MCCodeEmitter(TM, Ctx, true); +MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new X86MCCodeEmitter(MCII, STI, Ctx); } /// isDisp8 - Return true if this signed displacement fits in a 8-bit @@ -245,7 +245,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Handle %rip relative addressing. if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode - assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode"); + assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode"); assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); @@ -284,7 +284,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). - (!Is64BitMode || BaseReg != 0)) { + (!is64BitMode() || BaseReg != 0)) { if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); @@ -379,7 +379,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, /// called VEX. void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, - const TargetInstrDesc &Desc, + const MCInstrDesc &Desc, raw_ostream &OS) const { bool HasVEX_4V = false; if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) @@ -586,7 +586,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand /// size, and 3) use of X86-64 extended registers. 
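(Editor's aside before the DetermineREXPrefix helper that follows: the byte it assembles is the architectural REX prefix 0100WRXB, where W selects 64-bit operand size and R/X/B extend the ModRM reg, SIB index and base fields. A standalone restatement of that packing, not LLVM's helper:

#include <cstdint>

uint8_t makeREX(bool W, bool R, bool X, bool B) {
  // 0100'WRXB: fixed high nibble 0x4, then the four extension bits.
  return uint8_t(0x40 | (W << 3) | (R << 2) | (X << 1) | (B << 0));
}
// makeREX(true, false, false, false) == 0x48, the REX.W byte seen on most
// 64-bit-operand instructions.

This is purely the x86-64 encoding rule; the code below additionally decides when a bare 0x40 prefix is required, e.g. for SPL/BPL/SIL/DIL.)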
static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, - const TargetInstrDesc &Desc) { + const MCInstrDesc &Desc) { unsigned REX = 0; if (TSFlags & X86II::REX_W) REX |= 1 << 3; // set REX.W @@ -596,7 +596,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, unsigned NumOps = MI.getNumOperands(); // FIXME: MCInst should explicitize the two-addrness. bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; @@ -713,7 +713,7 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, /// Not present, it is -1. void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, - const TargetInstrDesc &Desc, + const MCInstrDesc &Desc, raw_ostream &OS) const { // Emit the lock opcode prefix as needed. @@ -729,7 +729,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Emit the address size opcode prefix as needed. if ((TSFlags & X86II::AdSize) || - (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand))) + (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) EmitByte(0x67, CurByte, OS); // Emit the operand size opcode prefix as needed. @@ -772,7 +772,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Handle REX prefix. // FIXME: Can this come before F2 etc to simplify emission? - if (Is64BitMode) { + if (is64BitMode()) { if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) EmitByte(0x40 | REX, CurByte, OS); } @@ -803,7 +803,7 @@ void X86MCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); + const MCInstrDesc &Desc = MCII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; // Pseudo instructions don't get encoded. @@ -814,9 +814,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1) ++CurOp; - else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. 
LXADD32 --NumOps; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 793156ffce83..e38533555534 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -16,8 +16,8 @@ #include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" -#include "X86MCAsmInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp index 8f3dd3222489..37110382379e 100644 --- a/lib/Target/X86/X86MachObjectWriter.cpp +++ b/lib/Target/X86/X86MachObjectWriter.cpp @@ -8,19 +8,541 @@ //===----------------------------------------------------------------------===// #include "X86.h" +#include "X86FixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Object/MachOFormat.h" + using namespace llvm; +using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { + void RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue); + void RecordTLVPRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void RecordX86Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + void RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); public: X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/Is64Bit) {} + + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + if (Writer->is64Bit()) + RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + else + RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + } }; } +static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; +} + +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: return 0; + case FK_PCRel_2: + case FK_Data_2: return 1; + case FK_PCRel_4: + // FIXME: Remove these!!! 
+ case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case X86::reloc_signed_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // See <reloc.h>. + uint32_t FixupOffset = + Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + uint32_t FixupAddress = + Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only have + // the addend and appear to have attempted to define it to be the actual + // expression addend without the PCrel bias. However, instructions with data + // following the relocation are not accommodated for (see comment below + // regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1LL << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = macho::RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can understand + // it. I think it would require a local relocation, but I'm not sure if that + // would work either. The official way to get an absolute PCrel relocation + // is to use an absolute symbol (which we don't support yet). + if (IsPCRel) { + IsExtern = 1; + Type = macho::RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + report_fatal_error("unsupported pc-relative relocation of difference"); + + // The support for the situation where one or both of the symbols would + // require a local relocation is handled just like if the symbols were + // external. This is certainly used in the case of debug sections where the + // section has only temporary symbols and thus the symbols don't have base + // symbols. This is encoded using the section ordinal and non-extern + // relocation entries. + + // Darwin 'as' doesn't emit correct relocations for this (it ends up with a + // single SIGNED relocation); reject it for now. Except the case where both + // symbols don't have a base, equal but both NULL. + if (A_Base == B_Base && A_Base) + report_fatal_error("unsupported relocation with identical base"); + + Value += Writer->getSymbolAddress(&A_SD, Layout) - + (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout)); + Value -= Writer->getSymbolAddress(&B_SD, Layout) - + (B_Base == NULL ? 
0 : Writer->getSymbolAddress(B_Base, Layout)); + + if (A_Base) { + Index = A_Base->getIndex(); + IsExtern = 1; + } + else { + Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Unsigned; + + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); + + if (B_Base) { + Index = B_Base->getIndex(); + IsExtern = 1; + } + else { + Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(&SD); + + // Relocations inside debug sections always use local relocations when + // possible. This seems to be done because the debugger doesn't fully + // understand x86_64 relocation entries, and expects to find values that + // have already been fixed up. + if (Symbol->isInSection()) { + const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( + Fragment->getParent()->getSection()); + if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) + Base = 0; + } + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); + } else if (Symbol->isInSection() && !Symbol->isVariable()) { + // The index is the section ordinal (1-based). + Index = SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + Value += Writer->getSymbolAddress(&SD, Layout); + + if (IsPCRel) + Value -= FixupAddress + (1 << Log2Size); + } else if (Symbol->isVariable()) { + const MCExpr *Value = Symbol->getVariableValue(); + int64_t Res; + bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, + Writer->getSectionAddressMap()); + if (isAbs) { + FixedValue = Res; + return; + } else { + report_fatal_error("unsupported relocation of variable '" + + Symbol->getName() + "'"); + } + } else { + report_fatal_error("unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. + if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + Type = macho::RIT_X86_64_GOTLoad; + else + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + Type = macho::RIT_X86_64_TLV; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + report_fatal_error("unsupported symbol modifier in relocation"); + } else { + Type = macho::RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). 
Even with the PCrel + // adjustment Darwin x86_64 uses, the offset is still negative and the + // linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the final + // offset. + switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = macho::RIT_X86_64_Signed1; break; + case 2: Type = macho::RIT_X86_64_Signed2; break; + case 4: Type = macho::RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in branch " + "relocation"); + + Type = macho::RIT_X86_64_Branch; + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) { + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which + // case all we do is set the PCrel bit in the relocation entry; this is + // used with exception handling, for example. The source is required to + // include any necessary offset directly. + Type = macho::RIT_X86_64_GOT; + IsPCRel = 1; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + report_fatal_error("TLVP symbol modifier should have been rip-rel"); + } else if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in relocation"); + else + Type = macho::RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linkers point of view, this is done solely for + // pedantic compatibility with 'as'. + Type = A_SD->isExternal() ? 
(unsigned)macho::RIT_Difference : + (unsigned)macho::RIT_Generic_LocalDifference; + Value2 = Writer->getSymbolAddress(B_SD, Layout); + FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Writer->addRelocation(Fragment->getParent(), MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !is64Bit() && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend is + // zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) + + Target.getConstant()); + FixedValue += 1ULL << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (macho::RIT_Generic_TLV << 28)); // Type + Writer->addRelocation(Fragment->getParent(), MRE); +} + +void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA() && + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need a + // scattered relocation entry. Differences always require scattered + // relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // Get the symbol data, if any. 
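(Editor's aside before the function body continues: every non-scattered relocation emitted in this file ends by packing the same five fields into MRE.Word1, matching the Mach-O relocation_info layout; scattered entries use the different arrangement visible in RecordScatteredRelocation above. A standalone version of the non-scattered packing:

#include <cstdint>

// relocation_info layout: 24-bit symbol or section index, then 1-bit pcrel,
// 2-bit length (log2 of the fixup size), 1-bit extern, 4-bit type.
uint32_t packRelocationWord1(uint32_t Index, bool IsPCRel, uint32_t Log2Size,
                             bool IsExtern, uint32_t Type) {
  return (Index << 0) |
         (uint32_t(IsPCRel) << 24) |
         (Log2Size << 25) |
         (uint32_t(IsExtern) << 27) |
         (Type << 28);
}

Keeping the shifts identical to <reloc.h> is what lets the linker read these entries back without any translation.)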
+ MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a scattered + // relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = macho::RIT_Vanilla; + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + + // Check whether we need an external or internal relocation. + if (Writer->doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += Writer->getSectionAddress(&SymSD); + } + if (IsPCRel) + FixedValue -= Writer->getSectionAddress(Fragment->getParent()); + + Type = macho::RIT_Vanilla; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, bool Is64Bit, uint32_t CPUType, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1ad6203af2f6..f2faf59367a1 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -39,6 +39,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CommandLine.h" + +#define GET_REGINFO_TARGET_DESC +#include "X86GenRegisterInfo.inc" + using namespace llvm; cl::opt<bool> @@ -49,18 +53,11 @@ ForceStackAlign("force-align-stack", X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) - : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ? - X86::ADJCALLSTACKDOWN64 : - X86::ADJCALLSTACKDOWN32, - tm.getSubtarget<X86Subtarget>().is64Bit() ? - X86::ADJCALLSTACKUP64 : - X86::ADJCALLSTACKUP32), - TM(tm), TII(tii) { + : X86GenRegisterInfo(), TM(tm), TII(tii) { // Cache some information. 
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); Is64Bit = Subtarget->is64Bit(); IsWin64 = Subtarget->isTargetWin64(); - StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Is64Bit) { SlotSize = 8; @@ -107,6 +104,21 @@ int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const { return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour); } +/// getCompactUnwindRegNum - This function maps the register to the number for +/// compact unwind encoding. Return -1 if the register isn't valid. +int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { + switch (getLLVMRegNum(RegNum, isEH)) { + case X86::EBX: case X86::RBX: return 1; + case X86::ECX: case X86::R12: return 2; + case X86::EDX: case X86::R13: return 3; + case X86::EDI: case X86::R14: return 4; + case X86::ESI: case X86::R15: return 5; + case X86::EBP: case X86::RBP: return 6; + } + + return -1; +} + int X86RegisterInfo::getSEHRegNum(unsigned i) const { int reg = getX86RegNum(i); @@ -495,18 +507,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::BPL); } - // Mark the x87 stack registers as reserved, since they don't behave normally - // with respect to liveness. We don't fully model the effects of x87 stack - // pushes and pops after stackification. - Reserved.set(X86::ST0); - Reserved.set(X86::ST1); - Reserved.set(X86::ST2); - Reserved.set(X86::ST3); - Reserved.set(X86::ST4); - Reserved.set(X86::ST5); - Reserved.set(X86::ST6); - Reserved.set(X86::ST7); - // Mark the segment registers as reserved. Reserved.set(X86::CS); Reserved.set(X86::SS); @@ -517,13 +517,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Reserve the registers that only exist in 64-bit mode. if (!Is64Bit) { + // These 8-bit registers are part of the x86-64 extension even though their + // super-registers are old 32-bits. + Reserved.set(X86::SIL); + Reserved.set(X86::DIL); + Reserved.set(X86::BPL); + Reserved.set(X86::SPL); + for (unsigned n = 0; n != 8; ++n) { + // R8, R9, ... const unsigned GPR64[] = { X86::R8, X86::R9, X86::R10, X86::R11, X86::R12, X86::R13, X86::R14, X86::R15 }; - for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; - ++AI) + for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI) Reserved.set(Reg); // XMM8, XMM9, ... @@ -550,6 +557,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); @@ -608,7 +616,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); bool reseveCallFrame = TFI->hasReservedCallFrame(MF); int Opcode = I->getOpcode(); - bool isDestroy = Opcode == getCallFrameDestroyOpcode(); + bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); DebugLoc DL = I->getDebugLoc(); uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; @@ -625,16 +633,17 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. 
To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; MachineInstr *New = 0; - if (Opcode == getCallFrameSetupOpcode()) { + if (Opcode == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); } else { - assert(Opcode == getCallFrameDestroyOpcode()); + assert(Opcode == TII.getCallFrameDestroyOpcode()); // Factor out the amount the callee already popped. Amount -= CalleeAmt; @@ -657,7 +666,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, return; } - if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) { + if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. @@ -667,6 +676,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); + + // We are not tracking the stack pointer adjustment by the callee, so make + // sure we restore the stack pointer immediately after the call, there may + // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. + MachineBasicBlock::iterator B = MBB.begin(); + while (I != B && !llvm::prior(I)->getDesc().isCall()) + --I; MBB.insert(I, New); } } @@ -713,7 +729,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. - int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm()); + int Imm = (int)(MI.getOperand(i + 3).getImm()); + int Offset = FIOffset + Imm; + assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && + "Requesting 64-bit offset in 32-bit immediate!"); MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. @@ -910,8 +929,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } } -#include "X86GenRegisterInfo.inc" - namespace { struct MSAH : public MachineFunctionPass { static char ID; @@ -920,10 +937,10 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF) { const X86TargetMachine *TM = static_cast<const X86TargetMachine *>(&MF.getTarget()); - const X86RegisterInfo *X86RI = TM->getRegisterInfo(); + const TargetFrameLowering *TFI = TM->getFrameLowering(); MachineRegisterInfo &RI = MF.getRegInfo(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); - unsigned StackAlignment = X86RI->getStackAlignment(); + unsigned StackAlignment = TFI->getStackAlignment(); // Be over-conservative: scan over all vreg defs and find whether vector // registers are used. If yes, there is a possibility that vector register diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index dd3d3dcdcce5..a12eb1297f7e 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -15,7 +15,9 @@ #define X86REGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "X86GenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "X86GenRegisterInfo.inc" namespace llvm { class Type; @@ -56,10 +58,6 @@ private: /// unsigned SlotSize; - /// StackAlign - Default stack alignment. 
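
// Aside, not part of this commit: the rounding in the hunk above,
//   Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
// as a standalone helper. For Align > 0 it returns the smallest multiple of
// Align that is >= Amount; the name roundUpToAlignment is illustrative.
#include <stdint.h>

static inline uint64_t roundUpToAlignment(uint64_t Amount, uint64_t Align) {
  return (Amount + Align - 1) / Align * Align;
}
// e.g. roundUpToAlignment(20, 16) == 32 and roundUpToAlignment(32, 16) == 32,
// so an already-aligned outgoing-argument area is left unchanged.
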
- /// - unsigned StackAlign; - /// StackPtr - X86 physical register used as stack ptr. /// unsigned StackPtr; @@ -75,8 +73,6 @@ public: /// register identifier. static unsigned getX86RegNum(unsigned RegNo); - unsigned getStackAlignment() const { return StackAlign; } - /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum /// (created by TableGen) for target dependencies. int getDwarfRegNum(unsigned RegNum, bool isEH) const; @@ -85,6 +81,10 @@ public: // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; + /// getCompactUnwindRegNum - This function maps the register to the number for + /// compact unwind encoding. Return -1 if the register isn't valid. + int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const; + /// Code Generation virtual methods... /// diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index f1d149c3fbc7..203722a66162 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -206,15 +206,22 @@ let Namespace = "X86" in { def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>; } - // Floating point stack registers - def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; - def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>; - def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>; - def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>; - def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>; - def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>; - def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>; - def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; + class STRegister<string Name, list<Register> A> : Register<Name> { + let Aliases = A; + } + + // Floating point stack registers. These don't map one-to-one to the FP + // pseudo registers, but we still mark them as aliasing FP registers. That + // way both kinds can be live without exceeding the stack depth. ST registers + // are only live around inline assembly. + def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>; + def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>; + def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>; + def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>; + def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>; + def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>; + def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>; + def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>; // Status flags register def EFLAGS : Register<"flags">; @@ -279,58 +286,23 @@ let Namespace = "X86" in { // require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d" // cannot be encoded. 
def GR8 : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL, - R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> { - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - static const unsigned X86_GR8_AO_64[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, - X86::R8B, X86::R9B, X86::R10B, X86::R11B, - X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL - }; - - GR8Class::iterator - GR8Class::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - if (Subtarget.is64Bit()) - return X86_GR8_AO_64; - else - return begin(); - } - - GR8Class::iterator - GR8Class::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); - // Does the function dedicate RBP / EBP to being a frame ptr? - if (!Subtarget.is64Bit()) - // In 32-mode, none of the 8-bit registers aliases EBP or ESP. - return begin() + 8; - else if (TFI->hasFP(MF) || MFI->getReserveFP()) - // If so, don't allocate SPL or BPL. - return array_endof(X86_GR8_AO_64) - 1; - else - // If not, just don't allocate SPL. - return array_endof(X86_GR8_AO_64); - } + (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL, + R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> { + let AltOrders = [(sub GR8, AH, BH, CH, DH)]; + let AltOrderSelect = [{ + return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit(); }]; } def GR16 : RegisterClass<"X86", [i16], 16, - [AX, CX, DX, SI, DI, BX, BP, SP, - R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> { + (add AX, CX, DX, SI, DI, BX, BP, SP, + R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)]; } def GR32 : RegisterClass<"X86", [i32], 32, - [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, - R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { + (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, + R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; } @@ -338,8 +310,8 @@ def GR32 : RegisterClass<"X86", [i32], 32, // RIP isn't really a register and it can't be used anywhere except in an // address, but it doesn't cause trouble. def GR64 : RegisterClass<"X86", [i64], 64, - [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { + (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + RBX, R14, R15, R12, R13, RBP, RSP, RIP)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit), (GR32 sub_32bit)]; @@ -348,16 +320,13 @@ def GR64 : RegisterClass<"X86", [i64], 64, // Segment registers for use by MOV instructions (and others) that have a // segment register as one operand. Always contain a 16-bit segment // descriptor. -def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>; +def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>; // Debug registers. -def DEBUG_REG : RegisterClass<"X86", [i32], 32, - [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>; +def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>; // Control registers. 
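
// Aside, not part of this commit: the set algebra behind the new register
// class definitions -- (add ...), (sub ...), (and ...), (sequence ...) -- in
// miniature. TableGen evaluates these operators at build time to produce the
// final, ordered register lists; this standalone C++ sketch only illustrates
// the semantics, and all names in it are placeholders.
#include <algorithm>
#include <string>
#include <vector>

typedef std::vector<std::string> RegList;

// (sub Base, Drop...): keep Base's order, remove the dropped registers.
static RegList subOp(RegList Base, const RegList &Drop) {
  for (size_t i = 0, e = Drop.size(); i != e; ++i)
    Base.erase(std::remove(Base.begin(), Base.end(), Drop[i]), Base.end());
  return Base;
}
// So the alternative order above, (sub GR8, AH, BH, CH, DH), is simply GR8
// minus the four H registers, selected when AltOrderSelect returns true
// (64-bit mode, where H registers cannot be encoded alongside a REX prefix).
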
-def CONTROL_REG : RegisterClass<"X86", [i64], 64, - [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8, - CR9, CR10, CR11, CR12, CR13, CR14, CR15]>; +def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>; // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" @@ -365,99 +334,69 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64, // that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD, // and GR64_ABCD are classes for registers that support 8-bit h-register // operations. -def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>; -def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>; -def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { +def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>; +def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>; +def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> { let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)]; } -def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { +def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> { let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi), (GR16_ABCD sub_16bit)]; } -def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { +def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> { let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi), (GR16_ABCD sub_16bit), (GR32_ABCD sub_32bit)]; } -def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> { +def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; } -def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, - R8, R9, R11, RIP]> { +def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, + R8, R9, R11, RIP)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit), (GR32_TC sub_32bit)]; } -def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, - R8, R9, R11]>; +def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, + R8, R9, R11)>; // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, AH, CH, DH, BL, BH]> { - let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; - iterator allocation_order_end(const MachineFunction &MF) const; - }]; - let MethodBodies = [{ - // In 64-bit mode, it's not safe to blindly allocate H registers. 
- static const unsigned X86_GR8_NOREX_AO_64[] = { - X86::AL, X86::CL, X86::DL, X86::BL - }; - - GR8_NOREXClass::iterator - GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - if (Subtarget.is64Bit()) - return X86_GR8_NOREX_AO_64; - else - return begin(); - } - - GR8_NOREXClass::iterator - GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - if (Subtarget.is64Bit()) - return array_endof(X86_GR8_NOREX_AO_64); - else - return end(); - } + (add AL, CL, DL, AH, CH, DH, BL, BH)> { + let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)]; + let AltOrderSelect = [{ + return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit(); }]; } // GR16_NOREX - GR16 registers which do not require a REX prefix. def GR16_NOREX : RegisterClass<"X86", [i16], 16, - [AX, CX, DX, SI, DI, BX, BP, SP]> { + (add AX, CX, DX, SI, DI, BX, BP, SP)> { let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)]; } // GR32_NOREX - GR32 registers which do not require a REX prefix. def GR32_NOREX : RegisterClass<"X86", [i32], 32, - [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { + (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> { let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), (GR16_NOREX sub_16bit)]; } // GR64_NOREX - GR64 registers which do not require a REX prefix. def GR64_NOREX : RegisterClass<"X86", [i64], 64, - [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> { + (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> { let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), (GR16_NOREX sub_16bit), (GR32_NOREX sub_32bit)]; } // GR32_NOSP - GR32 registers except ESP. -def GR32_NOSP : RegisterClass<"X86", [i32], 32, - [EAX, ECX, EDX, ESI, EDI, EBX, EBP, - R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { +def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; } // GR64_NOSP - GR64 registers except RSP (and RIP). -def GR64_NOSP : RegisterClass<"X86", [i64], 64, - [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - RBX, R14, R15, R12, R13, RBP]> { +def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit), (GR32_NOSP sub_32bit)]; @@ -466,36 +405,30 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, // GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except // ESP. def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32, - [EAX, ECX, EDX, ESI, EDI, EBX, EBP]> { + (and GR32_NOREX, GR32_NOSP)> { let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), (GR16_NOREX sub_16bit)]; } // GR64_NOREX_NOSP - GR64_NOREX registers except RSP. def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, - [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> { + (and GR64_NOREX, GR64_NOSP)> { let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), (GR16_NOREX sub_16bit), (GR32_NOREX_NOSP sub_32bit)]; } // A class to support the 'A' assembler constraint: EAX then EDX. -def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> { +def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> { let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi), (GR16_ABCD sub_16bit)]; } // Scalar SSE2 floating point registers. 
-def FR32 : RegisterClass<"X86", [f32], 32, - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, - XMM12, XMM13, XMM14, XMM15]>; +def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; -def FR64 : RegisterClass<"X86", [f64], 64, - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, - XMM12, XMM13, XMM14, XMM15]>; +def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>; // FIXME: This sets up the floating point register files as though they are f64 @@ -504,37 +437,31 @@ def FR64 : RegisterClass<"X86", [f64], 64, // faster on common hardware. In reality, this should be controlled by a // command line option or something. -def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; -def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; -def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>; +def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>; +def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>; +def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>; // Floating point stack registers (these are not allocatable by the // register allocator - the floating point stackifier is responsible // for transforming FPn allocations to STn registers) -def RST : RegisterClass<"X86", [f80, f64, f32], 32, - [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> { +def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> { let isAllocatable = 0; } // Generic vector registers: VR64 and VR128. -def VR64: RegisterClass<"X86", [x86mmx], 64, - [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; -def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, - XMM12, XMM13, XMM14, XMM15]> { +def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>; +def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (add FR32)> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; } def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256, - [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, - YMM12, YMM13, YMM14, YMM15]> { + (sequence "YMM%u", 0, 15)> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; } // Status flags registers. -def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { +def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> { let CopyCost = -1; // Don't allow copying of status registers. let isAllocatable = 0; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 481e821030b3..5e6c659e5393 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -7,21 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file implements the X86 specific subclass of TargetSubtarget. +// This file implements the X86 specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" #include "X86InstrInfo.h" -#include "X86GenSubtarget.inc" #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallVector.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "X86GenSubtargetInfo.inc" + using namespace llvm; #if defined(_MSC_VER) @@ -154,7 +157,7 @@ const char *X86Subtarget::getBZeroEntry() const { /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { - if (Is64Bit) + if (In64BitMode) return false; return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } @@ -170,73 +173,6 @@ unsigned X86Subtarget::getSpecialAddressLatency() const { return 200; } -/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the -/// specified arguments. If we can't run cpuid on the host, return true. -static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) - #if defined(__GNUC__) - // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. - asm ("movq\t%%rbx, %%rsi\n\t" - "cpuid\n\t" - "xchgq\t%%rbx, %%rsi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value)); - return false; - #elif defined(_MSC_VER) - int registers[4]; - __cpuid(registers, value); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; - return false; - #endif -#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) - #if defined(__GNUC__) - asm ("movl\t%%ebx, %%esi\n\t" - "cpuid\n\t" - "xchgl\t%%ebx, %%esi\n\t" - : "=a" (*rEAX), - "=S" (*rEBX), - "=c" (*rECX), - "=d" (*rEDX) - : "a" (value)); - return false; - #elif defined(_MSC_VER) - __asm { - mov eax,value - cpuid - mov esi,rEAX - mov dword ptr [esi],eax - mov esi,rEBX - mov dword ptr [esi],ebx - mov esi,rECX - mov dword ptr [esi],ecx - mov esi,rEDX - mov dword ptr [esi],edx - } - return false; - #endif -#endif - return true; -} - -static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) { - Family = (EAX >> 8) & 0xf; // Bits 8 - 11 - Model = (EAX >> 4) & 0xf; // Bits 4 - 7 - if (Family == 6 || Family == 0xf) { - if (Family == 0xf) - // Examine extended family ID if family ID is F. - Family += (EAX >> 20) & 0xff; // Bits 20 - 27 - // Examine extended model ID if family ID is 6 or F. 
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 - } -} - void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; union { @@ -244,50 +180,66 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { char c[12]; } text; - if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) + if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) return; - GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); + X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 15) & 1) HasCMov = true; - if ((EDX >> 23) & 1) X86SSELevel = MMX; - if ((EDX >> 25) & 1) X86SSELevel = SSE1; - if ((EDX >> 26) & 1) X86SSELevel = SSE2; - if (ECX & 0x1) X86SSELevel = SSE3; - if ((ECX >> 9) & 1) X86SSELevel = SSSE3; - if ((ECX >> 19) & 1) X86SSELevel = SSE41; - if ((ECX >> 20) & 1) X86SSELevel = SSE42; + if ((EDX >> 15) & 1) HasCMov = true; ToggleFeature(X86::FeatureCMOV); + if ((EDX >> 23) & 1) X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); + if ((EDX >> 25) & 1) X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); + if ((EDX >> 26) & 1) X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); + if (ECX & 0x1) X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); + if ((ECX >> 9) & 1) X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3); + if ((ECX >> 19) & 1) X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41); + if ((ECX >> 20) & 1) X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42); // FIXME: AVX codegen support is not ready. - //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; } + //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX); bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); - HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); - HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); - HasAES = IsIntel && ((ECX >> 25) & 0x1); + HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); ToggleFeature(X86::FeatureCLMUL); + HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); ToggleFeature(X86::FeatureFMA3); + HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); ToggleFeature(X86::FeaturePOPCNT); + HasAES = IsIntel && ((ECX >> 25) & 0x1); ToggleFeature(X86::FeatureAES); if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. unsigned Family = 0; unsigned Model = 0; - DetectFamilyModel(EAX, Family, Model); - IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); + X86_MC::DetectFamilyModel(EAX, Family, Model); + if (IsAMD || (Family == 6 && Model >= 13)) { + IsBTMemSlow = true; + ToggleFeature(X86::FeatureSlowBTMem); + } // If it's Nehalem, unaligned memory access is fast. 
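
// Aside, not part of this commit: the family/model decoding that moved into
// X86_MC::DetectFamilyModel, as a standalone sketch. EAX is the value CPUID
// leaf 1 returns; the extended fields only contribute when the base family
// is 6 or 0xF. decodeFamilyModel is an illustrative name.
#include <stdint.h>

static inline void decodeFamilyModel(uint32_t EAX, unsigned &Family,
                                     unsigned &Model) {
  Family = (EAX >> 8) & 0xf;                // bits 8-11
  Model  = (EAX >> 4) & 0xf;                // bits 4-7
  if (Family == 6 || Family == 0xf) {
    if (Family == 0xf)
      Family += (EAX >> 20) & 0xff;         // extended family, bits 20-27
    Model += ((EAX >> 16) & 0xf) << 4;      // extended model, bits 16-19
  }
}
// With this decoding, the slow-BT check above fires for AMD or for Intel
// family 6 with model >= 13, and the fast-unaligned-access check matches
// family 15, model 26.
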
- if (Family == 15 && Model == 26) + if (Family == 15 && Model == 26) { IsUAMemFast = true; + ToggleFeature(X86::FeatureFastUAMem); + } - GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - HasX86_64 = (EDX >> 29) & 0x1; - HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); - HasFMA4 = IsAMD && ((ECX >> 16) & 0x1); + X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if ((EDX >> 29) & 0x1) { + HasX86_64 = true; + ToggleFeature(X86::Feature64Bit); + } + if (IsAMD && ((ECX >> 6) & 0x1)) { + HasSSE4A = true; + ToggleFeature(X86::FeatureSSE4A); + } + if (IsAMD && ((ECX >> 16) & 0x1)) { + HasFMA4 = true; + ToggleFeature(X86::FeatureFMA4); + } } } -X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, - bool is64Bit) - : PICStyle(PICStyles::None) +X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, + unsigned StackAlignOverride, bool is64Bit) + : X86GenSubtargetInfo(TT, CPU, FS) + , PICStyle(PICStyles::None) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) , HasCMov(false) @@ -306,73 +258,66 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) , TargetTriple(TT) - , Is64Bit(is64Bit) { - - // default to hard float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Hard; - + , In64BitMode(is64Bit) { // Determine default and user specified characteristics - if (!FS.empty()) { + if (!FS.empty() || !CPU.empty()) { + std::string CPUName = CPU; + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) + CPUName = sys::getHostCPUName(); +#else + CPUName = "generic"; +#endif + } + + // Make sure 64-bit features are available in 64-bit mode. (But make sure + // SSE2 can be turned off explicitly.) + std::string FullFS = FS; + if (In64BitMode) { + if (!FullFS.empty()) + FullFS = "+64bit,+sse2," + FullFS; + else + FullFS = "+64bit,+sse2"; + } + // If feature string is not empty, parse features string. - std::string CPU = sys::getHostCPUName(); - ParseSubtargetFeatures(FS, CPU); - // All X86-64 CPUs also have SSE2, however user might request no SSE via - // -mattr, so don't force SSELevel here. - if (HasAVX) - X86SSELevel = NoMMXSSE; + ParseSubtargetFeatures(CPUName, FullFS); } else { // Otherwise, use CPUID to auto-detect feature set. AutoDetectSubtargetFeatures(); - // Make sure SSE2 is enabled; it is available on all X86-64 CPUs. - if (Is64Bit && !HasAVX && X86SSELevel < SSE2) - X86SSELevel = SSE2; - } - // If requesting codegen for X86-64, make sure that 64-bit features - // are enabled. - if (Is64Bit) { - HasX86_64 = true; + // Make sure 64-bit features are available in 64-bit mode. + if (In64BitMode) { + HasX86_64 = true; ToggleFeature(X86::Feature64Bit); + HasCMov = true; ToggleFeature(X86::FeatureCMOV); - // All 64-bit cpus have cmov support. - HasCMov = true; + if (!HasAVX && X86SSELevel < SSE2) { + X86SSELevel = SSE2; + ToggleFeature(X86::FeatureSSE1); + ToggleFeature(X86::FeatureSSE2); + } + } } + + // It's important to keep the MCSubtargetInfo feature bits in sync with + // target data structure which is shared with MC code emitter, etc. 
+ if (In64BitMode) + ToggleFeature(X86::Mode64Bit); + + if (HasAVX) + X86SSELevel = NoMMXSSE; DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); - assert((!Is64Bit || HasX86_64) && + assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both // 32 and 64 bit) and for all 64-bit targets. - if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || - isTargetSolaris() || Is64Bit) + if (StackAlignOverride) + stackAlignment = StackAlignOverride; + else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || + isTargetSolaris() || In64BitMode) stackAlignment = 16; - - if (StackAlignment) - stackAlignment = StackAlignment; -} - -/// IsCalleePop - Determines whether the callee is required to pop its -/// own arguments. Callee pop is necessary to support tail calls. -bool X86Subtarget::IsCalleePop(bool IsVarArg, - CallingConv::ID CallingConv) const { - if (IsVarArg) - return false; - - switch (CallingConv) { - default: - return false; - case CallingConv::X86_StdCall: - return !is64Bit(); - case CallingConv::X86_FastCall: - return !is64Bit(); - case CallingConv::X86_ThisCall: - return !is64Bit(); - case CallingConv::Fast: - return GuaranteedTailCallOpt; - case CallingConv::GHC: - return GuaranteedTailCallOpt; - } } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 286a7982a699..6d22027b7aa8 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file declares the X86 specific subclass of TargetSubtarget. +// This file declares the X86 specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// @@ -15,12 +15,16 @@ #define X86SUBTARGET_H #include "llvm/ADT/Triple.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/CallingConv.h" #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "X86GenSubtargetInfo.inc" + namespace llvm { class GlobalValue; +class StringRef; class TargetMachine; /// PICStyles - The X86 backend supports a number of different styles of PIC. @@ -35,7 +39,7 @@ enum Style { }; } -class X86Subtarget : public TargetSubtarget { +class X86Subtarget : public X86GenSubtargetInfo { protected: enum X86SSEEnum { NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 @@ -108,16 +112,17 @@ protected: Triple TargetTriple; private: - /// Is64Bit - True if the processor supports 64-bit instructions and - /// pointer size is 64 bit. - bool Is64Bit; + /// In64BitMode - True if compiling for 64-bit, false for 32-bit. + bool In64BitMode; public: /// This constructor initializes the data members to match that /// of the specified triple. /// - X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit); + X86Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, + unsigned StackAlignOverride, bool is64Bit); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -130,14 +135,13 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
- std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID /// instruction. void AutoDetectSubtargetFeatures(); - bool is64Bit() const { return Is64Bit; } + bool is64Bit() const { return In64BitMode; } PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } @@ -195,7 +199,7 @@ public: } bool isTargetWin64() const { - return Is64Bit && (isTargetMingw() || isTargetWindows()); + return In64BitMode && (isTargetMingw() || isTargetWindows()); } bool isTargetEnvMacho() const { @@ -203,7 +207,7 @@ public: } bool isTargetWin32() const { - return !Is64Bit && (isTargetMingw() || isTargetWindows()); + return !In64BitMode && (isTargetMingw() || isTargetWindows()); } bool isPICStyleSet() const { return PICStyle != PICStyles::None; } @@ -248,9 +252,6 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; - - /// IsCalleePop - Test whether a function should pop its own arguments. - bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 74833291dc7a..9cab0e089098 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "X86MCAsmInfo.h" #include "X86TargetMachine.h" #include "X86.h" #include "llvm/PassManager.h" @@ -24,22 +23,6 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { - if (TheTriple.getArch() == Triple::x86_64) - return new X86_64MCAsmInfoDarwin(TheTriple); - else - return new X86MCAsmInfoDarwin(TheTriple); - } - - if (TheTriple.isOSWindows()) - return new X86MCAsmInfoCOFF(TheTriple); - - return new X86ELFMCAsmInfo(TheTriple); -} - static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCContext &Ctx, TargetAsmBackend &TAB, raw_ostream &_OS, @@ -62,15 +45,11 @@ extern "C" void LLVMInitializeX86Target() { RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target); - // Register the target asm info. - RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo); - RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); - // Register the code emitter. TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createX86_32MCCodeEmitter); + createX86MCCodeEmitter); TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createX86_64MCCodeEmitter); + createX86MCCodeEmitter); // Register the asm backend. TargetRegistry::RegisterAsmBackend(TheX86_32Target, @@ -87,8 +66,9 @@ extern "C" void LLVMInitializeX86Target() { X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : X86TargetMachine(T, TT, FS, false), + : X86TargetMachine(T, TT, CPU, FS, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? 
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" : (getSubtargetImpl()->isTargetCygMing() || @@ -103,8 +83,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : X86TargetMachine(T, TT, FS, true), + : X86TargetMachine(T, TT, CPU, FS, true), DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"), InstrInfo(*this), TSInfo(*this), @@ -115,9 +96,10 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, /// X86TargetMachine ctor - Create an X86 target. /// X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS, is64Bit), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { DefRelocModel = getRelocationModel(); @@ -182,6 +164,10 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, // Finally, if we have "none" as our PIC style, force to static mode. if (Subtarget.getPICStyle() == PICStyles::None) setRelocationModel(Reloc::Static); + + // default to hard float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Hard; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 597392251e6a..885334a365fe 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -43,7 +43,8 @@ private: public: X86TargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { llvm_unreachable("getInstrInfo not implemented"); @@ -87,7 +88,7 @@ class X86_32TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_32TargetMachine(const Target &T, const std::string &M, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; @@ -113,7 +114,7 @@ class X86_64TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_64TargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 9093de691582..a1d73c6b4f99 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -1,14 +1,11 @@ set(LLVM_TARGET_DEFINITIONS XCore.td) -tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(XCoreGenRegisterNames.inc -gen-register-enums) -tablegen(XCoreGenRegisterInfo.inc -gen-register-desc) -tablegen(XCoreGenInstrNames.inc -gen-instr-enums) -tablegen(XCoreGenInstrInfo.inc -gen-instr-desc) +tablegen(XCoreGenRegisterInfo.inc -gen-register-info) +tablegen(XCoreGenInstrInfo.inc -gen-instr-info) tablegen(XCoreGenAsmWriter.inc -gen-asm-writer) tablegen(XCoreGenDAGISel.inc -gen-dag-isel) 
tablegen(XCoreGenCallingConv.inc -gen-callingconv) -tablegen(XCoreGenSubtarget.inc -gen-subtarget) +tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget) add_llvm_target(XCoreCodeGen XCoreAsmPrinter.cpp @@ -16,7 +13,6 @@ add_llvm_target(XCoreCodeGen XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp XCoreISelLowering.cpp - XCoreMCAsmInfo.cpp XCoreRegisterInfo.cpp XCoreSubtarget.cpp XCoreTargetMachine.cpp @@ -25,3 +21,4 @@ add_llvm_target(XCoreCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..c3b3dc9e647d --- /dev/null +++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMXCoreDesc + XCoreMCTargetDesc.cpp + XCoreMCAsmInfo.cpp + ) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/XCore/MCTargetDesc/Makefile b/lib/Target/XCore/MCTargetDesc/Makefile new file mode 100644 index 000000000000..de61543bfe9c --- /dev/null +++ b/lib/Target/XCore/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/XCore/TargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMXCoreDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp index 42ab1b31d57a..42ab1b31d57a 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h index 840392263881..840392263881 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.h +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp new file mode 100644 index 000000000000..939d97c9d87c --- /dev/null +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -0,0 +1,56 @@ +//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides XCore specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "XCoreMCTargetDesc.h" +#include "XCoreMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "XCoreGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "XCoreGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "XCoreGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createXCoreMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitXCoreMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializeXCoreMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo); +} + +static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitXCoreMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializeXCoreMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget, + createXCoreMCSubtargetInfo); +} + +extern "C" void LLVMInitializeXCoreMCAsmInfo() { + RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget); +} diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h new file mode 100644 index 000000000000..3cfc3764a62c --- /dev/null +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h @@ -0,0 +1,40 @@ +//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides XCore specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef XCOREMCTARGETDESC_H +#define XCOREMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target TheXCoreTarget; + +} // End llvm namespace + +// Defines symbolic names for XCore registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "XCoreGenRegisterInfo.inc" + +// Defines symbolic names for the XCore instructions. +// +#define GET_INSTRINFO_ENUM +#include "XCoreGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "XCoreGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile index 6c1ef886031b..b823c4ed37e9 100644 --- a/lib/Target/XCore/Makefile +++ b/lib/Target/XCore/Makefile @@ -12,13 +12,12 @@ LIBRARYNAME = LLVMXCoreCodeGen TARGET = XCore # Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \ - XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \ - XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \ +BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \ + XCoreGenAsmWriter.inc \ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \ - XCoreGenSubtarget.inc + XCoreGenSubtargetInfo.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index 8937fbe123c6..b8fb0cac319b 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -15,6 +15,7 @@ #ifndef TARGET_XCORE_H #define TARGET_XCORE_H +#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -25,17 +26,6 @@ namespace llvm { FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM); - extern Target TheXCoreTarget; - } // end namespace llvm; -// Defines symbolic names for XCore registers. This defines a mapping from -// register name to register number. -// -#include "XCoreGenRegisterNames.inc" - -// Defines symbolic names for the XCore instructions. -// -#include "XCoreGenInstrNames.inc" - #endif diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 8f06dd32662f..1a43714d63b9 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -16,7 +16,6 @@ #include "XCore.h" #include "XCoreInstrInfo.h" #include "XCoreSubtarget.h" -#include "XCoreMCAsmInfo.h" #include "XCoreTargetMachine.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -27,6 +26,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -114,7 +114,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *GVSym = Mang->getSymbol(GV); - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType()); // Mark the start of the global diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 8cabbbf16c35..6d040e052659 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1591,21 +1591,18 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, // XCore Inline Assembly Support //===----------------------------------------------------------------------===// -std::vector<unsigned> XCoreTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const -{ - if (Constraint.size() != 1) - return std::vector<unsigned>(); - - switch (Constraint[0]) { +std::pair<unsigned, const TargetRegisterClass*> +XCoreTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { default : break; case 'r': - return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2, - XCore::R3, XCore::R4, XCore::R5, - XCore::R6, XCore::R7, XCore::R8, - XCore::R9, XCore::R10, XCore::R11, 0); - break; + return std::make_pair(0U, XCore::GRRegsRegisterClass); + } } - return std::vector<unsigned>(); + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. 
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index a8d67d4ad21e..9c803bef6dd2 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -148,9 +148,9 @@ namespace llvm { SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 9cb6a7d17b5e..f90481f3fbc9 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -18,11 +18,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" -#include "XCoreGenInstrInfo.inc" +#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#define GET_INSTRINFO_CTOR +#include "XCoreGenInstrInfo.inc" + namespace llvm { namespace XCore { @@ -38,7 +41,7 @@ namespace XCore { using namespace llvm; XCoreInstrInfo::XCoreInstrInfo() - : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)), + : XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP), RI(*this) { } diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 977fe8dd550a..840b1e163652 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -17,9 +17,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "XCoreRegisterInfo.h" +#define GET_INSTRINFO_HEADER +#include "XCoreGenInstrInfo.inc" + namespace llvm { -class XCoreInstrInfo : public TargetInstrInfoImpl { +class XCoreInstrInfo : public XCoreGenInstrInfo { const XCoreRegisterInfo RI; public: XCoreInstrInfo(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 46c9e57c1af5..357a4a083582 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -33,11 +33,13 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#define GET_REGINFO_TARGET_DESC +#include "XCoreGenRegisterInfo.inc" + using namespace llvm; XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii) - : XCoreGenRegisterInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP), - TII(tii) { + : XCoreGenRegisterInfo(), TII(tii) { } // helper functions @@ -193,7 +195,16 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, #endif Offset += StackSize; - + + unsigned FrameReg = getFrameRegister(MF); + + // Special handling of DBG_VALUE instructions. + if (MI.isDebugValue()) { + MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + // fold constant into offset. 
Offset += MI.getOperand(i + 1).getImm(); MI.getOperand(i + 1).ChangeToImmediate(0); @@ -205,7 +216,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset/=4; bool FP = TFI->hasFP(MF); - + unsigned Reg = MI.getOperand(0).getReg(); bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill(); @@ -216,7 +227,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (FP) { bool isUs = isImmUs(Offset); - unsigned FramePtr = XCore::R10; if (!isUs) { if (!RS) @@ -228,18 +238,18 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, switch (MI.getOpcode()) { case XCore::LDWFI: BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) - .addReg(FramePtr) + .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); break; case XCore::STWFI: BuildMI(MBB, II, dl, TII.get(XCore::STW_3r)) .addReg(Reg, getKillRegState(isKill)) - .addReg(FramePtr) + .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); break; case XCore::LDAWFI: BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) - .addReg(FramePtr) + .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); break; default: @@ -249,18 +259,18 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, switch (MI.getOpcode()) { case XCore::LDWFI: BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg) - .addReg(FramePtr) + .addReg(FrameReg) .addImm(Offset); break; case XCore::STWFI: BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) .addReg(Reg, getKillRegState(isKill)) - .addReg(FramePtr) + .addReg(FrameReg) .addImm(Offset); break; case XCore::LDAWFI: BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) - .addReg(FramePtr) + .addReg(FrameReg) .addImm(Offset); break; default: @@ -328,6 +338,3 @@ unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned XCoreRegisterInfo::getRARegister() const { return XCore::LR; } - -#include "XCoreGenRegisterInfo.inc" - diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 7a9bc9fb8705..801d9eba2171 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -15,7 +15,9 @@ #define XCOREREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" -#include "XCoreGenRegisterInfo.h.inc" + +#define GET_REGINFO_HEADER +#include "XCoreGenRegisterInfo.inc" namespace llvm { diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td index 09510976dd06..c3542304a4ec 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.td +++ b/lib/Target/XCore/XCoreRegisterInfo.td @@ -44,13 +44,13 @@ def LR : Ri<15, "lr">, DwarfRegNum<[15]>; // def GRRegs : RegisterClass<"XCore", [i32], 32, // Return values and arguments - [R0, R1, R2, R3, + (add R0, R1, R2, R3, // Not preserved across procedure calls R11, // Callee save - R4, R5, R6, R7, R8, R9, R10]>; + R4, R5, R6, R7, R8, R9, R10)>; // Reserved -def RRegs : RegisterClass<"XCore", [i32], 32, [CP, DP, SP, LR]> { +def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> { let isAllocatable = 0; } diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp index 78a6fa5b2edb..ad069bf138a7 100644 --- a/lib/Target/XCore/XCoreSubtarget.cpp +++ b/lib/Target/XCore/XCoreSubtarget.cpp @@ -7,14 +7,22 @@ // //===----------------------------------------------------------------------===// // -// This file implements the XCore specific subclass of TargetSubtarget. +// This file implements the XCore specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// #include "XCoreSubtarget.h" #include "XCore.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "XCoreGenSubtargetInfo.inc" + using namespace llvm; -XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS) +XCoreSubtarget::XCoreSubtarget(const std::string &TT, + const std::string &CPU, const std::string &FS) + : XCoreGenSubtargetInfo(TT, CPU, FS) { } diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h index f8be3ec86189..7b29fa236710 100644 --- a/lib/Target/XCore/XCoreSubtarget.h +++ b/lib/Target/XCore/XCoreSubtarget.h @@ -7,32 +7,35 @@ // //===----------------------------------------------------------------------===// // -// This file declares the XCore specific subclass of TargetSubtarget. +// This file declares the XCore specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef XCORESUBTARGET_H #define XCORESUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetMachine.h" - #include <string> +#define GET_SUBTARGETINFO_HEADER +#include "XCoreGenSubtargetInfo.inc" + namespace llvm { +class StringRef; -class XCoreSubtarget : public TargetSubtarget { +class XCoreSubtarget : public XCoreGenSubtargetInfo { public: /// This constructor initializes the data members to match that /// of the specified triple. /// - XCoreSubtarget(const std::string &TT, const std::string &FS); + XCoreSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; } // End llvm namespace diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 30da2c896c0f..342966ae5c86 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "XCoreMCAsmInfo.h" #include "XCoreTargetMachine.h" #include "XCore.h" #include "llvm/Module.h" @@ -21,9 +20,10 @@ using namespace llvm; /// XCoreTargetMachine ctor - Create an ILP32 architecture model /// XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS) - : LLVMTargetMachine(T, TT), - Subtarget(TT, FS), + : LLVMTargetMachine(T, TT, CPU, FS), + Subtarget(TT, CPU, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), @@ -41,5 +41,4 @@ bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM, // Force static initialization. 
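The subtarget hunks above switch ParseSubtargetFeatures to StringRef parameters and a void return, with its body generated into XCoreGenSubtargetInfo.inc. XCore's own constructor stays empty; as a hedged sketch only, a target that actually resolves CPU and feature bits would typically invoke it like this (the call is the convention used by richer targets, not something this hunk adds):

  XCoreSubtarget::XCoreSubtarget(const std::string &TT,
                                 const std::string &CPU,
                                 const std::string &FS)
    : XCoreGenSubtargetInfo(TT, CPU, FS) {
    // Hypothetical: decode the CPU name and feature string into feature
    // bits; the implementation is tblgen-generated.
    ParseSubtargetFeatures(CPU, FS);
  }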
extern "C" void LLVMInitializeXCoreTarget() { RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget); - RegisterAsmInfo<XCoreMCAsmInfo> Y(TheXCoreTarget); } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 24daadcb6bf4..6235ac3a6a1a 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreSelectionDAGInfo TSInfo; public: XCoreTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &CPU, const std::string &FS); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameLowering *getFrameLowering() const { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 54a7f679e01c..fa007cfc6513 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -493,7 +493,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; + std::vector<Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; @@ -733,12 +733,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 179b150c1478..3de7bfceed1b 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -2,7 +2,6 @@ add_llvm_library(LLVMipo ArgumentPromotion.cpp ConstantMerge.cpp DeadArgumentElimination.cpp - DeadTypeElimination.cpp ExtractGV.cpp FunctionAttrs.cpp GlobalDCE.cpp diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index d4eaf0c4a3ec..15177650f4e5 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -208,7 +208,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // the old function, but doesn't have isVarArg set. 
const FunctionType *FTy = Fn.getFunctionType(); - std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); + std::vector<Type*> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); @@ -244,11 +244,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(PAL); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(PAL); if (cast<CallInst>(Call)->isTailCall()) @@ -647,7 +647,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; + std::vector<Type*> Params; // Set up to build a new list of parameter attributes. SmallVector<AttributeWithIndex, 8> AttributesVec; @@ -659,13 +659,13 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value. - const Type *RetTy = FTy->getReturnType(); + Type *RetTy = FTy->getReturnType(); const Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); - std::vector<const Type*> RetTypes; + std::vector<Type*> RetTypes; if (RetTy->isVoidTy()) { NRetTy = RetTy; } else { @@ -822,11 +822,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(NewCallPAL); if (cast<CallInst>(Call)->isTailCall()) diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp deleted file mode 100644 index d3d4963b63eb..000000000000 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ /dev/null @@ -1,112 +0,0 @@ -//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass is used to cleanup the output of GCC. It eliminate names for types -// that are unused in the entire translation unit, using the FindUsedTypes pass. 
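The DeleteDeadVarargs and RemoveDeadStuffFromFunction hunks above reflect another commit-wide change: with the type system rewrite, Type is no longer handled through const pointers, so prototypes are built from std::vector<Type*> rather than std::vector<const Type*>. A sketch of assembling a function type under the new convention (makeBinaryIntFnTy is illustrative only):

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include <vector>
  using namespace llvm;

  FunctionType *makeBinaryIntFnTy(LLVMContext &C) {
    std::vector<Type*> Params;                 // note: Type*, not const Type*
    Params.push_back(Type::getInt32Ty(C));
    Params.push_back(Type::getInt32Ty(C));
    return FunctionType::get(Type::getInt32Ty(C), Params, /*isVarArg=*/false);
  }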
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "deadtypeelim" -#include "llvm/Transforms/IPO.h" -#include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Module.h" -#include "llvm/TypeSymbolTable.h" -#include "llvm/DerivedTypes.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumKilled, "Number of unused typenames removed from symtab"); - -namespace { - struct DTE : public ModulePass { - static char ID; // Pass identification, replacement for typeid - DTE() : ModulePass(ID) { - initializeDTEPass(*PassRegistry::getPassRegistry()); - } - - // doPassInitialization - For this pass, it removes global symbol table - // entries for primitive types. These are never used for linking in GCC and - // they make the output uglier to look at, so we nuke them. - // - // Also, initialize instance variables. - // - bool runOnModule(Module &M); - - // getAnalysisUsage - This function needs FindUsedTypes to do its job... - // - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<FindUsedTypes>(); - } - }; -} - -char DTE::ID = 0; -INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination", - false, false) -INITIALIZE_PASS_DEPENDENCY(FindUsedTypes) -INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false) - -ModulePass *llvm::createDeadTypeEliminationPass() { - return new DTE(); -} - - -// ShouldNukeSymtabEntry - Return true if this module level symbol table entry -// should be eliminated. -// -static inline bool ShouldNukeSymtabEntry(const Type *Ty){ - // Nuke all names for primitive types! - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) - return true; - - // Nuke all pointers to primitive types as well... - if (const PointerType *PT = dyn_cast<PointerType>(Ty)) - if (PT->getElementType()->isPrimitiveType() || - PT->getElementType()->isIntegerTy()) - return true; - - return false; -} - -// run - For this pass, it removes global symbol table entries for primitive -// types. These are never used for linking in GCC and they make the output -// uglier to look at, so we nuke them. Also eliminate types that are never used -// in the entire program as indicated by FindUsedTypes. -// -bool DTE::runOnModule(Module &M) { - bool Changed = false; - - TypeSymbolTable &ST = M.getTypeSymbolTable(); - const SetVector<const Type*> &T = getAnalysis<FindUsedTypes>().getTypes(); - std::set<const Type*> UsedTypes(T.begin(), T.end()); - - // Check the symbol table for superfluous type entries... - // - // Grab the 'type' plane of the module symbol... - TypeSymbolTable::iterator TI = ST.begin(); - TypeSymbolTable::iterator TE = ST.end(); - while ( TI != TE ) { - // If this entry should be unconditionally removed, or if we detect that - // the type is not used, remove it. - const Type *RHS = TI->second; - if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) { - ST.remove(TI++); - ++NumKilled; - Changed = true; - } else { - ++TI; - // We only need to leave one name for each type. - UsedTypes.erase(RHS); - } - } - - return Changed; -} - -// vim: sw=2 diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index cdf7b76dd087..4ac721dd0600 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1999,9 +1999,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const std::vector<Function*> &Ctors) { // If we made a change, reassemble the initializer list. 
- std::vector<Constant*> CSVals; - CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); - CSVals.push_back(0); + Constant *CSVals[2]; + CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); + CSVals[1] = 0; + + const StructType *StructTy = + cast <StructType>( + cast<ArrayType>(GCL->getType()->getElementType())->getElementType()); // Create the new init list. std::vector<Constant*> CAList; @@ -2016,12 +2020,10 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 0x7fffffff); } - CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); + CAList.push_back(ConstantStruct::get(StructTy, CSVals)); } // Create the array initializer. - const Type *StructTy = - cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); @@ -2218,42 +2220,40 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); // Return the modified struct. - return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), - STy->isPacked()); - } else { - ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); - const SequentialType *InitTy = cast<SequentialType>(Init->getType()); - - uint64_t NumElts; - if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) - NumElts = ATy->getNumElements(); - else - NumElts = cast<VectorType>(InitTy)->getNumElements(); - + return ConstantStruct::get(STy, Elts); + } + + ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); + const SequentialType *InitTy = cast<SequentialType>(Init->getType()); - // Break up the array into elements. - if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) - Elts.push_back(cast<Constant>(*i)); - } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { - for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) - Elts.push_back(cast<Constant>(*i)); - } else if (isa<ConstantAggregateZero>(Init)) { - Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); - } else { - assert(isa<UndefValue>(Init) && "This code is out of sync with " - " ConstantFoldLoadThroughGEPConstantExpr"); - Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); - } + uint64_t NumElts; + if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy)) + NumElts = ATy->getNumElements(); + else + NumElts = cast<VectorType>(InitTy)->getNumElements(); + + // Break up the array into elements. 
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); + } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) { + for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i) + Elts.push_back(cast<Constant>(*i)); + } else if (isa<ConstantAggregateZero>(Init)) { + Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType())); + } else { + assert(isa<UndefValue>(Init) && "This code is out of sync with " + " ConstantFoldLoadThroughGEPConstantExpr"); + Elts.assign(NumElts, UndefValue::get(InitTy->getElementType())); + } - assert(CI->getZExtValue() < NumElts); - Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + assert(CI->getZExtValue() < NumElts); + Elts[CI->getZExtValue()] = + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - if (Init->getType()->isArrayTy()) - return ConstantArray::get(cast<ArrayType>(InitTy), Elts); - return ConstantVector::get(Elts); - } + if (Init->getType()->isArrayTy()) + return ConstantArray::get(cast<ArrayType>(InitTy), Elts); + return ConstantVector::get(Elts); } /// CommitValueTo - We have decided that Addr (which satisfies the predicate diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 21dcb519d9c9..31ce95f53d33 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -25,7 +25,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeConstantMergePass(Registry); initializeDAEPass(Registry); initializeDAHPass(Registry); - initializeDTEPass(Registry); initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); @@ -63,10 +62,6 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadArgEliminationPass()); } -void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createDeadTypeEliminationPass()); -} - void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createFunctionAttrsPass()); } diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index 52ecf17b8f9b..659476b139e4 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -267,7 +267,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) CastInst* CI = new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst); Value *Args[] = { CI, Inst->getArgOperand(1) }; - CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst); + CallInst::Create(ThrowLongJmp, Args, "", Inst); SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; @@ -386,7 +386,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) GetSetJmpMap(Func), BufPtr, ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) }; - CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst); + CallInst::Create(AddSJToMap, Args, "", Inst); // We are guaranteed that there are no values live across basic blocks // (because we are "not in SSA form" yet), but there can still be values live @@ -482,7 +482,7 @@ void LowerSetJmp::visitCallInst(CallInst& CI) std::vector<Value*> Params(CS.arg_begin(), CS.arg_end()); InvokeInst* II = InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], - Params.begin(), Params.end(), CI.getName(), Term); + Params, CI.getName(), Term); II->setCallingConv(CI.getCallingConv()); II->setAttributes(CI.getAttributes()); diff --git 
a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index f74144338a61..7796d05b7bc6 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -218,7 +218,6 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, llvm_unreachable("Unknown type!"); // Fall through in Release mode. case Type::IntegerTyID: - case Type::OpaqueTyID: case Type::VectorTyID: // Ty1 == Ty2 would have returned true earlier. return false; @@ -733,7 +732,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { ++i; } - CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end()); + CallInst *CI = Builder.CreateCall(F, Args); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); if (NewG->getReturnType()->isVoidTy()) { diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 2f3baebf4864..b7e63dc4484c 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -175,8 +175,7 @@ bool PruneEH::SimplifyFunction(Function *F) { if (II->doesNotThrow()) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), - Args.begin(), Args.end(), "", II); + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); Call->setCallingConv(II->getCallingConv()); Call->setAttributes(II->getAttributes()); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index a69076510806..0fbaff1509a7 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -28,8 +28,8 @@ #include "llvm/Pass.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ValueSymbolTable.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -143,8 +143,7 @@ static void RemoveDeadConstant(Constant *C) { assert(C->use_empty() && "Constant is not dead!"); SmallPtrSet<Constant*, 4> Operands; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - if (isa<DerivedType>(C->getOperand(i)->getType()) && - OnlyUsedBy(C->getOperand(i), C)) + if (OnlyUsedBy(C->getOperand(i), C)) Operands.insert(cast<Constant>(C->getOperand(i))); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { if (!GV->hasLocalLinkage()) return; // Don't delete non static globals. @@ -174,13 +173,19 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { } } -// Strip the symbol table of its names. -static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) { - for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) { - if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg")) - ++TI; - else - ST.remove(TI++); +// Strip any named types of their names. +static void StripTypeNames(Module &M, bool PreserveDbgInfo) { + std::vector<StructType*> StructTypes; + M.findUsedStructTypes(StructTypes); + + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isAnonymous() || STy->getName().empty()) continue; + + if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) + continue; + + STy->setName(""); } } @@ -221,7 +226,7 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { } // Remove all names from types. 
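With the type symbol table gone, the StripSymbols hunk above strips type names by walking the struct types the module actually uses and clearing each name in place. A condensed sketch of the same technique (dropping the PreserveDbgInfo filtering for brevity):

  #include "llvm/Module.h"
  #include "llvm/DerivedTypes.h"
  #include <vector>
  using namespace llvm;

  void stripStructNames(Module &M) {
    std::vector<StructType*> StructTypes;
    M.findUsedStructTypes(StructTypes);     // collect every used struct type
    for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
      if (!StructTypes[i]->isAnonymous())   // anonymous types have no name
        StructTypes[i]->setName("");        // drop the name in place
  }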
- StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo); + StripTypeNames(M, PreserveDbgInfo); return true; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a08446e5d519..64ea36fb1e9d 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1400,7 +1400,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. /// If so, insert the new bswap intrinsic and return it. Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { - const IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); + IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. ITy->getBitWidth() > 32*8) @@ -1424,9 +1424,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) return 0; - const Type *Tys[] = { ITy }; Module *M = I.getParent()->getParent()->getParent(); - Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); return CallInst::Create(F, V); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index ef67701921f9..537f2b318aa9 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -217,10 +217,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[3] = { CI.getArgOperand(0)->getType(), - CI.getArgOperand(1)->getType(), - CI.getArgOperand(2)->getType() }; - CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); + Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys)); Changed = true; } } @@ -355,7 +355,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. + if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); @@ -372,7 +374,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. - const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); + const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + // FIXME: Try to simplify vectors of integers. 
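The MatchBSwap hunk above (and the memcpy hunk before it) show Intrinsic::getDeclaration moving from a Type** plus element count to an ArrayRef of overload types, with a single Type* converting implicitly. A sketch of requesting an overloaded intrinsic under the new signature (getBSwapDecl is a hypothetical wrapper):

  #include "llvm/Intrinsics.h"
  #include "llvm/Module.h"
  using namespace llvm;

  Function *getBSwapDecl(Module *M, IntegerType *ITy) {
    // One overload parameter and no explicit count; ITy converts to an
    // ArrayRef<Type*> of length one.
    return Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
  }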
+ if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); @@ -412,7 +416,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(LHS->getType()), ConstantInt::getTrue(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + const StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } @@ -425,7 +430,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(LHS->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + const StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Add, 0); } } @@ -452,7 +458,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } @@ -472,7 +479,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } @@ -503,7 +511,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(LHS->getType()), Builder->getFalse() }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V); return InsertValueInst::Create(Struct, Mul, 0); } } // FALL THROUGH @@ -532,7 +540,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + Constant *Struct = + ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } @@ -1109,13 +1118,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Instruction *NC; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { NC = Builder->CreateInvoke(Callee, II->getNormalDest(), - II->getUnwindDest(), Args.begin(), Args.end()); + II->getUnwindDest(), Args); NC->takeName(II); cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv()); cast<InvokeInst>(NC)->setAttributes(NewCallerPAL); } else { CallInst *CI = cast<CallInst>(Caller); - NC = Builder->CreateCall(Callee, Args.begin(), Args.end()); + NC = Builder->CreateCall(Callee, Args); NC->takeName(CI); if (CI->isTailCall()) cast<CallInst>(NC)->setTailCall(); @@ -1178,7 +1187,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { const AttrListPtr &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; - const Type *NestTy = 0; + Type *NestTy = 0; Attributes NestAttr = Attribute::None; // Look for a parameter marked with the 'nest' attribute. 
@@ -1240,7 +1249,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // Handle this by synthesizing a new function type, equal to FTy // with the chain parameter inserted. - std::vector<const Type*> NewTypes; + std::vector<Type*> NewTypes; NewTypes.reserve(FTy->getNumParams()+1); // Insert the chain's type into the list of parameter types, which may @@ -1280,11 +1289,11 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { NewCaller = InvokeInst::Create(NewCallee, II->getNormalDest(), II->getUnwindDest(), - NewArgs.begin(), NewArgs.end()); + NewArgs); cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); } else { - NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end()); + NewCaller = CallInst::Create(NewCallee, NewArgs); if (cast<CallInst>(Caller)->isTailCall()) cast<CallInst>(NewCaller)->setTailCall(); cast<CallInst>(NewCaller)-> diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 199902aa41f8..82c734e0b829 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -30,6 +30,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, } if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { + // Cannot look past anything that might overflow. + OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); + if (OBI && !OBI->hasNoUnsignedWrap()) { + Scale = 1; + Offset = 0; + return Val; + } + if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { // This is a value scaled by '1 << the shift amt'. 
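The guard added to DecomposeSimpleLinearExpr above keeps the cast combiner from factoring through arithmetic that may wrap: Val is only decomposed as Scale*X + Offset when the operator carries the no-unsigned-wrap flag, since a wrapped intermediate would make the reconstructed expression compute a different value. In sketch form, with Val, Scale and Offset bound as in the function:

  // Bail out to the identity decomposition (Val = 1*Val + 0) unless the
  // operator promises no unsigned wrap.
  OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
  if (OBI && !OBI->hasNoUnsignedWrap()) {
    Scale = 1;
    Offset = 0;
    return Val;
  }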
@@ -1208,7 +1216,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && Call->getCalledFunction()->getName() == "sqrt" && - Call->getNumArgOperands() == 1) { + Call->getNumArgOperands() == 1 && + Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); if (Arg && Arg->getOpcode() == Instruction::FPExt && CI.getType()->isFloatTy() && diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index c7ed098cbf88..c78760b20692 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -42,13 +42,12 @@ static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { static bool HasAddOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().slt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ult(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().sgt(In1->getValue()); + return Result->getValue().slt(In1->getValue()); } /// AddWithOverflow - Compute Result = In1+In2, returning true if the result @@ -77,13 +76,13 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1, static bool HasSubOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ugt(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().slt(In1->getValue()); + + return Result->getValue().sgt(In1->getValue()); } /// SubWithOverflow - Compute Result = In1-In2, returning true if the result @@ -128,8 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, case ICmpInst::ICMP_UGT: // True if LHS u> RHS and RHS == high-bit-mask - 1 TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); + return RHS->isMaxValue(true); case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; @@ -278,8 +276,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) - Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(), - LaterIndices.size()); + Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); // If the element is masked, handle it. if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); @@ -828,7 +825,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; } } - } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. + } else if (DivRHS->isNegative()) { // Divisor is < 0. if (DivI->isExact()) RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (CmpRHSV == 0) { // (X / neg) op 0 @@ -1028,7 +1025,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If the sign bit of the XorCST is not set, there is no change to // the operation, just stop using the Xor. 
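The reshuffled HasAddOverflow/HasSubOverflow above encode a standard wrapped-arithmetic test: for signed In1 + In2, overflow occurred exactly when adding a negative made the result larger, or adding a non-negative made it smaller. A tiny standalone illustration on plain integers (hypothetical helper, not part of the patch):

  #include <cstdint>

  // Signed add overflow test in the style of HasAddOverflow: compute the
  // wrapping sum, then compare against the first operand in the direction
  // chosen by the sign of In2.
  bool hasSignedAddOverflow(int32_t In1, int32_t In2) {
    int32_t Result = (int32_t)((uint32_t)In1 + (uint32_t)In2); // wraps safely
    if (In2 < 0)
      return Result > In1;  // adding a negative may not increase the value
    return Result < In1;    // adding a non-negative may not decrease it
  }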
- if (!XorCST->getValue().isNegative()) { + if (!XorCST->isNegative()) { ICI.setOperand(0, CompareVal); Worklist.Add(LHSI); return &ICI; @@ -1061,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { + if (!ICI.isEquality() && XorCST->isMaxValue(true)) { const APInt &NotSignBit = XorCST->getValue(); ICmpInst::Predicate Pred = ICI.isSigned() ? ICI.getUnsignedPredicate() @@ -1087,22 +1084,33 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // have its sign bit set or if it is an equality comparison. // Extending a relational comparison when we're checking the sign // bit would not work. - if (Cast->hasOneUse() && - (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) { - uint32_t BitWidth = - cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth(); - APInt NewCST = AndCST->getValue().zext(BitWidth); - APInt NewCI = RHSV.zext(BitWidth); - Value *NewAnd = + if (ICI.isEquality() || + (!AndCST->isNegative() && RHSV.isNonNegative())) { + Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), - ConstantInt::get(ICI.getContext(), NewCST), - LHSI->getName()); + ConstantExpr::getZExt(AndCST, Cast->getSrcTy())); + NewAnd->takeName(LHSI); return new ICmpInst(ICI.getPredicate(), NewAnd, - ConstantInt::get(ICI.getContext(), NewCI)); + ConstantExpr::getZExt(RHS, Cast->getSrcTy())); } } - + + // If the LHS is an AND of a zext, and we have an equality compare, we can + // shrink the and/compare to the smaller type, eliminating the cast. + if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) { + const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy()); + // Make sure we don't compare the upper bits, SimplifyDemandedBits + // should fold the icmp to true/false in that case. + if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantExpr::getTrunc(AndCST, Ty)); + NewAnd->takeName(LHSI); + return new ICmpInst(ICI.getPredicate(), NewAnd, + ConstantExpr::getTrunc(RHS, Ty)); + } + } + // If this is: (X >> C1) & C2 != C3 (where any shift and any compare // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This // happens a LOT in code produced by the C front-end, for bitfield @@ -1396,18 +1404,27 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, case Instruction::Xor: // For the xor case, we can xor two constants together, eliminating // the explicit xor. - if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) - return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), ConstantExpr::getXor(RHS, BOC)); - - // FALLTHROUGH + } else if (RHSV == 0) { + // Replace ((xor A, B) != 0) with (A != B) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + BO->getOperand(1)); + } + break; case Instruction::Sub: - // Replace (([sub|xor] A, B) != 0) with (A != B) - if (RHSV == 0) + // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants. 
+ if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) { + if (BO->hasOneUse()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(1), + ConstantExpr::getSub(BOp0C, RHS)); + } else if (RHSV == 0) { + // Replace ((sub A, B) != 0) with (A != B) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), BO->getOperand(1)); + } break; - case Instruction::Or: // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! @@ -1434,7 +1451,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, LHSI, Constant::getNullValue(RHS->getType())); - + + // Don't perform the following transforms if the AND has multiple uses + if (!BO->hasOneUse()) + break; + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); @@ -1659,9 +1680,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // result and the overflow bit. Module *M = I.getParent()->getParent()->getParent(); - const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, - &NewType, 1); + NewType); InstCombiner::BuilderTy *Builder = IC.Builder; @@ -1701,8 +1722,8 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, Builder->SetInsertPoint(OrigAdd); Module *M = I.getParent()->getParent()->getParent(); - const Type *Ty = LHS->getType(); - Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1); + Type *Ty = LHS->getType(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); Value *Add = Builder->CreateExtractValue(Call, 0); @@ -2364,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO1->getOperand(0)); } - if (CI->getValue().isMaxSignedValue()) { + if (CI->isMaxValue(true)) { ICmpInst::Predicate Pred = I.isSigned() ? 
I.getUnsignedPredicate() : I.getSignedPredicate(); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2d29403097ce..630a6fee3990 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -691,14 +691,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { bool hasNegative = false; for (unsigned i = 0; !hasNegative && i != VWidth; ++i) if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) - if (RHS->getValue().isNegative()) + if (RHS->isNegative()) hasNegative = true; if (hasNegative) { std::vector<Constant *> Elts(VWidth); for (unsigned i = 0; i != VWidth; ++i) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { - if (RHS->getValue().isNegative()) + if (RHS->isNegative()) Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); else Elts[i] = RHS; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index aeb3c3e880fa..5733c20828c6 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -796,7 +796,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // So at this point we know we have (Y -> OtherAddOp): // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z - if (SI.getType()->isFloatingPointTy()) { + if (SI.getType()->isFPOrFPVectorTy()) { NegVal = Builder->CreateFNeg(SubOp->getOperand(1)); } else { NegVal = Builder->CreateNeg(SubOp->getOperand(1)); @@ -810,7 +810,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Builder->CreateSelect(CondVal, NewTrueOp, NewFalseOp, SI.getName() + ".p"); - if (SI.getType()->isFloatingPointTy()) + if (SI.getType()->isFPOrFPVectorTy()) return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel); else return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 92c10f5546c0..ab98ef9fccf8 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -785,6 +785,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr instructions into a single instruction. // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { + + // If this GEP has only 0 indices, it is the same pointer as + // Src. If Src is not a trivial GEP too, don't combine + // the indices. + if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() && + !Src->hasOneUse()) + return 0; + // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. 
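The early-out added to visitGetElementPtrInst above declines to merge an all-zero-index GEP into a non-trivial source GEP that has other users: the all-zero GEP is just a typed alias of Src, so folding would duplicate Src's index computation instead of removing it. The shape of the check, with GEP and Src bound as in the hunk:

  // A bitcast-like GEP (all indices zero) over a shared, non-trivial GEP is
  // left alone; combining would clone Src's address arithmetic per use.
  if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
      !Src->hasOneUse())
    return 0;  // no transformation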
@@ -1191,7 +1199,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (EV.getNumIndices() > 1) // Extract the remaining indices out of the constant indexed by the // first index - return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end()); + return ExtractValueInst::Create(V, EV.getIndices().slice(1)); else return ReplaceInstUsesWith(EV, V); } @@ -1214,7 +1222,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // with // %E = extractvalue { i32, { i32 } } %A, 0 return ExtractValueInst::Create(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); } if (exti == exte && insi == inse) // Both iterators are at the end: Index lists are identical. Replace @@ -1232,9 +1240,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), - insi, inse); + ArrayRef<unsigned>(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list @@ -1246,7 +1254,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), - exti, exte); + ArrayRef<unsigned>(exti, exte)); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index b90221301041..3f2c4123882d 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -561,25 +561,24 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( Edge += Successors; } + ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size()); GlobalVariable *EdgeTableGV = new GlobalVariable( *M, EdgeTableTy, true, GlobalValue::InternalLinkage, - ConstantArray::get(EdgeTableTy, - &EdgeTable[0], Succs.size() * Preds.size()), + ConstantArray::get(EdgeTableTy, V), "__llvm_gcda_edge_table"); EdgeTableGV->setUnnamedAddr(true); return EdgeTableGV; } Constant *GCOVProfiler::getStartFileFunc() { - const Type *Args[] = { Type::getInt8PtrTy(*Ctx) }; const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Args, false); + Type::getInt8PtrTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); } Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { - const Type *Args[] = { + Type *Args[] = { Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row }; @@ -589,7 +588,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { } Constant *GCOVProfiler::getEmitFunctionFunc() { - const Type *Args[2] = { + Type *Args[2] = { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name }; @@ -599,7 +598,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() { } Constant *GCOVProfiler::getEmitArcsFunc() { - const Type *Args[] = { + Type *Args[] = { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters }; diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp 
b/lib/Transforms/Instrumentation/PathProfiling.cpp index 182a43d396c0..75416637db4f 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -376,7 +376,7 @@ namespace llvm { public: static const StructType *get(LLVMContext& C) { return( StructType::get( - C, TypeBuilder<types::i<32>, xcompile>::get(C), // type + TypeBuilder<types::i<32>, xcompile>::get(C), // type TypeBuilder<types::i<32>, xcompile>::get(C), // array size TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr NULL)); @@ -1062,7 +1062,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue, CallInst::Create( increment ? llvmIncrementHashFunction : llvmDecrementHashFunction, - args.begin(), args.end(), "", insertPoint); + args, "", insertPoint); } } diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 7435bc37fbe1..445a5b6f6074 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -62,8 +62,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); - CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), - "newargc", InsertPos); + CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos); // If argc or argv are not available in main, just pass null values in. Function::arg_iterator AI; @@ -134,7 +133,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those // types. - const Type *GlobalDtorElems[2] = { + Type *GlobalDtorElems[2] = { Type::getInt32Ty(Mod->getContext()), FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() }; @@ -164,7 +163,8 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { GlobalVariable *GlobalDtors = new GlobalVariable( *Mod, ArrayType::get(GlobalDtorElemTy, 1), false, GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors"); - dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false)); + + dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem)); GlobalDtors->setInitializer(ConstantArray::get( cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors)); } diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index fcf914f8baa0..c223da60e0fa 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMScalarOpts LoopUnswitch.cpp LowerAtomic.cpp MemCpyOptimizer.cpp + ObjCARC.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 53e46400dca8..cb9b5bebc5c7 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -437,12 +437,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { MemDepResult InstDep = MD->getDependency(Inst); - // Ignore non-local store liveness. + // Ignore any store where we can't find a local dependence. // FIXME: cross-block DSE would be fun. :) - if (InstDep.isNonLocal() || - // Ignore self dependence, which happens in the entry block of the - // function. 
- InstDep.getInst() == Inst) + if (InstDep.isNonLocal() || InstDep.isUnknown()) continue; // If we're storing the same value back to a pointer that we just @@ -478,7 +475,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (Loc.Ptr == 0) continue; - while (!InstDep.isNonLocal()) { + while (!InstDep.isNonLocal() && !InstDep.isUnknown()) { // Get the memory clobbered by the instruction we depend on. MemDep will // skip any instructions that 'Loc' clearly doesn't interact with. If we // end up depending on a may- or must-aliased load, then we can't optimize @@ -542,24 +539,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { /// HandleFree - Handle frees of entire structures whose dependency is a store /// to a field of that structure. bool DSE::HandleFree(CallInst *F) { + bool MadeChange = false; + MemDepResult Dep = MD->getDependency(F); - do { - if (Dep.isNonLocal()) return false; - + + while (!Dep.isNonLocal() && !Dep.isUnknown()) { Instruction *Dependency = Dep.getInst(); if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) - return false; + return MadeChange; Value *DepPointer = GetUnderlyingObject(getStoredPointerOperand(Dependency)); // Check for aliasing. if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) - return false; + return MadeChange; // DCE instructions only used to calculate that store DeleteDeadInstruction(Dependency, *MD); ++NumFastStores; + MadeChange = true; // Inst's old Dependency is now deleted. Compute the next dependency, // which may also be dead, as in @@ -567,9 +566,9 @@ bool DSE::HandleFree(CallInst *F) { // s[1] = 0; // This has just been deleted. // free(s); Dep = MD->getDependency(F); - } while (!Dep.isNonLocal()); + }; - return true; + return MadeChange; } /// handleEndBlock - Remove dead stores to stack-allocated locations in the diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 2515fd112c1b..87b7317ad2dd 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -91,6 +91,7 @@ namespace { uint32_t nextValueNumber; Expression create_expression(Instruction* I); + Expression create_extractvalue_expression(ExtractValueInst* EI); uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } @@ -141,7 +142,6 @@ template <> struct DenseMapInfo<Expression> { // ValueTable Internal Functions //===----------------------------------------------------------------------===// - Expression ValueTable::create_expression(Instruction *I) { Expression e; e.type = I->getType(); @@ -150,12 +150,8 @@ Expression ValueTable::create_expression(Instruction *I) { OI != OE; ++OI) e.varargs.push_back(lookup_or_add(*OI)); - if (CmpInst *C = dyn_cast<CmpInst>(I)) + if (CmpInst *C = dyn_cast<CmpInst>(I)) { e.opcode = (C->getOpcode() << 8) | C->getPredicate(); - else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) { - for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -165,6 +161,58 @@ Expression ValueTable::create_expression(Instruction *I) { return e; } +Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { + assert(EI != 0 && "Not an ExtractValueInst?"); + Expression e; + e.type = EI->getType(); + e.opcode = 0; + + IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); + if (I != 0 && EI->getNumIndices() == 1 && 
*EI->idx_begin() == 0 ) { + // EI might be an extract from one of our recognised intrinsics. If it + // is we'll synthesize a semantically equivalent expression instead on + // an extract value expression. + switch (I->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + e.opcode = Instruction::Add; + break; + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + e.opcode = Instruction::Sub; + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + e.opcode = Instruction::Mul; + break; + default: + break; + } + + if (e.opcode != 0) { + // Intrinsic recognized. Grab its args to finish building the expression. + assert(I->getNumArgOperands() == 2 && + "Expect two args for recognised intrinsics."); + e.varargs.push_back(lookup_or_add(I->getArgOperand(0))); + e.varargs.push_back(lookup_or_add(I->getArgOperand(1))); + return e; + } + } + + // Not a recognised intrinsic. Fall back to producing an extract value + // expression. + e.opcode = EI->getOpcode(); + for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); + OI != OE; ++OI) + e.varargs.push_back(lookup_or_add(*OI)); + + for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + + return e; +} + //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -227,21 +275,19 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { // Non-local case. const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); - // FIXME: call/call dependencies for readonly calls should return def, not - // clobber! Move the checking logic to MemDep! + // FIXME: Move the checking logic to MemDep! CallInst* cdep = 0; // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { const NonLocalDepEntry *I = &deps[i]; - // Ignore non-local dependencies. if (I->getResult().isNonLocal()) continue; - // We don't handle non-depedencies. If we already have a call, reject + // We don't handle non-definitions. If we already have a call, reject // instruction dependencies. - if (I->getResult().isClobber() || cdep != 0) { + if (!I->getResult().isDef() || cdep != 0) { cdep = 0; break; } @@ -338,11 +384,13 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::ExtractElement: case Instruction::InsertElement: case Instruction::ShuffleVector: - case Instruction::ExtractValue: case Instruction::InsertValue: case Instruction::GetElementPtr: exp = create_expression(I); break; + case Instruction::ExtractValue: + exp = create_extractvalue_expression(cast<ExtractValueInst>(I)); + break; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; @@ -1192,8 +1240,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // escaping uses to any values that are operands to these PHIs. 
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { PHINode *P = NewPHIs[i]; - for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) - AA->addEscapingUse(P->getOperandUse(2*ii)); + for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) { + unsigned jj = PHINode::getOperandNumForIncomingValue(ii); + AA->addEscapingUse(P->getOperandUse(jj)); + } } } @@ -1224,12 +1274,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].getResult().isClobber() && - Deps[0].getResult().getInst()->getParent() == LI->getParent()) { + if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); - dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; + dbgs() << " has unknown dependencies\n"; ); return false; } @@ -1245,6 +1294,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); + if (DepInfo.isUnknown()) { + UnavailableBlocks.push_back(DepBB); + continue; + } + if (DepInfo.isClobber()) { // The address being loaded in this non-local block may not be the same as // the pointer operand of the load if PHI translation occurs. Make sure @@ -1305,6 +1359,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { continue; } + assert(DepInfo.isDef() && "Expecting def here"); + Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. @@ -1691,10 +1747,22 @@ bool GVN::processLoad(LoadInst *L) { return false; } + if (Dep.isUnknown()) { + DEBUG( + // fast print dep, using operator<< on instruction is too slow. + dbgs() << "GVN: load "; + WriteAsOperand(dbgs(), L); + dbgs() << " has unknown dependence\n"; + ); + return false; + } + // If it is defined in another block, try harder. if (Dep.isNonLocal()) return processNonLocalLoad(L); + assert(Dep.isDef() && "Expecting def here"); + Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { Value *StoredVal = DepSI->getValueOperand(); @@ -2133,8 +2201,11 @@ bool GVN::performPRE(Function &F) { // Because we have added a PHI-use of the pointer value, it has now // "escaped" from alias analysis' perspective. We need to inform // AA of this. 
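Both GVN hunks in this area stop hard-coding the old interleaved PHI operand layout, where incoming value ii lived at operand 2*ii; the slot is now obtained from PHINode::getOperandNumForIncomingValue. A sketch of informing alias analysis about every incoming value of a freshly built PHI, assuming P and AA as in ConstructSSAForLoadSet:

  for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
    // Translate the incoming-value index to its operand number first.
    unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
    AA->addEscapingUse(P->getOperandUse(jj));
  }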
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) - VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii)); + for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; + ++ii) { + unsigned jj = PHINode::getOperandNumForIncomingValue(ii); + VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); + } if (MD) MD->invalidateCachedPointerInfo(Phi); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 04ee7c8ccb3b..dee3d38d72af 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -52,30 +52,32 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; -STATISTIC(NumRemoved , "Number of aux indvars removed"); -STATISTIC(NumWidened , "Number of indvars widened"); -STATISTIC(NumInserted, "Number of canonical indvars added"); -STATISTIC(NumReplaced, "Number of exit values replaced"); -STATISTIC(NumLFTR , "Number of loop exit tests replaced"); -STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); -STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); -STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); - -// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis -// and referenced here. -namespace llvm { - extern bool DisableIVRewrite; -} +STATISTIC(NumRemoved , "Number of aux indvars removed"); +STATISTIC(NumWidened , "Number of indvars widened"); +STATISTIC(NumInserted , "Number of canonical indvars added"); +STATISTIC(NumReplaced , "Number of exit values replaced"); +STATISTIC(NumLFTR , "Number of loop exit tests replaced"); +STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); +STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); +STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); + +static cl::opt<bool> DisableIVRewrite( + "disable-iv-rewrite", cl::Hidden, + cl::desc("Disable canonical induction variable rewriting")); namespace { class IndVarSimplify : public LoopPass { @@ -84,12 +86,14 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; TargetData *TD; + SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) { + IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0), + Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -101,36 +105,46 @@ namespace { AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired<IVUsers>(); + if (!DisableIVRewrite) + AU.addRequired<IVUsers>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - AU.addPreserved<IVUsers>(); + if (!DisableIVRewrite) + AU.addPreserved<IVUsers>(); AU.setPreservesCFG(); } private: + virtual void releaseMemory() { + DeadInsts.clear(); + } + bool 
isValidRewrite(Value *FromVal, Value *ToVal);
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ void RewriteNonIntegerIVs(Loop *L);
+
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+ void SimplifyIVUsers(SCEVExpander &Rewriter);
+ void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+ bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void EliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
- bool IsSigned,
- PHINode *IVPhi);
- void RewriteNonIntegerIVs(Loop *L);
+ bool IsSigned);
+
+ void SimplifyCongruentIVs(Loop *L);
+
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
ICmpInst *LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
- void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
- void SinkUnusedInvariants(Loop *L);
-
- void HandleFloatingPointIV(Loop *L, PHINode *PH);
};
}
@@ -197,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
return true;
}
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
- BackedgeTakenCount->isZero())
- return false;
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
- if (!L->getExitingBlock())
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
return false;
-
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ // See if we can convert this to an int64_t.
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
return false;
-
- // Special case: If the backedge-taken count is a UDiv, it's very likely a
- // UDiv that ScalarEvolution produced in order to compute a precise
- // expression, rather than a UDiv from the user's code. If we can't find a
- // UDiv in the code with some simple searching, assume the former and forego
- // rewriting the loop.
- if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return false;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != BackedgeTakenCount) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != BackedgeTakenCount)
- return false;
- }
- }
+ IntVal = UIntVal;
return true;
}
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
+/// HandleFloatingPointIV - If the loop has a floating point induction
+/// variable, then insert a corresponding integer induction variable if
+/// possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
///
-/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
-static const Type *getBackedgeIVType(Loop *L) {
- if (!L->getExitingBlock())
- return 0;
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
- return 0;
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return 0;
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
- const Type *Ty = 0;
- for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
- OI != OE; ++OI) {
- assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
- TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
- if (!Trunc)
- continue;
+ // Check the IV increment. Reject this PN if the increment operation is not
+ // an add or if the increment value cannot be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
- return Trunc->getSrcTy();
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
+ // isn't only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find the new predicate for the integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
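// The cases that follow pair each ordered/unordered fcmp predicate with the
// signed icmp predicate of the same sense. Both FP variants can safely share
// one integer predicate because, once the IV is replaced by an integer that
// provably fits in a signed i32 (checked after the switch), the unordered
// (NaN) case can no longer arise. For example, assuming a loop such as
//   for (double d = 0.0; d != 10000.0; d += 1.0)
// the FCMP_UNE exit test becomes ICMP_NE on the new i32 IV.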
+ case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - return Ty; -} -/// LinearFunctionTestReplace - This method rewrites the exit condition of the -/// loop to be a canonical != comparison against the incremented loop induction -/// variable. This pass is able to rewrite the exit tests of any loop where the -/// SCEV analysis can determine a loop-invariant trip count of the loop, which -/// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify:: -LinearFunctionTestReplace(Loop *L, - const SCEV *BackedgeTakenCount, - PHINode *IndVar, - SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); - BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + // We convert the floating point induction variable to a signed i32 value if + // we can. This is only safe if the comparison will not overflow in a way + // that won't be trapped by the integer equivalent operations. Check for this + // now. + // TODO: We could use i64 if it is native and the range requires it. - // If the exiting block is not the same as the backedge block, we must compare - // against the preincremented value, otherwise we prefer to compare against - // the post-incremented value. - Value *CmpIndVar; - const SCEV *RHS = BackedgeTakenCount; - if (L->getExitingBlock() == L->getLoopLatch()) { - // Add one to the "backedge-taken" count to get the trip count. - // If this addition may overflow, we have to be more pessimistic and - // cast the induction variable before doing the add. - const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); - const SCEV *N = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - if ((isa<SCEVConstant>(N) && !N->isZero()) || - SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { - // No overflow. Cast the sum. - RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); - } else { - // Potential overflow. Cast before doing the add. - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - RHS = SE->getAddExpr(RHS, - SE->getConstant(IndVar->getType(), 1)); + // The start/stride/exit values must all fit in signed i32. + if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) + return; + + // If not actually striding (add x, 0.0), avoid touching the code. + if (IncValue == 0) + return; + + // Positive and negative strides have different safety conditions. + if (IncValue > 0) { + // If we have a positive stride, we require the init to be less than the + // exit value and an equality or less than comparison. + if (InitValue >= ExitValue || + NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) + return; + + uint32_t Range = uint32_t(ExitValue-InitValue); + if (NewPred == CmpInst::ICMP_SLE) { + // Normalize SLE -> SLT, check for infinite loop. + if (++Range == 0) return; // Range overflows. } - // The BackedgeTaken expression contains the number of times that the - // backedge branches to the loop header. 
This is one less than the - // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + unsigned Leftover = Range % uint32_t(IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. + if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) + return; + } else { - // We have to use the preincremented value... - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - CmpIndVar = IndVar; + // If we have a negative stride, we require the init to be greater than the + // exit value and an equality or greater than comparison. + if (InitValue >= ExitValue || + NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) + return; + + uint32_t Range = uint32_t(InitValue-ExitValue); + if (NewPred == CmpInst::ICMP_SGE) { + // Normalize SGE -> SGT, check for infinite loop. + if (++Range == 0) return; // Range overflows. + } + + unsigned Leftover = Range % uint32_t(-IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. + if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) + return; } - // Expand the code for the iteration count. - assert(SE->isLoopInvariant(RHS, L) && - "Computed iteration count is not loop invariant!"); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); - // Insert a new icmp_ne or icmp_eq instruction before the branch. - ICmpInst::Predicate Opcode; - if (L->contains(BI->getSuccessor(0))) - Opcode = ICmpInst::ICMP_NE; - else - Opcode = ICmpInst::ICMP_EQ; + // Insert new integer induction variable. + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); + NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), + PN->getIncomingBlock(IncomingEdge)); - DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" - << " LHS:" << *CmpIndVar << '\n' - << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"); + Value *NewAdd = + BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), + Incr->getName()+".int", Incr); + NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); - ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); + ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, + ConstantInt::get(Int32Ty, ExitValue), + Compare->getName()); - Value *OrigCond = BI->getCondition(); - // It's tempting to use replaceAllUsesWith here to fully replace the old - // comparison, but that's not immediately safe, since users of the old - // comparison may not be dominated by the new comparison. Instead, just - // update the branch to use the new comparison; in the common case this - // will make old comparison dead. 
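// The replacement code added below leans on a small ValueHandle idiom worth
// calling out: a WeakVH tracks a Value and nulls itself if that value is
// deleted. A hedged sketch of the pattern:
//
//   WeakVH WeakPH = PN;                      // watch the old FP phi
//   RecursivelyDeleteTriviallyDeadInstructions(Compare);
//   if (WeakPH)                              // non-null => PN still has uses
//     /* rewrite the remaining FP uses via a sitofp of the new integer IV */
//
// so the pass can tell whether the phi survived the deletions without risking
// a dangling pointer.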
- BI->setCondition(Cond); - DeadInsts.push_back(OrigCond); + // In the following deletions, PN may become dead and may be deleted. + // Use a WeakVH to observe whether this happens. + WeakVH WeakPH = PN; - ++NumLFTR; - Changed = true; - return Cond; + // Delete the old floating point exit comparison. The branch starts using the + // new comparison. + NewCompare->takeName(Compare); + Compare->replaceAllUsesWith(NewCompare); + RecursivelyDeleteTriviallyDeadInstructions(Compare); + + // Delete the old floating point increment. + Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); + RecursivelyDeleteTriviallyDeadInstructions(Incr); + + // If the FP induction variable still has uses, this is because something else + // in the loop uses its value. In order to canonicalize the induction + // variable, we chose to eliminate the IV and rewrite it in terms of an + // int->fp cast. + // + // We give preference to sitofp over uitofp because it is faster on most + // platforms. + if (WeakPH) { + Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", + PN->getParent()->getFirstNonPHI()); + PN->replaceAllUsesWith(Conv); + RecursivelyDeleteTriviallyDeadInstructions(PN); + } + + // Add a new IVUsers entry for the newly-created integer PHI. + if (IU) + IU->AddUsersIfInteresting(NewPHI); } +void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { + // First step. Check to see if there are any floating-point recurrences. + // If there are, change them into integer recurrences, permitting analysis by + // the SCEV routines. + // + BasicBlock *Header = L->getHeader(); + + SmallVector<WeakVH, 8> PHIs; + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHIs.push_back(PN); + + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) + HandleFloatingPointIV(L, PN); + + // If the loop previously had floating-point IV, ScalarEvolution + // may not have been able to compute a trip count. Now that we've done some + // re-writing, the trip count may be computable. + if (Changed) + SE->forgetLoop(L); +} + +//===----------------------------------------------------------------------===// +// RewriteLoopExitValues - Optimize IV users outside the loop. +// As a side effect, reduces the amount of IV processing within the loop. +//===----------------------------------------------------------------------===// + /// RewriteLoopExitValues - Check to see if this loop has a computable /// loop-invariant execution count. If so, this means that we can compute the /// final value of any expressions that are recurrent in the loop, and @@ -460,29 +580,168 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } -void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { - // First step. Check to see if there are any floating-point recurrences. - // If there are, change them into integer recurrences, permitting analysis by - // the SCEV routines. +//===----------------------------------------------------------------------===// +// Rewrite IV users based on a canonical IV. +// To be replaced by -disable-iv-rewrite. +//===----------------------------------------------------------------------===// + +/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this +/// loop. IVUsers is treated as a worklist. Each successive simplification may +/// push more users which may themselves be candidates for simplification. 
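// A concrete instance of such a simplification, hedged: for the unsigned
// remainder in
//   for (unsigned i = 0; i != n; ++i) use(i % n);
// SCEV can prove i < n on every iteration, so EliminateIVRemainder folds
// 'i % n' to 'i' itself, and the use freed up by the fold goes back on the
// worklist for another round.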
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+ // Each round of simplification involves a round of eliminating operations
+ // followed by a round of widening IVs. A single IVUsers worklist is used
+ // across all rounds. The inner loop advances the user. If widening exposes
+ // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+ // Loop-invariant values are safe.
+ if (SE->isLoopInvariant(S, L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // An add is safe if all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L, SE)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L, SE);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
//
- BasicBlock *Header = L->getHeader();
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
- SmallVector<WeakVH, 8> PHIs;
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PHIs.push_back(PN);
+ // Compute the final addrec to expand into code.
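// The next few lines first fetch the replacement expression from IVUsers and
// then, for users sitting outside the loop, try to evaluate it at the parent
// loop's scope. A worked example, hedged: for a use outside L of the IV
// {0,+,1}<L> whose backedge-taken count is m, getSCEVAtScope folds the addrec
// to the loop-invariant value m, so the expansion lands once in the exit
// block rather than on every iteration.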
+ const SCEV *AR = IU->getReplacementExpr(*UI); - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) - HandleFloatingPointIV(L, PN); + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (SE->isLoopInvariant(ExitVal, L)) + AR = ExitVal; + } - // If the loop previously had floating-point IV, ScalarEvolution - // may not have been able to compute a trip count. Now that we've done some - // re-writing, the trip count may be computable. - if (Changed) - SE->forgetLoop(L); + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!isSafe(AR, L, SE)) + continue; + + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. + Instruction *InsertPt = User; + if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingValue(i) == Op) { + if (InsertPt == User) + InsertPt = PHI->getIncomingBlock(i)->getTerminator(); + else + InsertPt = + DT->findNearestCommonDominator(InsertPt->getParent(), + PHI->getIncomingBlock(i)) + ->getTerminator(); + } + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } + // Inform ScalarEvolution that this value is changing. The change doesn't + // affect its value, but it does potentially affect which use lists the + // value will be on after the replacement, which affects ScalarEvolution's + // ability to walk use lists and drop dangling pointers when a value is + // deleted. + SE->forgetValue(User); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + if (Instruction *NewValI = dyn_cast<Instruction>(NewVal)) + NewValI->setDebugLoc(User->getDebugLoc()); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); + } } +//===----------------------------------------------------------------------===// +// IV Widening - Extend the width of an IV to cover its widest uses. +//===----------------------------------------------------------------------===// + namespace { // Collect information about induction variables that are used by sign/zero // extend operations. This information is recorded by CollectExtend and @@ -493,33 +752,30 @@ namespace { WideIVInfo() : WidestNativeType(0), IsSigned(false) {} }; - typedef std::map<PHINode *, WideIVInfo> WideIVMap; } /// CollectExtend - Update information about the induction variable that is /// extended by this sign or zero extend operation. 
This is used to determine /// the final width of the IV before actually widening it. -static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned, - WideIVMap &IVMap, ScalarEvolution *SE, - const TargetData *TD) { +static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI, + ScalarEvolution *SE, const TargetData *TD) { const Type *Ty = Cast->getType(); uint64_t Width = SE->getTypeSizeInBits(Ty); if (TD && !TD->isLegalInteger(Width)) return; - WideIVInfo &IVInfo = IVMap[Phi]; - if (!IVInfo.WidestNativeType) { - IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); - IVInfo.IsSigned = IsSigned; + if (!WI.WidestNativeType) { + WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); + WI.IsSigned = IsSigned; return; } // We extend the IV to satisfy the sign of its first user, arbitrarily. - if (IVInfo.IsSigned != IsSigned) + if (WI.IsSigned != IsSigned) return; - if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType)) - IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); + if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) + WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); } namespace { @@ -529,43 +785,45 @@ namespace { /// inserting truncs whenever we stop propagating the type. /// class WidenIV { + // Parameters PHINode *OrigPhi; const Type *WideType; bool IsSigned; - IVUsers *IU; - LoopInfo *LI; - Loop *L; + // Context + LoopInfo *LI; + Loop *L; ScalarEvolution *SE; - DominatorTree *DT; - SmallVectorImpl<WeakVH> &DeadInsts; + DominatorTree *DT; + // Result PHINode *WidePhi; Instruction *WideInc; const SCEV *WideIncExpr; + SmallVectorImpl<WeakVH> &DeadInsts; - SmallPtrSet<Instruction*,16> Processed; + SmallPtrSet<Instruction*,16> Widened; + SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; public: - WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers, - LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, + WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo, + ScalarEvolution *SEv, DominatorTree *DTree, SmallVectorImpl<WeakVH> &DI) : OrigPhi(PN), - WideType(IVInfo.WidestNativeType), - IsSigned(IVInfo.IsSigned), - IU(IUsers), + WideType(WI.WidestNativeType), + IsSigned(WI.IsSigned), LI(LInfo), L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - DeadInsts(DI), WidePhi(0), WideInc(0), - WideIncExpr(0) { + WideIncExpr(0), + DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } - bool CreateWideIV(SCEVExpander &Rewriter); + PHINode *CreateWideIV(SCEVExpander &Rewriter); protected: Instruction *CloneIVUser(Instruction *NarrowUse, @@ -574,58 +832,13 @@ protected: const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); - Instruction *WidenIVUse(Instruction *NarrowUse, - Instruction *NarrowDef, + Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, Instruction *WideDef); + + void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace -/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this -/// loop. IVUsers is treated as a worklist. Each successive simplification may -/// push more users which may themselves be candidates for simplification. -/// -void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { - WideIVMap IVMap; - - // Each round of simplification involves a round of eliminating operations - // followed by a round of widening IVs. A single IVUsers worklist is used - // across all rounds. The inner loop advances the user. 
If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
- for(; I != E; ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (DisableIVRewrite) {
- if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
- bool IsSigned = Cast->getOpcode() == Instruction::SExt;
- if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
- CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
- continue;
- }
- }
- }
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
- continue;
- }
- }
- }
- for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
- I != E; ++I) {
- WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
- if (Widener.CreateWideIV(Rewriter))
- Changed = true;
- }
- }
-}
-
static Value *getExtend( Value *NarrowOper, const Type *WideType, bool IsSigned,
IRBuilder<> &Builder) {
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
@@ -671,34 +884,16 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
LHS, RHS, NarrowBO->getName());
Builder.Insert(WideBO);
- if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
- if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
-
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+ if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ }
return WideBO;
}
llvm_unreachable(0);
}
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
- if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
-
- const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
- const SCEV *WideExpr = IsSigned ?
- SE->getSignExtendExpr(NarrowExpr, WideType) :
- SE->getZeroExtendExpr(NarrowExpr, WideType);
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
- if (!AddRec || AddRec->getLoop() != L)
- return 0;
-
- return AddRec;
-}
-
/// HoistStep - Attempt to hoist an IV increment above a potential use.
///
/// To successfully hoist, two criteria must be met:
@@ -733,18 +928,41 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
return true;
}
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening its type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType())
+ >= SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand,
+ // e.g. a gep with a narrow index. So don't follow this use.
+ return 0;
+ }
+
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
- Instruction *NarrowDef,
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
Instruction *WideDef) {
- // To be consistent with IVUsers, stop traversing the def-use chain at
- // inner-loop phis or post-loop phis.
- if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
- return 0;
+ Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
- // Handle data flow merges and bizarre phi cycles.
- if (!Processed.insert(NarrowUse))
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
return 0;
// Our raison d'etre! Eliminate sign and zero extension.
@@ -755,7 +973,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
}
else {
@@ -775,23 +993,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
NarrowUse->replaceAllUsesWith(NewDef);
DeadInsts.push_back(NarrowUse);
}
- // Now that the extend is gone, expose it's uses to IVUsers for potential
- // further simplification within SimplifyIVUsers.
- IU->AddUsersIfInteresting(WideDef, WidePhi);
+ // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
return 0;
}
+
+ // Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
if (!WideAddRec) {
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
return 0;
}
+ // We assume that block terminators are not SCEVable. We wouldn't want to
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
@@ -803,11 +1030,11 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
if (!WideUse)
return 0;
}
- // GetWideRecurrence ensured that the narrow expression could be extended
- // outside the loop without overflow.
This suggests that the wide use + // Evaluation of WideAddRec ensured that the narrow expression could be + // extended outside the loop without overflow. This suggests that the wide use // evaluates to the same expression as the extended narrow use, but doesn't // absolutely guarantee it. Hence the following failsafe check. In rare cases - // where it fails, we simple throw away the newly created wide use. + // where it fails, we simply throw away the newly created wide use. if (WideAddRec != SE->getSCEV(WideUse)) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); @@ -819,21 +1046,36 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, return WideUse; } +/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers. +/// +void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + for (Value::use_iterator UI = NarrowDef->use_begin(), + UE = NarrowDef->use_end(); UI != UE; ++UI) { + Use &U = UI.getUse(); + + // Handle data flow merges and bizarre phi cycles. + if (!Widened.insert(cast<Instruction>(U.getUser()))) + continue; + + NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef)); + } +} + /// CreateWideIV - Process a single induction variable. First use the /// SCEVExpander to create a wide induction variable that evaluates to the same /// recurrence as the original narrow IV. Then use a worklist to forward -/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all +/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all /// interesting IV users, the narrow IV will be isolated for removal by /// DeleteDeadPHIs. /// /// It would be simpler to delete uses as they are processed, but we must avoid /// invalidating SCEV expressions. /// -bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { +PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); if (!AddRec) - return false; + return NULL; // Widen the induction variable expression. const SCEV *WideIVExpr = IsSigned ? @@ -846,9 +1088,9 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Can the IV be extended outside the loop without overflow? AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); if (!AddRec || AddRec->getLoop() != L) - return false; + return NULL; - // An AddRec must have loop-invariant operands. Since this AddRec it + // An AddRec must have loop-invariant operands. Since this AddRec is // materialized by a loop header phi, the expression cannot have any post-loop // operands, so they must dominate the loop header. assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) && @@ -876,39 +1118,37 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { ++NumWidened; // Traverse the def-use chain using a worklist starting at the original IV. - assert(Processed.empty() && "expect initial state" ); + assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); + + Widened.insert(OrigPhi); + pushNarrowIVUsers(OrigPhi, WidePhi); - // Each worklist entry has a Narrow def-use link and Wide def. 
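// Why this worklist stores a bare Use* rather than a use_iterator: as the
// comment in CreateWideIV notes, WidenIVUse may replace the very use it is
// handed (replaceAllUsesWith or replaceUsesOfWith), and an iterator held
// across that mutation would be invalidated. A Use pinned by pointer remains
// valid while its user is alive.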
- SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; - for (Value::use_iterator UI = OrigPhi->use_begin(), - UE = OrigPhi->use_end(); UI != UE; ++UI) { - NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi)); - } while (!NarrowIVUsers.empty()) { - Use *NarrowDefUse; + Use *UsePtr; Instruction *WideDef; - tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val(); + tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val(); + Use &NarrowDefUse = *UsePtr; // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get()); - Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser()); - Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef); + Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get()); + Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef); // Follow all def-use edges from the previous narrow use. - if (WideUse) { - for (Value::use_iterator UI = NarrowUse->use_begin(), - UE = NarrowUse->use_end(); UI != UE; ++UI) { - NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse)); - } - } + if (WideUse) + pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse); + // WidenIVUse may have removed the def-use edge. if (NarrowDef->use_empty()) DeadInsts.push_back(NarrowDef); } - return true; + return WidePhi; } +//===----------------------------------------------------------------------===// +// Simplification of IV users based on SCEV evaluation. +//===----------------------------------------------------------------------===// + void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); @@ -945,8 +1185,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, - bool IsSigned, - PHINode *IVPhi) { + bool IsSigned) { // We're only interested in the case where we know something about // the numerator. if (IVOperand != Rem->getOperand(0)) @@ -989,15 +1228,465 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, } // Inform IVUsers about the new users. - if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) - IU->AddUsersIfInteresting(I, IVPhi); - + if (IU) { + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + } DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); ++NumElimRem; Changed = true; DeadInsts.push_back(Rem); } +/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has +/// no observable side-effect given the range of IV values. +bool IndVarSimplify::EliminateIVUser(Instruction *UseInst, + Instruction *IVOperand) { + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + EliminateIVComparison(ICmp, IVOperand); + return true; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + EliminateIVRemainder(Rem, IVOperand, IsSigned); + return true; + } + } + + // Eliminate any operation that SCEV can prove is an identity function. 
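// "Identity" below is decided purely by SCEV: if the user has the same type
// and the same SCEV as the IV operand, it computes the same value on every
// iteration. Hedged examples of what this catches: 'iv + 0', 'iv * 1', or a
// same-width truncate/extend round-trip, all of which SCEV canonicalizes to
// the SCEV of 'iv', so the user is RAUW'd with the operand and queued as dead.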
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // We assume that terminators are not SCEVable.
+ assert((!S || I != I->getParent()->getTerminator()) &&
+ "can't fold terminators");
+
+ // Only consider affine recurrences.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encounters a clear
+/// optimization opportunity. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+ std::map<PHINode *, WideIVInfo> WideIVMap;
+
+ SmallVector<PHINode*, 8> LoopPhis;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ LoopPhis.push_back(cast<PHINode>(I));
+ }
+ // Each round of simplification iterates through the SimplifyIVUsers worklist
+ // for all current phis, then determines whether any IVs can be
+ // widened. Widening adds new phis to LoopPhis, inducing another round of
+ // simplification on the wide IVs.
+ while (!LoopPhis.empty()) {
+ // Evaluate as many IV expressions as possible before widening any IVs. This
+ // forces SCEV to set no-wrap flags before evaluating sign/zero
+ // extension. The first time SCEV attempts to normalize sign/zero extension,
+ // the result becomes final.
+ // So for the most predictable results, we delay sign/zero extend
+ // evaluation until needed, and avoid running other SCEV based analysis
+ // prior to SimplifyIVUsersNoRewrite.
+ do {
+ PHINode *CurrIV = LoopPhis.pop_back_val();
+
+ // Information about sign/zero extensions of CurrIV.
+ WideIVInfo WI;
+
+ // Instructions processed by SimplifyIVUsers for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ Instruction *UseInst, *Operand;
+ tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ if (EliminateIVUser(UseInst, Operand)) {
+ pushIVUsers(Operand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+ CollectExtend(Cast, IsSigned, WI, SE, TD);
+ }
+ continue;
+ }
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+ }
+ }
+ if (WI.WidestNativeType) {
+ WideIVMap[CurrIV] = WI;
+ }
+ } while(!LoopPhis.empty());
+
+ for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
+ E = WideIVMap.end(); I != E; ++I) {
+ WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ Changed = true;
+ LoopPhis.push_back(WidePhi);
+ }
+ }
+ WideIVMap.clear();
+ }
+}
+
+/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
+/// populate ExprToIVMap for use later.
+///
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEV *S = SE->getSCEV(Phi);
+ DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+ bool Inserted;
+ tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+ if (Inserted)
+ continue;
+ PHINode *OrigPhi = Pos->second;
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's almost certain to be the head of an IV
+ // user cycle that is isomorphic with the original phi. So it's worth
+ // eagerly cleaning up the common case of a single IV increment.
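// An example of the phis this catches, hedged: after IV widening, a loop
// header can end up with two phis that SCEV evaluates to the same addrec,
// say {0,+,1}<L>. The second phi visited finds the first through ExprToIVMap
// and is replaced outright; the block below additionally redirects the single
// latch increment, so no isomorphic 'phi2.next = phi2 + 1' chain is left for
// GVN to clean up.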
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = + cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); + if (OrigInc != IsomorphicInc && + SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) && + HoistStep(OrigInc, IsomorphicInc, DT)) { + DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + IsomorphicInc->replaceAllUsesWith(OrigInc); + DeadInsts.push_back(IsomorphicInc); + } + } + DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + ++NumElimIV; + Phi->replaceAllUsesWith(OrigPhi); + DeadInsts.push_back(Phi); + } +} + +//===----------------------------------------------------------------------===// +// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +//===----------------------------------------------------------------------===// + +/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken +/// count expression can be safely and cheaply expanded into an instruction +/// sequence that can be used by LinearFunctionTestReplace. +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + BackedgeTakenCount->isZero()) + return false; + + if (!L->getExitingBlock()) + return false; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return false; + + // Special case: If the backedge-taken count is a UDiv, it's very likely a + // UDiv that ScalarEvolution produced in order to compute a precise + // expression, rather than a UDiv from the user's code. If we can't find a + // UDiv in the code with some simple searching, assume the former and forego + // rewriting the loop. + if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return false; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != BackedgeTakenCount) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != BackedgeTakenCount) + return false; + } + } + return true; +} + +/// getBackedgeIVType - Get the widest type used by the loop test after peeking +/// through Truncs. +/// +/// TODO: Unnecessary if LFTR does not force a canonical IV. +static const Type *getBackedgeIVType(Loop *L) { + if (!L->getExitingBlock()) + return 0; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return 0; + + const Type *Ty = 0; + for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); + OI != OE; ++OI) { + assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); + TruncInst *Trunc = dyn_cast<TruncInst>(*OI); + if (!Trunc) + continue; + + return Trunc->getSrcTy(); + } + return Ty; +} + +/// LinearFunctionTestReplace - This method rewrites the exit condition of the +/// loop to be a canonical != comparison against the incremented loop induction +/// variable. 
This pass is able to rewrite the exit tests of any loop where the +/// SCEV analysis can determine a loop-invariant trip count of the loop, which +/// is actually a much broader range than just linear tests. +ICmpInst *IndVarSimplify:: +LinearFunctionTestReplace(Loop *L, + const SCEV *BackedgeTakenCount, + PHINode *IndVar, + SCEVExpander &Rewriter) { + assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + + // If the exiting block is not the same as the backedge block, we must compare + // against the preincremented value, otherwise we prefer to compare against + // the post-incremented value. + Value *CmpIndVar; + const SCEV *RHS = BackedgeTakenCount; + if (L->getExitingBlock() == L->getLoopLatch()) { + // Add one to the "backedge-taken" count to get the trip count. + // If this addition may overflow, we have to be more pessimistic and + // cast the induction variable before doing the add. + const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); + const SCEV *N = + SE->getAddExpr(BackedgeTakenCount, + SE->getConstant(BackedgeTakenCount->getType(), 1)); + if ((isa<SCEVConstant>(N) && !N->isZero()) || + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + // No overflow. Cast the sum. + RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); + } else { + // Potential overflow. Cast before doing the add. + RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, + IndVar->getType()); + RHS = SE->getAddExpr(RHS, + SE->getConstant(IndVar->getType(), 1)); + } + + // The BackedgeTaken expression contains the number of times that the + // backedge branches to the loop header. This is one less than the + // number of times the loop executes, so use the incremented indvar. + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + } else { + // We have to use the preincremented value... + RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, + IndVar->getType()); + CmpIndVar = IndVar; + } + + // Expand the code for the iteration count. + assert(SE->isLoopInvariant(RHS, L) && + "Computed iteration count is not loop invariant!"); + Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + + // Insert a new icmp_ne or icmp_eq instruction before the branch. + ICmpInst::Predicate Opcode; + if (L->contains(BI->getSuccessor(0))) + Opcode = ICmpInst::ICMP_NE; + else + Opcode = ICmpInst::ICMP_EQ; + + DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" + << " LHS:" << *CmpIndVar << '\n' + << " op:\t" + << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" + << " RHS:\t" << *RHS << "\n"); + + ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); + Cond->setDebugLoc(BI->getDebugLoc()); + Value *OrigCond = BI->getCondition(); + // It's tempting to use replaceAllUsesWith here to fully replace the old + // comparison, but that's not immediately safe, since users of the old + // comparison may not be dominated by the new comparison. Instead, just + // update the branch to use the new comparison; in the common case this + // will make old comparison dead. + BI->setCondition(Cond); + DeadInsts.push_back(OrigCond); + + ++NumLFTR; + Changed = true; + return Cond; +} + +//===----------------------------------------------------------------------===// +// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. 
+//===----------------------------------------------------------------------===// + +/// If there's a single exit block, sink any loop-invariant values that +/// were defined in the preheader but not used inside the loop into the +/// exit block to reduce register pressure in the loop. +void IndVarSimplify::SinkUnusedInvariants(Loop *L) { + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) return; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return; + + Instruction *InsertPt = ExitBlock->getFirstNonPHI(); + BasicBlock::iterator I = Preheader->getTerminator(); + while (I != Preheader->begin()) { + --I; + // New instructions were inserted at the end of the preheader. + if (isa<PHINode>(I)) + break; + + // Don't move instructions which might have side effects, since the side + // effects need to complete before instructions inside the loop. Also don't + // move instructions which might read memory, since the loop may modify + // memory. Note that it's okay if the instruction might have undefined + // behavior: LoopSimplify guarantees that the preheader dominates the exit + // block. + if (I->mayHaveSideEffects() || I->mayReadFromMemory()) + continue; + + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + continue; + + // Don't sink static AllocaInsts out of the entry block, which would + // turn them into dynamic allocas! + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (AI->isStaticAlloca()) + continue; + + // Determine if there is a use in or before the loop (direct or + // otherwise). + bool UsedInLoop = false; + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + User *U = *UI; + BasicBlock *UseBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) { + unsigned i = + PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); + UseBB = P->getIncomingBlock(i); + } + if (UseBB == Preheader || L->contains(UseBB)) { + UsedInLoop = true; + break; + } + } + + // If there is, the def must remain in the preheader. + if (UsedInLoop) + continue; + + // Otherwise, sink it to the exit block. + Instruction *ToMove = I; + bool Done = false; + + if (I != Preheader->begin()) { + // Skip debug info intrinsics. + do { + --I; + } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); + + if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) + Done = true; + } else { + Done = true; + } + + ToMove->moveBefore(InsertPt); + if (Done) break; + InsertPt = ToMove; + } +} + +//===----------------------------------------------------------------------===// +// IndVarSimplify driver. Manage several subpasses of IV simplification. +//===----------------------------------------------------------------------===// + bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If LoopSimplify form is not available, stay out of trouble. Some notes: // - LSR currently only supports LoopSimplify-form loops. Indvars' @@ -1010,7 +1699,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!L->isLoopSimplifyForm()) return false; - IU = &getAnalysis<IVUsers>(); + if (!DisableIVRewrite) + IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); @@ -1026,9 +1716,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. 
- SCEVExpander Rewriter(*SE); - if (DisableIVRewrite) + SCEVExpander Rewriter(*SE, "indvars"); + + // Eliminate redundant IV users. + // + // Simplification works best when run before other consumers of SCEV. We + // attempt to avoid evaluating SCEVs for sign/zero extend operations until + // other expressions involving loop IVs have been evaluated. This helps SCEV + // set no-wrap flags before normalizing sign/zero extension. + if (DisableIVRewrite) { Rewriter.disableCanonicalMode(); + SimplifyIVUsersNoRewrite(L, Rewriter); + } // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -1040,7 +1739,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV users. - SimplifyIVUsers(Rewriter); + if (!DisableIVRewrite) + SimplifyIVUsers(Rewriter); + + // Eliminate redundant IV cycles. + if (DisableIVRewrite) + SimplifyCongruentIVs(L); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. @@ -1119,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { "canonical IV disrupted BackedgeTaken expansion"); assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); - NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - Rewriter); + // Check preconditions for proper SCEVExpander operation. SCEV does not + // express SCEVExpander's dependencies, such as LoopSimplify. Instead any + // pass that uses the SCEVExpander must do it. This does not work well for + // loop passes because SCEVExpander makes assumptions about all loops, while + // LoopPassManager only forces the current loop to be simplified. + // + // FIXME: SCEV expansion has no way to bail out, so the caller must + // explicitly check any assumptions made by SCEV. Brittle. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); + if (!AR || AR->getLoop()->getLoopPreheader()) + NewICmp = + LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); } // Rewrite IV-derived expressions. if (!DisableIVRewrite) @@ -1146,9 +1860,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. - if (NewICmp) - IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)), - IndVar); + if (NewICmp && IU) + IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); @@ -1156,428 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); return Changed; } - -// FIXME: It is an extremely bad idea to indvar substitute anything more -// complex than affine induction variables. Doing so will put expensive -// polynomial evaluations inside of the loop, and the str reduction pass -// currently can only reduce affine polynomials. For now just disable -// indvar subst on anything more complex than an affine addrec, unless -// it can be expanded to a trivial value. -static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { - // Loop-invariant values are safe. - if (SE->isLoopInvariant(S, L)) return true; - - // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how - // to transform them into efficient code. 
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - return AR->isAffine(); - - // An add is safe it all its operands are safe. - if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { - for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), - E = Commutative->op_end(); I != E; ++I) - if (!isSafe(*I, L, SE)) return false; - return true; - } - - // A cast is safe if its operand is. - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) - return isSafe(C->getOperand(), L, SE); - - // A udiv is safe if its operands are. - if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) - return isSafe(UD->getLHS(), L, SE) && - isSafe(UD->getRHS(), L, SE); - - // SCEVUnknown is always safe. - if (isa<SCEVUnknown>(S)) - return true; - - // Nothing else is safe. - return false; -} - -void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - // Rewrite all induction variable expressions in terms of the canonical - // induction variable. - // - // If there were induction variables of other sizes or offsets, manually - // add the offsets to the primary induction variable and cast, avoiding - // the need for the code evaluation methods to insert induction variables - // of different sizes. - for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); - Instruction *User = UI->getUser(); - - // Compute the final addrec to expand into code. - const SCEV *AR = IU->getReplacementExpr(*UI); - - // Evaluate the expression out of the loop, if possible. - if (!L->contains(UI->getUser())) { - const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (SE->isLoopInvariant(ExitVal, L)) - AR = ExitVal; - } - - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. - if (!isSafe(AR, L, SE)) - continue; - - // Determine the insertion point for this user. By default, insert - // immediately before the user. The SCEVExpander class will automatically - // hoist loop invariants out of the loop. For PHI nodes, there may be - // multiple uses, so compute the nearest common dominator for the - // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } - - // Now expand it into actual Instructions and patch it into place. - Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); - - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); - - if (!isValidRewrite(Op, NewVal)) { - DeadInsts.push_back(NewVal); - continue; - } - // Inform ScalarEvolution that this value is changing. 
The change doesn't - // affect its value, but it does potentially affect which use lists the - // value will be on after the replacement, which affects ScalarEvolution's - // ability to walk use lists and drop dangling pointers when a value is - // deleted. - SE->forgetValue(User); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } -} - -/// If there's a single exit block, sink any loop-invariant values that -/// were defined in the preheader but not used inside the loop into the -/// exit block to reduce register pressure in the loop. -void IndVarSimplify::SinkUnusedInvariants(Loop *L) { - BasicBlock *ExitBlock = L->getExitBlock(); - if (!ExitBlock) return; - - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) return; - - Instruction *InsertPt = ExitBlock->getFirstNonPHI(); - BasicBlock::iterator I = Preheader->getTerminator(); - while (I != Preheader->begin()) { - --I; - // New instructions were inserted at the end of the preheader. - if (isa<PHINode>(I)) - break; - - // Don't move instructions which might have side effects, since the side - // effects need to complete before instructions inside the loop. Also don't - // move instructions which might read memory, since the loop may modify - // memory. Note that it's okay if the instruction might have undefined - // behavior: LoopSimplify guarantees that the preheader dominates the exit - // block. - if (I->mayHaveSideEffects() || I->mayReadFromMemory()) - continue; - - // Skip debug info intrinsics. - if (isa<DbgInfoIntrinsic>(I)) - continue; - - // Don't sink static AllocaInsts out of the entry block, which would - // turn them into dynamic allocas! - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - if (AI->isStaticAlloca()) - continue; - - // Determine if there is a use in or before the loop (direct or - // otherwise). - bool UsedInLoop = false; - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - User *U = *UI; - BasicBlock *UseBB = cast<Instruction>(U)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(U)) { - unsigned i = - PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); - UseBB = P->getIncomingBlock(i); - } - if (UseBB == Preheader || L->contains(UseBB)) { - UsedInLoop = true; - break; - } - } - - // If there is, the def must remain in the preheader. - if (UsedInLoop) - continue; - - // Otherwise, sink it to the exit block. - Instruction *ToMove = I; - bool Done = false; - - if (I != Preheader->begin()) { - // Skip debug info intrinsics. - do { - --I; - } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); - - if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) - Done = true; - } else { - Done = true; - } - - ToMove->moveBefore(InsertPt); - if (Done) break; - InsertPt = ToMove; - } -} - -/// ConvertToSInt - Convert APF to an integer, if possible. 
-static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { - bool isExact = false; - if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) - return false; - // See if we can convert this to an int64_t - uint64_t UIntVal; - if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, - &isExact) != APFloat::opOK || !isExact) - return false; - IntVal = UIntVal; - return true; -} - -/// HandleFloatingPointIV - If the loop has floating induction variable -/// then insert corresponding integer induction variable if possible. -/// For example, -/// for(double i = 0; i < 10000; ++i) -/// bar(i) -/// is converted into -/// for(int i = 0; i < 10000; ++i) -/// bar((double)i); -/// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { - unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); - unsigned BackEdge = IncomingEdge^1; - - // Check incoming value. - ConstantFP *InitValueVal = - dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); - - int64_t InitValue; - if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) - return; - - // Check IV increment. Reject this PN if increment operation is not - // an add or increment value can not be represented by an integer. - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); - if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - - // If this is not an add of the PHI with a constantfp, or if the constant fp - // is not an integer, bail out. - ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); - int64_t IncValue; - if (IncValueVal == 0 || Incr->getOperand(0) != PN || - !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) - return; - - // Check Incr uses. One user is PN and the other user is an exit condition - // used by the conditional terminator. - Value::use_iterator IncrUse = Incr->use_begin(); - Instruction *U1 = cast<Instruction>(*IncrUse++); - if (IncrUse == Incr->use_end()) return; - Instruction *U2 = cast<Instruction>(*IncrUse++); - if (IncrUse != Incr->use_end()) return; - - // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't - // only used by a branch, we can't transform it. - FCmpInst *Compare = dyn_cast<FCmpInst>(U1); - if (!Compare) - Compare = dyn_cast<FCmpInst>(U2); - if (Compare == 0 || !Compare->hasOneUse() || - !isa<BranchInst>(Compare->use_back())) - return; - - BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); - - // We need to verify that the branch actually controls the iteration count - // of the loop. If not, the new IV can overflow and no one will notice. - // The branch block must be in the loop and one of the successors must be out - // of the loop. - assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); - if (!L->contains(TheBr->getParent()) || - (L->contains(TheBr->getSuccessor(0)) && - L->contains(TheBr->getSuccessor(1)))) - return; - - - // If it isn't a comparison with an integer-as-fp (the exit value), we can't - // transform it. - ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); - int64_t ExitValue; - if (ExitValueVal == 0 || - !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) - return; - - // Find new predicate for integer comparison. - CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; - switch (Compare->getPredicate()) { - default: return; // Unknown comparison. 
- case CmpInst::FCMP_OEQ: - case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; - case CmpInst::FCMP_ONE: - case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; - case CmpInst::FCMP_OGT: - case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; - case CmpInst::FCMP_OGE: - case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; - case CmpInst::FCMP_OLT: - case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; - case CmpInst::FCMP_OLE: - case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; - } - - // We convert the floating point induction variable to a signed i32 value if - // we can. This is only safe if the comparison will not overflow in a way - // that won't be trapped by the integer equivalent operations. Check for this - // now. - // TODO: We could use i64 if it is native and the range requires it. - - // The start/stride/exit values must all fit in signed i32. - if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) - return; - - // If not actually striding (add x, 0.0), avoid touching the code. - if (IncValue == 0) - return; - - // Positive and negative strides have different safety conditions. - if (IncValue > 0) { - // If we have a positive stride, we require the init to be less than the - // exit value and an equality or less than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) - return; - - uint32_t Range = uint32_t(ExitValue-InitValue); - if (NewPred == CmpInst::ICMP_SLE) { - // Normalize SLE -> SLT, check for infinite loop. - if (++Range == 0) return; // Range overflows. - } - - unsigned Leftover = Range % uint32_t(IncValue); - - // If this is an equality comparison, we require that the strided value - // exactly land on the exit value, otherwise the IV condition will wrap - // around and do things the fp IV wouldn't. - if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && - Leftover != 0) - return; - - // If the stride would wrap around the i32 before exiting, we can't - // transform the IV. - if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) - return; - - } else { - // If we have a negative stride, we require the init to be greater than the - // exit value and an equality or greater than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) - return; - - uint32_t Range = uint32_t(InitValue-ExitValue); - if (NewPred == CmpInst::ICMP_SGE) { - // Normalize SGE -> SGT, check for infinite loop. - if (++Range == 0) return; // Range overflows. - } - - unsigned Leftover = Range % uint32_t(-IncValue); - - // If this is an equality comparison, we require that the strided value - // exactly land on the exit value, otherwise the IV condition will wrap - // around and do things the fp IV wouldn't. - if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && - Leftover != 0) - return; - - // If the stride would wrap around the i32 before exiting, we can't - // transform the IV. - if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) - return; - } - - const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); - - // Insert new integer induction variable. 
- PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); - NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), - PN->getIncomingBlock(IncomingEdge)); - - Value *NewAdd = - BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), - Incr->getName()+".int", Incr); - NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); - - ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, - ConstantInt::get(Int32Ty, ExitValue), - Compare->getName()); - - // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; - - // Delete the old floating point exit comparison. The branch starts using the - // new comparison. - NewCompare->takeName(Compare); - Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare); - - // Delete the old floating point increment. - Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr); - - // If the FP induction variable still has uses, this is because something else - // in the loop uses its value. In order to canonicalize the induction - // variable, we chose to eliminate the IV and rewrite it in terms of an - // int->fp cast. - // - // We give preference to sitofp over uitofp because it is faster on most - // platforms. - if (WeakPH) { - Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", - PN->getParent()->getFirstNonPHI()); - PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN); - } - - // Add a new IVUsers entry for the newly-created integer PHI. - IU->AddUsersIfInteresting(NewPHI, NewPHI); -} diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index cf18ff040bda..b500d5b4fdff 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -600,8 +600,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { TestBB = BBTerm->getSuccessor(i); unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - if (NumPreds < MinNumPreds) + if (NumPreds < MinNumPreds) { MinSucc = i; + MinNumPreds = NumPreds; + } } return MinSucc; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 13bd02215be5..66add6ca01ee 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -178,7 +178,7 @@ INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } /// Hoist expressions out of the specified loop. Note, alias info for inner -/// loop is not preserved so it is not a good idea to run LICM multiple +/// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. /// bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -199,13 +199,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); - + // Once we've incorporated the inner loop's AST into ours, we don't need the // subloop's anymore. delete InnerAST; LoopToAliasSetMap.erase(InnerL); } - + CurLoop = L; // Get the preheader block to move instructions into... 
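The GetBestDestForJumpOnUndef change in the JumpThreading hunk above fixes a classic running-minimum bug: the old loop updated the best successor index whenever it saw fewer predecessors than MinNumPreds, but never refreshed MinNumPreds itself, so every candidate was compared against the first successor's count and the loop could settle on the last block that merely beat the initial value rather than the true minimum. A minimal standalone sketch of the corrected pattern (hypothetical names, not code from this patch):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Running-minimum search. Omitting the `Best = V[i];` update inside the
    // branch reproduces the JumpThreading bug: every element keeps being
    // compared against V[0], so the last element smaller than V[0] wins
    // instead of the smallest element.
    static std::size_t IndexOfMin(const std::vector<int> &V) {
      std::size_t MinIdx = 0;
      int Best = V[0];
      for (std::size_t i = 1, e = V.size(); i != e; ++i)
        if (V[i] < Best) {
          MinIdx = i;
          Best = V[i]; // the update the patch adds, in spirit
        }
      return MinIdx;
    }

    int main() {
      // With the update the true minimum (value 1, index 3) wins; without
      // it, index 4 (value 4) would be returned.
      assert(IndexOfMin({5, 7, 6, 1, 4}) == 3);
      return 0;
    }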
@@ -245,7 +245,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { I != E; ++I) PromoteAliasSet(*I); } - + // Clear out loops state information for the next iteration CurLoop = 0; Preheader = 0; @@ -283,7 +283,7 @@ void LICM::SinkRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; - + // If the instruction is dead, we would try to sink it because it isn't used // in the loop, instead, just delete it. if (isInstructionTriviallyDead(&I)) { @@ -336,7 +336,7 @@ void LICM::HoistRegion(DomTreeNode *N) { I.eraseFromParent(); continue; } - + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. @@ -364,7 +364,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - + // Don't hoist loads which have may-aliased stores in loop. uint64_t Size = 0; if (LI->getType()->isSized()) @@ -470,7 +470,7 @@ void LICM::sink(Instruction &I) { } return; } - + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); @@ -482,30 +482,30 @@ void LICM::sink(Instruction &I) { I.eraseFromParent(); return; } - + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the // hard work of inserting PHI nodes as necessary. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSA(&NewPHIs); - + if (!I.use_empty()) SSA.Initialize(I.getType(), I.getName()); - + // Insert a copy of the instruction in each exit block of the loop that is // dominated by the instruction. Each exit block is known to only be in the // ExitBlocks list once. BasicBlock *InstOrigBB = I.getParent(); unsigned NumInserted = 0; - + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; - + if (!DT->dominates(InstOrigBB, ExitBlock)) continue; - + // Insert the code after the last PHI node. BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - + // If this is the first exit block processed, just move the original // instruction, otherwise clone the original instruction and insert // the copy. @@ -519,12 +519,12 @@ void LICM::sink(Instruction &I) { New->setName(I.getName()+".le"); ExitBlock->getInstList().insert(InsertPt, New); } - + // Now that we have inserted the instruction, inform SSAUpdater. if (!I.use_empty()) SSA.AddAvailableValue(ExitBlock, New); } - + // If the instruction doesn't dominate any exit blocks, it must be dead. if (NumInserted == 0) { CurAST->deleteValue(&I); @@ -533,7 +533,7 @@ void LICM::sink(Instruction &I) { I.eraseFromParent(); return; } - + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { // Grab the use before incrementing the iterator. @@ -542,12 +542,12 @@ void LICM::sink(Instruction &I) { ++UI; SSA.RewriteUseAfterInsertions(U); } - + // Update CurAST for NewPHIs if I had pointer type. if (I.getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) CurAST->copyValue(&I, NewPHIs[i]); - + // Finally, remove the instruction from CurAST. It is no longer in the loop. 
CurAST->deleteValue(&I); } @@ -606,15 +606,17 @@ namespace { SmallVectorImpl<BasicBlock*> &LoopExitBlocks; AliasSetTracker &AST; DebugLoc DL; + int Alignment; public: LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, - DebugLoc dl) - : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {} - + DebugLoc dl, int alignment) + : LoadAndStorePromoter(Insts, S), SomePtr(SP), + PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), + Alignment(alignment) {} + virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { Value *Ptr; @@ -624,7 +626,7 @@ namespace { Ptr = cast<StoreInst>(I)->getPointerOperand(); return PointerMustAliases.count(Ptr); } - + virtual void doExtraRewritesBeforeFinalDeletion() const { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told @@ -635,6 +637,7 @@ namespace { Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); Instruction *InsertPos = ExitBlock->getFirstNonPHI(); StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); + NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); } } @@ -661,7 +664,7 @@ void LICM::PromoteAliasSet(AliasSet &AS) { if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) return; - + assert(!AS.empty() && "Must alias set should have at least one pointer element in it!"); Value *SomePtr = AS.begin()->getValue(); @@ -676,60 +679,78 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. - // + // // It is safe to promote P if all uses are direct load/stores and if at // least one is guaranteed to be executed. bool GuaranteedToExecute = false; - + SmallVector<Instruction*, 64> LoopUses; SmallPtrSet<Value*, 4> PointerMustAliases; + // We start with an alignment of one and try to find instructions that allow + // us to prove better alignment. + unsigned Alignment = 1; + // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { Value *ASIV = ASI->getValue(); PointerMustAliases.insert(ASIV); - + // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. if (SomePtr->getType() != ASIV->getType()) return; - + for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; - + // If there is an non-load/store instruction in the loop, we can't promote // it. - if (isa<LoadInst>(Use)) + unsigned InstAlignment; + if (LoadInst *load = dyn_cast<LoadInst>(Use)) { assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); - else if (isa<StoreInst>(Use)) { + InstAlignment = load->getAlignment(); + } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. 
if (Use->getOperand(1) != ASIV) continue; + InstAlignment = store->getAlignment(); assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); } else return; // Not a load or store. - + + // If the alignment of this instruction allows us to specify a more + // restrictive (and performant) alignment and if we are sure this + // instruction will be executed, update the alignment. + // Larger is better, with the exception of 0 being the best alignment. + if ((InstAlignment > Alignment || InstAlignment == 0) + && (Alignment != 0)) + if (isSafeToExecuteUnconditionally(*Use)) { + GuaranteedToExecute = true; + Alignment = InstAlignment; + } + if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); - + LoopUses.push_back(Use); } } - + // If there isn't a guaranteed-to-execute instruction, we can't promote. if (!GuaranteedToExecute) return; - + // Otherwise, this is safe to promote, lets do it! - DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); Changed = true; ++NumPromoted; @@ -741,18 +762,19 @@ void LICM::PromoteAliasSet(AliasSet &AS) { SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); - + // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL); - + *CurAST, DL, Alignment); + // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst(SomePtr, SomePtr->getName()+".promoted", Preheader->getTerminator()); + PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); SSA.AddAvailableValue(Preheader, PreheaderLoad); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 753a558cfe83..f7f32981baa7 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -190,7 +190,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast<PHINode>(BI)) { - P->replaceUsesOfWith(exitingBlock, preheader); + int j = P->getBasicBlockIndex(exitingBlock); + assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); + P->setIncomingBlock(j, preheader); for (unsigned i = 1; i < exitingBlocks.size(); ++i) P->removeIncomingValue(exitingBlocks[i]); ++BI; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index dbf6eec331da..a0e41d9a9772 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -167,12 +167,17 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { if (Instruction *I = dyn_cast<Instruction>(V)) if (isInstructionTriviallyDead(I)) - deleteDeadInstruction(I, SE); + deleteDeadInstruction(I, SE); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; + // Disable loop idiom recognition if the function's name is a common idiom. + StringRef Name = L->getHeader()->getParent()->getName(); + if (Name == "memset" || Name == "memcpy") + return false; + // The trip count of the loop must be analyzable. 
SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) @@ -467,8 +472,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE); - + SCEVExpander Expander(*SE, "loop-idiom"); + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -488,7 +493,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, deleteIfDeadInstruction(BasePtr, *SE); return false; } - + // Okay, everything looks good, insert the memset. // The # stored bytes is (BECount+1)*Size. Expand the trip count out to @@ -556,8 +561,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE); - + SCEVExpander Expander(*SE, "loop-idiom"); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -568,7 +573,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), SI)) { @@ -593,9 +598,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, deleteIfDeadInstruction(StoreBasePtr, *SE); return false; } - + // Okay, everything is safe, we can transform this! - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. @@ -619,7 +624,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 47dced37c3a4..9fd0958fd4c3 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -220,7 +220,7 @@ bool LoopRotate::rotateLoop(Loop *L) { // For PHI nodes, the value available in OldPreHeader is just the // incoming value from OldPreHeader. for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) - ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); + ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. 
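A few hunks up, LoopIdiomRecognize::runOnLoop now bails out when the enclosing function is itself named "memset" or "memcpy". The pass turns unit-stride store loops of a splattable value into a memset (or memcpy) call emitted in the loop preheader, so running it inside a libc-style implementation of those functions would compile them into calls to themselves. A sketch of the kind of loop the pass matches, with a hypothetical naive_memset standing in for such an implementation:

    #include <cstddef>

    // A unit-stride loop storing a loop-invariant byte: the shape
    // processLoopStridedStore recognizes and would rewrite into a single
    // memset call in the preheader. If this function were the program's
    // memset, that rewrite would recurse infinitely, hence the name check.
    void *naive_memset(void *Dst, int Val, std::size_t Len) {
      unsigned char *P = static_cast<unsigned char *>(Dst);
      for (std::size_t i = 0; i != Len; ++i)
        P[i] = static_cast<unsigned char>(Val);
      return Dst;
    }

The check is a name-based heuristic: it protects the common freestanding implementations without having to prove self-reference through the call graph.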
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 73ebd618a0cb..509d0264f10b 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1804,8 +1804,7 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! - CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(), - CondUse->getPhi()); + CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); TermBr->replaceUsesOfWith(OldCond, Cond); } } @@ -2768,7 +2767,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // value to the immediate would produce a value closer to zero than the // immediate itself, then the formula isn't worthwhile. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) - if (C->getValue()->getValue().isNegative() != + if (C->getValue()->isNegative() != (NewF.AM.BaseOffs < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) .ule(abs64(NewF.AM.BaseOffs))) @@ -3699,7 +3698,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, // we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(SE); + SCEVExpander Rewriter(SE, "lsr"); Rewriter.disableCanonicalMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index e05f29c3e13f..840c4b69cf06 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1021,6 +1021,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); + // If Succ has any successors with PHI nodes, update them to have + // entries coming from Pred instead of Succ. + Succ->replaceAllUsesWith(Pred); + // Move all of the successor contents from Succ to Pred. Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), Succ->end()); @@ -1028,10 +1032,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { BI->eraseFromParent(); RemoveFromWorklist(BI, Worklist); - // If Succ has any successors with PHI nodes, update them to have - // entries coming from Pred instead of Succ. - Succ->replaceAllUsesWith(Pred); - // Remove Succ from the loop tree. LI->removeBlock(Succ); LPM->deleteSimpleAnalysisValue(Succ, L); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index be5aa2ea5832..7ed3db6cc1db 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -487,7 +487,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // happen to be using a load-store pair to implement it, rather than // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { - if (!LI->isVolatile() && LI->hasOneUse()) { + if (!LI->isVolatile() && LI->hasOneUse() && + LI->getParent() == SI->getParent()) { MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) @@ -496,17 +497,14 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (C) { // Check that nothing touches the dest of the "copy" between // the call and the store. 
-      MemDepResult sdep = MD->getDependency(SI);
-      if (!sdep.isNonLocal()) {
-        bool FoundCall = false;
-        for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
-          if (&*I == C) {
-            FoundCall = true;
-            break;
-          }
-        }
-        if (!FoundCall)
+        AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+        AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+        for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+                                  E = C; I != E; --I) {
+          if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
             C = 0;
+            break;
+          }
         }
       }

@@ -842,11 +840,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {

   // If not, then we know we can transform this.
   Module *Mod = M->getParent()->getParent()->getParent();
-  const Type *ArgTys[3] = { M->getRawDest()->getType(),
-                            M->getRawSource()->getType(),
-                            M->getLength()->getType() };
+  Type *ArgTys[3] = { M->getRawDest()->getType(),
+                      M->getRawSource()->getType(),
+                      M->getLength()->getType() };
   M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
-                                                 ArgTys, 3));
+                                                 ArgTys));

   // MemDep may have over conservative information about this instruction, just
   // conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
new file mode 100644
index 000000000000..ee132d3be4f5
--- /dev/null
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3595 @@
+//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ObjC ARC optimizations. ARC stands for
+// Automatic Reference Counting and is a system for managing reference counts
+// for objects in Objective C.
+//
+// The optimizations performed include elimination of redundant, partially
+// redundant, and inconsequential reference count operations, elimination of
+// redundant weak pointer operations, pattern-matching and replacement of
+// low-level operations into higher-level operations, and numerous minor
+// simplifications.
+//
+// This file also defines a simple ARC-aware AliasAnalysis.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+// WARNING: This file knows about how certain Objective-C library functions are
+// used. Naive LLVM IR transformations which would otherwise be
+// behavior-preserving may break these assumptions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// Misc.
Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// MapVector - An associative container with fast insertion-order + /// (deterministic) iteration over its elements. Plus the special + /// blot operation. + template<class KeyT, class ValueT> + class MapVector { + /// Map - Map keys to indices in Vector. + typedef DenseMap<KeyT, size_t> MapTy; + MapTy Map; + + /// Vector - Keys and values. + typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; + VectorTy Vector; + + public: + typedef typename VectorTy::iterator iterator; + typedef typename VectorTy::const_iterator const_iterator; + iterator begin() { return Vector.begin(); } + iterator end() { return Vector.end(); } + const_iterator begin() const { return Vector.begin(); } + const_iterator end() const { return Vector.end(); } + +#ifdef XDEBUG + ~MapVector() { + assert(Vector.size() >= Map.size()); // May differ due to blotting. + for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); + I != E; ++I) { + assert(I->second < Vector.size()); + assert(Vector[I->second].first == I->first); + } + for (typename VectorTy::const_iterator I = Vector.begin(), + E = Vector.end(); I != E; ++I) + assert(!I->first || + (Map.count(I->first) && + Map[I->first] == size_t(I - Vector.begin()))); + } +#endif + + ValueT &operator[](KeyT Arg) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Arg, size_t(0))); + if (Pair.second) { + Pair.first->second = Vector.size(); + Vector.push_back(std::make_pair(Arg, ValueT())); + return Vector.back().second; + } + return Vector[Pair.first->second].second; + } + + std::pair<iterator, bool> + insert(const std::pair<KeyT, ValueT> &InsertPair) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(InsertPair.first, size_t(0))); + if (Pair.second) { + Pair.first->second = Vector.size(); + Vector.push_back(InsertPair); + return std::make_pair(llvm::prior(Vector.end()), true); + } + return std::make_pair(Vector.begin() + Pair.first->second, false); + } + + const_iterator find(KeyT Key) const { + typename MapTy::const_iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + + /// blot - This is similar to erase, but instead of removing the element + /// from the vector, it just zeros out the key in the vector. This leaves + /// iterators intact, but clients must be prepared for zeroed-out keys when + /// iterating. + void blot(KeyT Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return; + Vector[It->second].first = KeyT(); + Map.erase(It); + } + + void clear() { + Map.clear(); + Vector.clear(); + } + }; +} + +//===----------------------------------------------------------------------===// +// ARC Utilities. +//===----------------------------------------------------------------------===// + +namespace { + /// InstructionClass - A simple classification for instructions. + enum InstructionClass { + IC_Retain, ///< objc_retain + IC_RetainRV, ///< objc_retainAutoreleasedReturnValue + IC_RetainBlock, ///< objc_retainBlock + IC_Release, ///< objc_release + IC_Autorelease, ///< objc_autorelease + IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue + IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush + IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop + IC_NoopCast, ///< objc_retainedObject, etc. 
+    IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+    IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+    IC_LoadWeakRetained,    ///< objc_loadWeakRetained (primitive)
+    IC_StoreWeak,           ///< objc_storeWeak (primitive)
+    IC_InitWeak,            ///< objc_initWeak (derived)
+    IC_LoadWeak,            ///< objc_loadWeak (derived)
+    IC_MoveWeak,            ///< objc_moveWeak (derived)
+    IC_CopyWeak,            ///< objc_copyWeak (derived)
+    IC_DestroyWeak,         ///< objc_destroyWeak (derived)
+    IC_CallOrUser,          ///< could call objc_release and/or "use" pointers
+    IC_Call,                ///< could call objc_release
+    IC_User,                ///< could "use" a pointer
+    IC_None                 ///< anything else
+  };
+}
+
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer.
+static bool IsPotentialUse(const Value *Op) {
+  // Pointers to static or stack storage are not reference-counted pointers.
+  if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+    return false;
+  // Special arguments are not reference-counted.
+  if (const Argument *Arg = dyn_cast<Argument>(Op))
+    if (Arg->hasByValAttr() ||
+        Arg->hasNestAttr() ||
+        Arg->hasStructRetAttr())
+      return false;
+  // Only consider values with pointer types, and not function pointers.
+  const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+  if (!Ty || isa<FunctionType>(Ty->getElementType()))
+    return false;
+  // Conservatively assume anything else is a potential use.
+  return true;
+}
+
+/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
+/// of construct CS is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+  for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+       I != E; ++I)
+    if (IsPotentialUse(*I))
+      return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+  return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// GetFunctionClass - Determine if F is one of the special known Functions.
+/// If it isn't, return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+  Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+  // No arguments.
+  if (AI == AE)
+    return StringSwitch<InstructionClass>(F->getName())
+      .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+      .Default(IC_CallOrUser);
+
+  // One argument.
+  const Argument *A0 = AI++;
+  if (AI == AE)
+    // Argument is a pointer.
+    if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+      const Type *ETy = PTy->getElementType();
+      // Argument is i8*.
+ if (ETy->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_retain", IC_Retain) + .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) + .Case("objc_retainBlock", IC_RetainBlock) + .Case("objc_release", IC_Release) + .Case("objc_autorelease", IC_Autorelease) + .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) + .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) + .Case("objc_retainedObject", IC_NoopCast) + .Case("objc_unretainedObject", IC_NoopCast) + .Case("objc_unretainedPointer", IC_NoopCast) + .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) + .Default(IC_CallOrUser); + + // Argument is i8** + if (const PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (Pte->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_loadWeakRetained", IC_LoadWeakRetained) + .Case("objc_loadWeak", IC_LoadWeak) + .Case("objc_destroyWeak", IC_DestroyWeak) + .Default(IC_CallOrUser); + } + + // Two arguments, first is i8**. + const Argument *A1 = AI++; + if (AI == AE) + if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) + if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) + if (Pte->getElementType()->isIntegerTy(8)) + if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { + const Type *ETy1 = PTy1->getElementType(); + // Second argument is i8* + if (ETy1->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_storeWeak", IC_StoreWeak) + .Case("objc_initWeak", IC_InitWeak) + .Default(IC_CallOrUser); + // Second argument is i8**. + if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) + if (Pte1->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_moveWeak", IC_MoveWeak) + .Case("objc_copyWeak", IC_CopyWeak) + .Default(IC_CallOrUser); + } + + // Anything else. + return IC_CallOrUser; +} + +/// GetInstructionClass - Determine what kind of construct V is. +static InstructionClass GetInstructionClass(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Any instruction other than bitcast and gep with a pointer operand have a + // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer + // to a subsequent use, rather than using it themselves, in this sense. + // As a short cut, several other opcodes are known to have no pointer + // operands of interest. And ret is never followed by a release, so it's + // not interesting to examine. + switch (I->getOpcode()) { + case Instruction::Call: { + const CallInst *CI = cast<CallInst>(I); + // Check for calls to special functions. + if (const Function *F = CI->getCalledFunction()) { + InstructionClass Class = GetFunctionClass(F); + if (Class != IC_CallOrUser) + return Class; + + // None of the intrinsic functions do objc_release. For intrinsics, the + // only question is whether or not they may be users. + switch (F->getIntrinsicID()) { + case 0: break; + case Intrinsic::bswap: case Intrinsic::ctpop: + case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::returnaddress: case Intrinsic::frameaddress: + case Intrinsic::stacksave: case Intrinsic::stackrestore: + case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: + // Don't let dbg info affect our results. 
+      case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+        // Short cut: Some intrinsics obviously don't use ObjC pointers.
+        return IC_None;
+      default:
+        for (Function::const_arg_iterator AI = F->arg_begin(),
+             AE = F->arg_end(); AI != AE; ++AI)
+          if (IsPotentialUse(AI))
+            return IC_User;
+        return IC_None;
+      }
+    }
+    return GetCallSiteClass(CI);
+  }
+  case Instruction::Invoke:
+    return GetCallSiteClass(cast<InvokeInst>(I));
+  case Instruction::BitCast:
+  case Instruction::GetElementPtr:
+  case Instruction::Select: case Instruction::PHI:
+  case Instruction::Ret: case Instruction::Br:
+  case Instruction::Switch: case Instruction::IndirectBr:
+  case Instruction::Alloca: case Instruction::VAArg:
+  case Instruction::Add: case Instruction::FAdd:
+  case Instruction::Sub: case Instruction::FSub:
+  case Instruction::Mul: case Instruction::FMul:
+  case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+  case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+  case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+  case Instruction::And: case Instruction::Or: case Instruction::Xor:
+  case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+  case Instruction::IntToPtr: case Instruction::FCmp:
+  case Instruction::FPTrunc: case Instruction::FPExt:
+  case Instruction::FPToUI: case Instruction::FPToSI:
+  case Instruction::UIToFP: case Instruction::SIToFP:
+  case Instruction::InsertElement: case Instruction::ExtractElement:
+  case Instruction::ShuffleVector:
+  case Instruction::ExtractValue:
+    break;
+  case Instruction::ICmp:
+    // Comparing a pointer with null, or any other constant, isn't an
+    // interesting use, because we don't care what the pointer points to, or
+    // about the values of any other dynamic reference-counted pointers.
+    if (IsPotentialUse(I->getOperand(1)))
+      return IC_User;
+    break;
+  default:
+    // For anything else, check all the operands.
+    for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+         OI != OE; ++OI)
+      if (IsPotentialUse(*OI))
+        return IC_User;
+  }
+
+  // Otherwise, it's totally inert for ARC purposes.
+  return IC_None;
+}
+
+/// GetBasicInstructionClass - Determine what kind of construct V is. This is
+/// similar to GetInstructionClass except that it only detects objc runtime
+/// calls. This allows it to be faster.
+static InstructionClass GetBasicInstructionClass(const Value *V) {
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (const Function *F = CI->getCalledFunction())
+      return GetFunctionClass(F);
+    // Otherwise, be conservative.
+    return IC_CallOrUser;
+  }
+
+  // Otherwise, be conservative.
+  return IC_User;
+}
+
+/// IsRetain - Test if the given class is objc_retain or
+/// equivalent.
+static bool IsRetain(InstructionClass Class) {
+  return Class == IC_Retain ||
+         Class == IC_RetainRV;
+}
+
+/// IsAutorelease - Test if the given class is objc_autorelease or
+/// equivalent.
+static bool IsAutorelease(InstructionClass Class) {
+  return Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV;
+}
+
+/// IsForwarding - Test if the given class represents instructions which return
+/// their argument verbatim.
+static bool IsForwarding(InstructionClass Class) {
+  // objc_retainBlock technically doesn't always return its argument
+  // verbatim, but it doesn't matter for our purposes here.
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV ||
+         Class == IC_RetainBlock ||
+         Class == IC_NoopCast;
+}
+
+/// IsNoopOnNull - Test if the given class represents instructions which do
+/// nothing if passed a null pointer.
+static bool IsNoopOnNull(InstructionClass Class) {
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_Release ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV ||
+         Class == IC_RetainBlock;
+}
+
+/// IsAlwaysTail - Test if the given class represents instructions which are
+/// always safe to mark with the "tail" keyword.
+static bool IsAlwaysTail(InstructionClass Class) {
+  // IC_RetainBlock may be given a stack argument.
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV;
+}
+
+/// IsNoThrow - Test if the given class represents instructions which are always
+/// safe to mark with the nounwind attribute.
+static bool IsNoThrow(InstructionClass Class) {
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_RetainBlock ||
+         Class == IC_Release ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV ||
+         Class == IC_AutoreleasepoolPush ||
+         Class == IC_AutoreleasepoolPop;
+}
+
+/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// argument verbatim, so if it's such a call and the return value has users,
+/// replace them with the argument value.
+static void EraseInstruction(Instruction *CI) {
+  Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+  bool Unused = CI->use_empty();
+
+  if (!Unused) {
+    // Replace the return value with the argument.
+    assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+           "Can't delete non-forwarding instruction with users!");
+    CI->replaceAllUsesWith(OldArg);
+  }
+
+  CI->eraseFromParent();
+
+  if (Unused)
+    RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which
+/// also knows how to look through objc_retain and objc_autorelease calls, which
+/// we know to return their argument verbatim.
+static const Value *GetUnderlyingObjCPtr(const Value *V) {
+  for (;;) {
+    V = GetUnderlyingObject(V);
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+
+  return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+  for (;;) {
+    V = V->stripPointerCasts();
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+  return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static Value *StripPointerCastsAndObjCCalls(Value *V) {
+  for (;;) {
+    V = V->stripPointerCasts();
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+  return V;
+}
+
+/// GetObjCArg - Assuming the given instruction is one of the special calls such
+/// as objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static Value *GetObjCArg(Value *Inst) {
+ return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+/// IsObjCIdentifiedObject - This is similar to AliasAnalysis'
+/// isObjCIdentifiedObject, except that it uses special knowledge of
+/// ObjC conventions.
+static bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+ Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+ Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// FindSingleUseIdentifiedObject - This is similar to
+/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value
+/// with multiple uses.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ if (Arg->hasOneUse()) {
+ if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+ return FindSingleUseIdentifiedObject(BC->getOperand(0));
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+ if (GEP->hasAllZeroIndices())
+ return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+ if (IsForwarding(GetBasicInstructionClass(Arg)))
+ return FindSingleUseIdentifiedObject(
+ cast<CallInst>(Arg)->getArgOperand(0));
+ if (!IsObjCIdentifiedObject(Arg))
+ return 0;
+ return Arg;
+ }
+
+ // If we found an identifiable object that has multiple uses, but they
+ // are all trivial uses, we can still consider this to be a single-use
+ // value.
+ if (IsObjCIdentifiedObject(Arg)) {
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ return 0;
+ }
+
+ return Arg;
+ }
+
+ return 0;
+}
+
+/// ModuleHasARC - Test if the given module looks interesting to run ARC
+/// optimization on.
+static bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer");
+}
+
+//===----------------------------------------------------------------------===//
+// ARC AliasAnalysis.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+
+namespace {
+ /// ObjCARCAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses knowledge of ARC constructs to answer queries.
+ ///
+ /// TODO: This class could be generalized to know about other ObjC-specific
+ /// tricks, such as knowing that ivars in the non-fragile ABI are non-aliasing
+ /// even though their offsets are dynamic.
+ class ObjCARCAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+ initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+ return new ObjCARCAliasAnalysis();
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
+ const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+ const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ AliasResult Result =
+ AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
+ Location(SB, LocB.Size, LocB.TBAATag));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA);
+ const Value *UB = GetUnderlyingObjCPtr(SB);
+ if (UA != SA || UB != SB) {
+ Result = AliasAnalysis::alias(Location(UA), Location(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return an offsetted pointer value.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias; +} + +bool +ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableARCOpts) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. + const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); + if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), + OrLocal)) + return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S); + if (U != S) + return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return false; +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // We have nothing to do. Just chain to the next AliasAnalysis. + return AliasAnalysis::getModRefBehavior(CS); +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefBehavior(F); + + switch (GetFunctionClass(F)) { + case IC_NoopCast: + return DoesNotAccessMemory; + default: + break; + } + + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefInfo(CS, Loc); + + switch (GetBasicInstructionClass(CS.getInstruction())) { + case IC_Retain: + case IC_RetainRV: + case IC_RetainBlock: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_NoopCast: + case IC_AutoreleasepoolPush: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + // These functions don't access any memory visible to the compiler. + return NoModRef; + default: + break; + } + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // TODO: Theoretically we could check for dependencies between objc_* calls + // and OnlyAccessesArgumentPointees calls or other well-behaved calls. + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + +//===----------------------------------------------------------------------===// +// ARC expansion. +//===----------------------------------------------------------------------===// + +#include "llvm/Support/InstIterator.h" +#include "llvm/Transforms/Scalar.h" + +namespace { + /// ObjCARCExpand - Early ARC transformations. + class ObjCARCExpand : public FunctionPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + /// Run - A flag indicating whether this optimization pass should run. 
+ bool Run;
+
+ public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+ "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+ return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ bool Changed = false;
+
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These calls return their argument verbatim, as a low-level
+ // optimization. However, this makes high-level optimizations
+ // harder. Undo any uses of this optimization that the front-end
+ // emitted here. We'll redo them in a later pass.
+ Changed = true;
+ Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC optimization.
+//===----------------------------------------------------------------------===//
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+/// TODO: The pointer returned from objc_loadWeakRetained is retained.
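+
+// As an illustration of how these passes are meant to be scheduled, here is
+// a minimal sketch of a client, assuming the 2011-era legacy PassManager and
+// that the create*Pass factory functions are declared in
+// llvm/Transforms/Scalar.h:
+//
+//   #include "llvm/Module.h"
+//   #include "llvm/PassManager.h"
+//   #include "llvm/Transforms/Scalar.h"
+//
+//   void runARCPasses(llvm::Module &M) {
+//     llvm::PassManager PM;
+//     PM.add(llvm::createObjCARCExpandPass()); // early, before inlining
+//     PM.add(llvm::createObjCARCOptPass());    // the main ARC optimizer
+//     PM.run(M);
+//   }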
+
+#include "llvm/GlobalAlias.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
+ /// uses many of the same techniques, except it uses special ObjC-specific
+ /// reasoning about pointer relationships.
+ class ProvenanceAnalysis {
+ AliasAnalysis *AA;
+
+ typedef std::pair<const Value *, const Value *> ValuePairTy;
+ typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+ CachedResultsTy CachedResults;
+
+ bool relatedCheck(const Value *A, const Value *B);
+ bool relatedSelect(const SelectInst *A, const Value *B);
+ bool relatedPHI(const PHINode *A, const Value *B);
+
+ // Do not implement.
+ void operator=(const ProvenanceAnalysis &);
+ ProvenanceAnalysis(const ProvenanceAnalysis &);
+
+ public:
+ ProvenanceAnalysis() {}
+
+ void setAA(AliasAnalysis *aa) { AA = aa; }
+
+ AliasAnalysis *getAA() const { return AA; }
+
+ bool related(const Value *A, const Value *B);
+
+ void clear() {
+ CachedResults.clear();
+ }
+ };
+}
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for relations between the values on corresponding arms.
+ if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+ if (A->getCondition() == SB->getCondition()) {
+ if (related(A->getTrueValue(), SB->getTrueValue()))
+ return true;
+ if (related(A->getFalseValue(), SB->getFalseValue()))
+ return true;
+ return false;
+ }
+
+ // Check both arms of the Select node individually.
+ if (related(A->getTrueValue(), B))
+ return true;
+ if (related(A->getFalseValue(), B))
+ return true;
+
+ // The arms both checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+ // If the values are PHIs in the same block, we can do a more precise as well
+ // as efficient check: just check for relations between the values on
+ // corresponding edges.
+ if (const PHINode *PNB = dyn_cast<PHINode>(B))
+ if (PNB->getParent() == A->getParent()) {
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+ if (related(A->getIncomingValue(i),
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ return true;
+ return false;
+ }
+
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet<const Value *, 4> UniqueSrc;
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+ const Value *PV1 = A->getIncomingValue(i);
+ if (UniqueSrc.insert(PV1) && related(PV1, B))
+ return true;
+ }
+
+ // All of the arms checked out.
+ return false;
+}
+
+/// isStoredObjCPointer - Test if the value of P, or any value covered by its
+/// provenance, is ever stored within the function (not counting callees).
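+/// For example, in "store i8* %p, i8** %slot" the pointer %p itself is
+/// stored (it is operand 0 of the store), whereas "store i32 %v, i32* %p"
+/// merely stores through %p and doesn't count.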
+static bool isStoredObjCPointer(const Value *P) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Worklist;
+ Worklist.push_back(P);
+ Visited.insert(P);
+ do {
+ P = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+ UI != UE; ++UI) {
+ const User *Ur = *UI;
+ if (isa<StoreInst>(Ur)) {
+ if (UI.getOperandNo() == 0)
+ // The pointer is stored.
+ return true;
+ // The pointer is stored through.
+ continue;
+ }
+ if (isa<CallInst>(Ur))
+ // The pointer is passed as an argument, ignore this.
+ continue;
+ if (isa<PtrToIntInst>(P))
+ // Assume the worst.
+ return true;
+ if (Visited.insert(Ur))
+ Worklist.push_back(Ur);
+ }
+ } while (!Worklist.empty());
+
+ // Everything checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+ // Skip past provenance pass-throughs.
+ A = GetUnderlyingObjCPtr(A);
+ B = GetUnderlyingObjCPtr(B);
+
+ // Quick check.
+ if (A == B)
+ return true;
+
+ // Ask regular AliasAnalysis, for a first approximation.
+ switch (AA->alias(A, B)) {
+ case AliasAnalysis::NoAlias:
+ return false;
+ case AliasAnalysis::MustAlias:
+ case AliasAnalysis::PartialAlias:
+ return true;
+ case AliasAnalysis::MayAlias:
+ break;
+ }
+
+ bool AIsIdentified = IsObjCIdentifiedObject(A);
+ bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+ // An ObjC-Identified object can't alias a load if it is never locally stored.
+ if (AIsIdentified) {
+ if (BIsIdentified) {
+ // If both pointers have provenance, they can be directly compared.
+ if (A != B)
+ return false;
+ } else {
+ if (isa<LoadInst>(B))
+ return isStoredObjCPointer(A);
+ }
+ } else {
+ if (BIsIdentified && isa<LoadInst>(A))
+ return isStoredObjCPointer(B);
+ }
+
+ // Special handling for PHI and Select.
+ if (const PHINode *PN = dyn_cast<PHINode>(A))
+ return relatedPHI(PN, B);
+ if (const PHINode *PN = dyn_cast<PHINode>(B))
+ return relatedPHI(PN, A);
+ if (const SelectInst *S = dyn_cast<SelectInst>(A))
+ return relatedSelect(S, B);
+ if (const SelectInst *S = dyn_cast<SelectInst>(B))
+ return relatedSelect(S, A);
+
+ // Conservative.
+ return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+ // Begin by inserting a conservative value into the map. If the insertion
+ // fails, we have the answer already. If it succeeds, leave it there until we
+ // compute the real answer to guard against recursive queries.
+ if (A > B) std::swap(A, B);
+ std::pair<CachedResultsTy::iterator, bool> Pair =
+ CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ bool Result = relatedCheck(A, B);
+ CachedResults[ValuePairTy(A, B)] = Result;
+ return Result;
+}
+
+namespace {
+ // Sequence - A sequence of states that a pointer may go through in which an
+ // objc_retain and objc_release are actually needed.
+ enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x)
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement
+ S_Use, ///< any use of x
+ S_Stop, ///< like S_Release, but code motion is stopped
+ S_Release, ///< objc_release(x)
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release
+ };
+}
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ // Note that we can't merge S_CanRelease and S_Use.
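+ // For example, per the cases below: top-down, MergeSeqs(S_Retain, S_Use)
+ // yields S_Use; bottom-up, MergeSeqs(S_Use, S_MovableRelease) yields S_Use,
+ // while MergeSeqs(S_Release, S_MovableRelease) yields the more conservative
+ // S_Release. Any pair not handled below merges to S_None.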
+ if (A > B) std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
+
+namespace {
+ /// RRInfo - Unidirectional information about either a
+ /// retain-decrement-use-release sequence or release-use-decrement-retain
+ /// reverse sequence.
+ struct RRInfo {
+ /// KnownIncremented - After an objc_retain, the reference count of the
+ /// referenced object is known to be positive. Similarly, before an
+ /// objc_release, the reference count of the referenced object is known to
+ /// be positive. If there are retain-release pairs in code regions where the
+ /// retain count is known to be positive, they can be eliminated, regardless
+ /// of any side effects between them.
+ bool KnownIncremented;
+
+ /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
+ /// opposed to objc_retain calls).
+ bool IsRetainBlock;
+
+ /// IsTailCallRelease - True if the objc_release calls are all marked
+ /// with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// ReleaseMetadata - If the Calls are objc_release calls and they all have
+ /// a clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// Calls - For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// ReverseInsertPts - The set of optimal insert positions for
+ /// moving calls in the opposite sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ RRInfo() :
+ KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+ ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() {
+ KnownIncremented = false;
+ IsRetainBlock = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// PtrState - This class summarizes several per-pointer runtime properties
+ /// which are propagated through the flow graph.
+ class PtrState {
+ /// RefCount - The known minimum number of reference count increments.
+ unsigned RefCount;
+
+ /// Seq - The current position in the sequence.
+ Sequence Seq;
+
+ public:
+ /// RRI - Unidirectional information about the current sequence.
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : RefCount(0), Seq(S_None) {}
+
+ void IncrementRefCount() {
+ if (RefCount != UINT_MAX) ++RefCount;
+ }
+
+ void DecrementRefCount() {
+ if (RefCount != 0) --RefCount;
+ }
+
+ void ClearRefCount() {
+ RefCount = 0;
+ }
+
+ bool IsKnownIncremented() const {
+ return RefCount > 0;
+ }
+
+ void SetSeq(Sequence NewSeq) {
+ Seq = NewSeq;
+ }
+
+ void SetSeqToRelease(MDNode *M) {
+ if (Seq == S_None || Seq == S_Use) {
+ Seq = M ?
S_MovableRelease : S_Release; + RRI.ReleaseMetadata = M; + } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) { + Seq = S_Release; + RRI.ReleaseMetadata = 0; + } + } + + Sequence GetSeq() const { + return Seq; + } + + void ClearSequenceProgress() { + Seq = S_None; + RRI.clear(); + } + + void Merge(const PtrState &Other, bool TopDown); + }; +} + +void +PtrState::Merge(const PtrState &Other, bool TopDown) { + Seq = MergeSeqs(Seq, Other.Seq, TopDown); + RefCount = std::min(RefCount, Other.RefCount); + + // We can't merge a plain objc_retain with an objc_retainBlock. + if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) + Seq = S_None; + + if (Seq == S_None) { + RRI.clear(); + } else { + // Conservatively merge the ReleaseMetadata information. + if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) + RRI.ReleaseMetadata = 0; + + RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented; + RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; + RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); + RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(), + Other.RRI.ReverseInsertPts.end()); + } +} + +namespace { + /// BBState - Per-BasicBlock state. + class BBState { + /// TopDownPathCount - The number of unique control paths from the entry + /// which can reach this block. + unsigned TopDownPathCount; + + /// BottomUpPathCount - The number of unique control paths to exits + /// from this block. + unsigned BottomUpPathCount; + + /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp. + typedef MapVector<const Value *, PtrState> MapTy; + + /// PerPtrTopDown - The top-down traversal uses this to record information + /// known about a pointer at the bottom of each block. + MapTy PerPtrTopDown; + + /// PerPtrBottomUp - The bottom-up traversal uses this to record information + /// known about a pointer at the top of each block. + MapTy PerPtrBottomUp; + + public: + BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} + + typedef MapTy::iterator ptr_iterator; + typedef MapTy::const_iterator ptr_const_iterator; + + ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } + ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } + ptr_const_iterator top_down_ptr_begin() const { + return PerPtrTopDown.begin(); + } + ptr_const_iterator top_down_ptr_end() const { + return PerPtrTopDown.end(); + } + + ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } + ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } + ptr_const_iterator bottom_up_ptr_begin() const { + return PerPtrBottomUp.begin(); + } + ptr_const_iterator bottom_up_ptr_end() const { + return PerPtrBottomUp.end(); + } + + /// SetAsEntry - Mark this block as being an entry block, which has one + /// path from the entry by definition. + void SetAsEntry() { TopDownPathCount = 1; } + + /// SetAsExit - Mark this block as being an exit block, which has one + /// path to an exit by definition. 
+ void SetAsExit() { BottomUpPathCount = 1; }
+
+ PtrState &getPtrTopDownState(const Value *Arg) {
+ return PerPtrTopDown[Arg];
+ }
+
+ PtrState &getPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp[Arg];
+ }
+
+ void clearBottomUpPointers() {
+ PerPtrBottomUp.clear();
+ }
+
+ void clearTopDownPointers() {
+ PerPtrTopDown.clear();
+ }
+
+ void InitFromPred(const BBState &Other);
+ void InitFromSucc(const BBState &Other);
+ void MergePred(const BBState &Other);
+ void MergeSucc(const BBState &Other);
+
+ /// GetAllPathCount - Return the number of possible unique paths from an
+ /// entry to an exit which pass through this block. This is only valid
+ /// after both the top-down and bottom-up traversals are complete.
+ unsigned GetAllPathCount() const {
+ return TopDownPathCount * BottomUpPathCount;
+ }
+ };
+}
+
+void BBState::InitFromPred(const BBState &Other) {
+ PerPtrTopDown = Other.PerPtrTopDown;
+ TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) {
+ PerPtrBottomUp = Other.PerPtrBottomUp;
+ BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// MergePred - The top-down traversal uses this to merge information about
+/// predecessors to form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+ // Other.TopDownPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ TopDownPathCount += Other.TopDownPathCount;
+
+ // For each entry in the other set, if our set has an entry with the same key,
+ // merge the entries. Otherwise, copy the entry and merge it with an empty
+ // entry.
+ for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+ ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/true);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry with the
+ // same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = top_down_ptr_begin(),
+ ME = top_down_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// MergeSucc - The bottom-up traversal uses this to merge information about
+/// successors to form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+ // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ BottomUpPathCount += Other.BottomUpPathCount;
+
+ // For each entry in the other set, if our set has an entry with the
+ // same key, merge the entries. Otherwise, copy the entry and merge
+ // it with an empty entry.
+ for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+ ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/false);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry
+ // with the same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = bottom_up_ptr_begin(),
+ ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+namespace {
+ /// ObjCARCOpt - The main ARC optimization pass.
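+ /// At a high level, the pass works in stages (see the methods below):
+ /// OptimizeIndividualCalls performs local peepholes, VisitBottomUp and
+ /// VisitTopDown compute per-pointer sequences in each direction, and
+ /// PerformCodePlacement/MoveCalls pair up and rewrite retain/release
+ /// calls that cancel out.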
+ class ObjCARCOpt : public FunctionPass {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// RetainFunc, ReleaseFunc - Declarations for objc_retain,
+ /// objc_retainBlock, and objc_release.
+ Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
+
+ /// RetainRVCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
+ *RetainCallee, *AutoreleaseCallee;
+
+ /// UsedInThisFunction - Flags which determine whether each of the
+ /// interesting runtime functions is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
+ /// metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
+ void OptimizeIndividualCalls(Function &F);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() {
+ return new ObjCARCOpt();
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ObjCARCAliasAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG(); +} + +Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { + if (!RetainRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + const FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainRVCallee = + M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, + Attributes); + } + return RetainRVCallee; +} + +Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { + if (!AutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + const FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + AutoreleaseRVCallee = + M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, + Attributes); + } + return AutoreleaseRVCallee; +} + +Constant *ObjCARCOpt::getReleaseCallee(Module *M) { + if (!ReleaseCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + ReleaseCallee = + M->getOrInsertFunction( + "objc_release", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attributes); + } + return ReleaseCallee; +} + +Constant *ObjCARCOpt::getRetainCallee(Module *M) { + if (!RetainCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainCallee = + M->getOrInsertFunction( + "objc_retain", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return RetainCallee; +} + +Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { + if (!AutoreleaseCallee) { + LLVMContext &C = M->getContext(); + std::vector<Type *> Params; + Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + AutoreleaseCallee = + M->getOrInsertFunction( + "objc_autorelease", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + Attributes); + } + return AutoreleaseCallee; +} + +/// CanAlterRefCount - Test whether the given instruction can result in a +/// reference count modification (positive or negative) for the pointer's +/// object. +static bool +CanAlterRefCount(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, InstructionClass Class) { + switch (Class) { + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_User: + // These operations never directly modify a reference count. + return false; + default: break; + } + + ImmutableCallSite CS = static_cast<const Value *>(Inst); + assert(CS && "Only calls can alter reference counts!"); + + // See if AliasAnalysis can help us with the call. + AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + if (AliasAnalysis::onlyReadsMemory(MRB)) + return false; + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + const Value *Op = *I; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; + } + + // Assume the worst. 
+ return true; +} + +/// CanUse - Test whether the given instruction can "use" the given pointer's +/// object in a way that requires the reference count to be positive. +static bool +CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, + InstructionClass Class) { + // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. + if (Class == IC_Call) + return false; + + // Consider various instructions which may have pointer arguments which are + // not "uses". + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { + // Comparing a pointer with null, or any other constant, isn't really a use, + // because we don't care what the pointer points to, or about the values + // of any other dynamic reference-counted pointers. + if (!IsPotentialUse(ICI->getOperand(1))) + return false; + } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { + // For calls, just check the arguments (and not the callee operand). + for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), + OE = CS.arg_end(); OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // Special-case stores, because we don't care about the stored value, just + // the store address. + const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + // If we can't tell what the underlying object was, assume there is a + // dependence. + return IsPotentialUse(Op) && PA.related(Op, Ptr); + } + + // Check each operand for a match. + for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + return true; + } + return false; +} + +/// CanInterruptRV - Test whether the given instruction can autorelease +/// any pointer or cause an autoreleasepool pop. +static bool +CanInterruptRV(InstructionClass Class) { + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_CallOrUser: + case IC_Call: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + return true; + default: + return false; + } +} + +namespace { + /// DependenceKind - There are several kinds of dependence-like concepts in + /// use here. + enum DependenceKind { + NeedsPositiveRetainCount, + CanChangeRetainCount, + RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. + RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. + RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. + }; +} + +/// Depends - Test if there can be dependencies on Inst through Arg. This +/// function only tests dependencies relevant for removing pairs of calls. +static bool +Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, + ProvenanceAnalysis &PA) { + // If we've reached the definition of Arg, stop. + if (Inst == Arg) + return true; + + switch (Flavor) { + case NeedsPositiveRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanUse(Inst, Arg, PA, Class); + } + } + + case CanChangeRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + // Conservatively assume this can decrement any count. 
+ return true;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanAlterRefCount(Inst, Arg, PA, Class);
+ }
+ }
+
+ case RetainAutoreleaseDep:
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPop:
+ // Don't merge an objc_autorelease with an objc_retain inside a different
+ // autoreleasepool scope.
+ return true;
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Nothing else matters for objc_retainAutorelease formation.
+ return false;
+ }
+ break;
+
+ case RetainAutoreleaseRVDep: {
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Anything that can autorelease interrupts
+ // retainAutoreleaseReturnValue formation.
+ return CanInterruptRV(Class);
+ }
+ break;
+ }
+
+ case RetainRVDep:
+ return CanInterruptRV(GetBasicInstructionClass(Inst));
+ }
+
+ llvm_unreachable("Invalid dependence flavor");
+ return true;
+}
+
+/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
+/// find local and non-local dependencies on Arg.
+/// TODO: Cache results?
+static void
+FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInstructions.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInstructions.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinel indicating that most optimizations are
+ // not safe.
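+ // (The sentinel is the all-ones Instruction pointer inserted below; it can
+ // never compare equal to a real instruction, so callers looking for one
+ // specific dependence conservatively fail when it is present.)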
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
+
+static bool isNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static bool isNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+/// OptimizeRetainCall - Turn objc_retain into
+/// objc_retainAutoreleasedReturnValue if the operand is a return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ CallSite CS(GetObjCArg(Retain));
+ Instruction *Call = CS.getInstruction();
+ if (!Call) return;
+ if (Call->getParent() != Retain->getParent()) return;
+
+ // Check that the call is next to the retain.
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return;
+
+ // Turn it into an objc_retainAutoreleasedReturnValue.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
+
+/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
+/// objc_retain if the operand is not a return value. Or, if it can be
+/// paired with an objc_autoreleaseReturnValue, delete the pair and
+/// return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+ // Check for the argument being from an immediately preceding call.
+ Value *Arg = GetObjCArg(RetainRV);
+ CallSite CS(Arg);
+ if (Instruction *Call = CS.getInstruction())
+ if (Call->getParent() == RetainRV->getParent()) {
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
+
+ // Check for being preceded by an objc_autoreleaseReturnValue on the same
+ // pointer. In this case, we can delete the pair.
+ BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ if (I != Begin) {
+ do --I; while (I != Begin && isNoopInstruction(I));
+ if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+ GetObjCArg(I) == Arg) {
+ Changed = true;
+ ++NumPeeps;
+ EraseInstruction(I);
+ EraseInstruction(RetainRV);
+ return true;
+ }
+ }
+
+ // Turn it into a plain objc_retain.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+ return false;
+}
+
+/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
+/// objc_autorelease if the result is not used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+ // Check for a return of the pointer value.
+ const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ }
+
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(AutoreleaseRV)->
+ setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis.
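+/// For example, per the cases below: a no-op cast call is erased outright, an
+/// objc_autorelease whose result is unused becomes an objc_release, calls
+/// which can never be passed a stack argument get the "tail" keyword, and an
+/// ARC call that is a no-op on null is deleted when its argument is provably
+/// null or undef.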
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { + // Reset all the flags in preparation for recomputing them. + UsedInThisFunction = 0; + + // Visit all objc_* calls in F. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + + switch (Class) { + default: break; + + // Delete no-op casts. These function calls have special semantics, but + // the semantics are entirely implemented via lowering in the front-end, + // so by the time they reach the optimizer, they are just no-op calls + // which return their argument. + // + // There are gray areas here, as the ability to cast reference-counted + // pointers to raw void* and back allows code to break ARC assumptions, + // however these are currently considered to be unimportant. + case IC_NoopCast: + Changed = true; + ++NumNoops; + EraseInstruction(Inst); + continue; + + // If the pointer-to-weak-pointer is null, it's undefined behavior. + case IC_StoreWeak: + case IC_LoadWeak: + case IC_LoadWeakRetained: + case IC_InitWeak: + case IC_DestroyWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0))) { + const Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_CopyWeak: + case IC_MoveWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0)) || + isNullOrUndef(CI->getArgOperand(1))) { + const Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_Retain: + OptimizeRetainCall(F, Inst); + break; + case IC_RetainRV: + if (OptimizeRetainRVCall(F, Inst)) + continue; + break; + case IC_AutoreleaseRV: + OptimizeAutoreleaseRVCall(F, Inst); + break; + } + + // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. + if (IsAutorelease(Class) && Inst->use_empty()) { + CallInst *Call = cast<CallInst>(Inst); + const Value *Arg = Call->getArgOperand(0); + Arg = FindSingleUseIdentifiedObject(Arg); + if (Arg) { + Changed = true; + ++NumAutoreleases; + + // Create the declaration lazily. + LLVMContext &C = Inst->getContext(); + CallInst *NewCall = + CallInst::Create(getReleaseCallee(F.getParent()), + Call->getArgOperand(0), "", Call); + NewCall->setMetadata(ImpreciseReleaseMDKind, + MDNode::get(C, ArrayRef<Value *>())); + EraseInstruction(Call); + Inst = NewCall; + Class = IC_Release; + } + } + + // For functions which can never be passed stack arguments, add + // a tail keyword. + if (IsAlwaysTail(Class)) { + Changed = true; + cast<CallInst>(Inst)->setTailCall(); + } + + // Set nounwind as needed. + if (IsNoThrow(Class)) { + Changed = true; + cast<CallInst>(Inst)->setDoesNotThrow(); + } + + if (!IsNoopOnNull(Class)) { + UsedInThisFunction |= 1 << Class; + continue; + } + + const Value *Arg = GetObjCArg(Inst); + + // ARC calls with null are no-ops. Delete them. + if (isNullOrUndef(Arg)) { + Changed = true; + ++NumNoops; + EraseInstruction(Inst); + continue; + } + + // Keep track of which of retain, release, autorelease, and retain_block + // are actually present in this function. 
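+ // (UsedInThisFunction is a bitmask indexed by InstructionClass; for
+ // example, the bit (1 << IC_Retain) records that an objc_retain was seen.)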
+ UsedInThisFunction |= 1 << Class; + + // If Arg is a PHI, and one or more incoming values to the + // PHI are null, and the call is control-equivalent to the PHI, and there + // are no relevant side effects between the PHI and the call, the call + // could be pushed up to just those paths with non-null incoming values. + // For now, don't bother splitting critical edges for this. + SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; + Worklist.push_back(std::make_pair(Inst, Arg)); + do { + std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); + Inst = Pair.first; + Arg = Pair.second; + + const PHINode *PN = dyn_cast<PHINode>(Arg); + if (!PN) continue; + + // Determine if the PHI has any null operands, or any incoming + // critical edges. + bool HasNull = false; + bool HasCriticalEdges = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (isNullOrUndef(Incoming)) + HasNull = true; + else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) + .getNumSuccessors() != 1) { + HasCriticalEdges = true; + break; + } + } + // If we have null operands and no critical edges, optimize. + if (!HasCriticalEdges && HasNull) { + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + + // Check that there is nothing that cares about the reference + // count between the call and the phi. + FindDependencies(NeedsPositiveRetainCount, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + if (DependingInstructions.size() == 1 && + *DependingInstructions.begin() == PN) { + Changed = true; + ++NumPartialNoops; + // Clone the call into each predecessor that has a non-null value. + CallInst *CInst = cast<CallInst>(Inst); + const Type *ParamTy = CInst->getArgOperand(0)->getType(); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (!isNullOrUndef(Incoming)) { + CallInst *Clone = cast<CallInst>(CInst->clone()); + Value *Op = PN->getIncomingValue(i); + Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); + if (Op->getType() != ParamTy) + Op = new BitCastInst(Op, ParamTy, "", InsertPos); + Clone->setArgOperand(0, Op); + Clone->insertBefore(InsertPos); + Worklist.push_back(std::make_pair(Clone, Incoming)); + } + } + // Erase the original call. + EraseInstruction(CInst); + continue; + } + } + } while (!Worklist.empty()); + } +} + +/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible +/// control flow, or other CFG structures where moving code across the edge +/// would result in it being executed more. +void +ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + BBState &MyStates) const { + // If any top-down local-use or possible-dec has a succ which is earlier in + // the sequence, forget it. 
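+ // For example, if a pointer is in S_Use here but some successor's
+ // bottom-up state for it is S_None, moving a release across that edge
+ // could execute it on a path where it previously did not run, so the
+ // sequence progress is cleared below.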
+ for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I)
+ switch (I->second.GetSeq()) {
+ default: break;
+ case S_Use: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ case S_CanRelease:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ break;
+ }
+ case S_CanRelease: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each successor to compute the initial state
+ // for the current block.
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ succ_const_iterator SI(TI), SE(TI, false);
+ if (SI == SE)
+ MyStates.SetAsExit();
+ else
+ do {
+ const BasicBlock *Succ = *SI++;
+ if (Succ == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromSucc(I->second);
+ while (SI != SE) {
+ Succ = *SI++;
+ if (Succ != BB) {
+ I = BBStates.find(Succ);
+ if (I != BBStates.end())
+ MyStates.MergeSucc(I->second);
+ }
+ }
+ break;
+ } while (SI != SE);
+
+ // Visit all the instructions, bottom-up.
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+ Instruction *Inst = llvm::prior(I);
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // If we see two releases in a row on the same pointer, make a note,
+ // and we'll cycle back to revisit it after we've hopefully eliminated
+ // the second release, which may allow us to eliminate the first
+ // release too.
+ // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + NestingDetected = true; + + S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); + S.RRI.clear(); + S.RRI.KnownIncremented = S.IsKnownIncremented(); + S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); + S.RRI.Calls.insert(Inst); + + S.IncrementRefCount(); + break; + } + case IC_RetainBlock: + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + S.DecrementRefCount(); + + switch (S.GetSeq()) { + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_CanRelease: + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + Retains[Inst] = S.RRI; + } + S.ClearSequenceProgress(); + break; + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + break; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearBottomUpPointers(); + continue; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + continue; + default: + break; + } + + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), + ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible retains and releases. + if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + // Check for a retain (we're going bottom-up here). + S.DecrementRefCount(); + + // Check for a release. + if (!IsRetain(Class) && Class != IC_RetainBlock) + switch (Seq) { + case S_Use: + S.SetSeq(S_CanRelease); + continue; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_Release: + case S_MovableRelease: + if (CanUse(Inst, Ptr, PA, Class)) { + S.RRI.ReverseInsertPts.clear(); + S.RRI.ReverseInsertPts.insert(Inst); + S.SetSeq(S_Use); + } else if (Seq == S_Release && + (Class == IC_User || Class == IC_CallOrUser)) { + // Non-movable releases depend on any possible objc pointer use. + S.SetSeq(S_Stop); + S.RRI.ReverseInsertPts.clear(); + S.RRI.ReverseInsertPts.insert(Inst); + } + break; + case S_Stop: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_CanRelease: + case S_Use: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + } + + return NestingDetected; +} + +bool +ObjCARCOpt::VisitTopDown(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + DenseMap<Value *, RRInfo> &Releases) { + bool NestingDetected = false; + BBState &MyStates = BBStates[BB]; + + // Merge the states from each predecessor to compute the initial state + // for the current block. 
+ const_pred_iterator PI(BB), PE(BB, false);
+ if (PI == PE)
+ MyStates.SetAsEntry();
+ else
+ do {
+ const BasicBlock *Pred = *PI++;
+ if (Pred == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromPred(I->second);
+ while (PI != PE) {
+ Pred = *PI++;
+ if (Pred != BB) {
+ I = BBStates.find(Pred);
+ if (I != BBStates.end())
+ MyStates.MergePred(I->second);
+ }
+ }
+ break;
+ } while (PI != PE);
+
+ // Visit all the instructions, top-down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *Inst = I;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // If we see two retains in a row on the same pointer, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ S.SetSeq(S_Retain);
+ S.RRI.clear();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (!IsRetain(Class) && Class != IC_RetainBlock &&
+ CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Check for a release.
+ S.DecrementRefCount();
+
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue; + case S_Use: + case S_CanRelease: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_CanRelease: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_Use: + case S_Retain: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + } + + CheckForCFGHazards(BB, BBStates, MyStates); + return NestingDetected; +} + +// Visit - Visit the function both top-down and bottom-up. +bool +ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { + // Use postorder for bottom-up, and reverse-postorder for top-down, because we + // magically know that loops will be well behaved, i.e. they won't repeatedly + // call retain on a single pointer without doing a release. + bool BottomUpNestingDetected = false; + SmallVector<BasicBlock *, 8> PostOrder; + for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) { + BasicBlock *BB = *I; + PostOrder.push_back(BB); + + BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); + } + + // Iterate through the post-order in reverse order, achieving a + // reverse-postorder traversal. We don't use the ReversePostOrderTraversal + // class here because it works by computing its own full postorder iteration, + // recording the sequence, and playing it back in reverse. Since we're already + // doing a full iteration above, we can just record the sequence manually and + // avoid the cost of having ReversePostOrderTraversal compute it. + bool TopDownNestingDetected = false; + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator + RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI) + TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases); + + return TopDownNestingDetected && BottomUpNestingDetected; +} + +/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove. +void ObjCARCOpt::MoveCalls(Value *Arg, + RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, + SmallVectorImpl<Instruction *> &DeadInsts) { + const Type *ArgTy = Arg->getType(); + const Type *ParamTy = + (RetainRVFunc ? RetainRVFunc : + RetainFunc ? RetainFunc : + RetainBlockFunc)->arg_begin()->getType(); + + // Insert the new retain and release calls. + for (SmallPtrSet<Instruction *, 2>::const_iterator + PI = ReleasesToMove.ReverseInsertPts.begin(), + PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) { + Instruction *InsertPt = *PI; + Value *MyArg = ArgTy == ParamTy ? Arg : + new BitCastInst(Arg, ParamTy, "", InsertPt); + CallInst *Call = + CallInst::Create(RetainsToMove.IsRetainBlock ? + RetainBlockFunc : RetainFunc, + MyArg, "", InsertPt); + Call->setDoesNotThrow(); + if (!RetainsToMove.IsRetainBlock) + Call->setTailCall(); + } + for (SmallPtrSet<Instruction *, 2>::const_iterator + PI = RetainsToMove.ReverseInsertPts.begin(), + PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { + Instruction *LastUse = *PI; + Instruction *InsertPts[] = { 0, 0, 0 }; + if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) { + // We can't insert code immediately after an invoke instruction, so + // insert code at the beginning of both successor blocks instead. 
+ // The invoke's return value isn't available in the unwind block, + // but our releases will never depend on it, because they must be + // paired with retains from before the invoke. + InsertPts[0] = II->getNormalDest()->getFirstNonPHI(); + InsertPts[1] = II->getUnwindDest()->getFirstNonPHI(); + } else { + // Insert code immediately after the last use. + InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse)); + } + + for (Instruction **I = InsertPts; *I; ++I) { + Instruction *InsertPt = *I; + Value *MyArg = ArgTy == ParamTy ? Arg : + new BitCastInst(Arg, ParamTy, "", InsertPt); + CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt); + // Attach a clang.imprecise_release metadata tag, if appropriate. + if (MDNode *M = ReleasesToMove.ReleaseMetadata) + Call->setMetadata(ImpreciseReleaseMDKind, M); + Call->setDoesNotThrow(); + if (ReleasesToMove.IsTailCallRelease) + Call->setTailCall(); + } + } + + // Delete the original retain and release calls. + for (SmallPtrSet<Instruction *, 2>::const_iterator + AI = RetainsToMove.Calls.begin(), + AE = RetainsToMove.Calls.end(); AI != AE; ++AI) { + Instruction *OrigRetain = *AI; + Retains.blot(OrigRetain); + DeadInsts.push_back(OrigRetain); + } + for (SmallPtrSet<Instruction *, 2>::const_iterator + AI = ReleasesToMove.Calls.begin(), + AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) { + Instruction *OrigRelease = *AI; + Releases.erase(OrigRelease); + DeadInsts.push_back(OrigRelease); + } +} + +bool +ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> + &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { + bool AnyPairsCompletelyEliminated = false; + RRInfo RetainsToMove; + RRInfo ReleasesToMove; + SmallVector<Instruction *, 4> NewRetains; + SmallVector<Instruction *, 4> NewReleases; + SmallVector<Instruction *, 8> DeadInsts; + + for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), + E = Retains.end(); I != E; ) { + Value *V = (I++)->first; + if (!V) continue; // blotted + + Instruction *Retain = cast<Instruction>(V); + Value *Arg = GetObjCArg(Retain); + + // If the object being released is in static or stack storage, we know it's + // not being managed by ObjC reference counting, so we can delete pairs + // regardless of what possible decrements or uses lie between them. + bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); + + // If a pair happens in a region where it is known that the reference count + // is already incremented, we can similarly ignore possible decrements. + bool KnownIncrementedTD = true, KnownIncrementedBU = true; + + // Connect the dots between the top-down-collected RetainsToMove and + // bottom-up-collected ReleasesToMove to form sets of related calls. + // This is an iterative process so that we connect multiple releases + // to multiple retains if needed. 
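+ // For example (an illustrative sketch): a retain whose matching
+ // releases sit on two CFG paths,
+ //   retain(x); if (c) release(x); else release(x);
+ // pulls both releases into ReleasesToMove, and each of those releases
+ // may in turn pull additional retains into RetainsToMove, so the loop
+ // below alternates directions until neither set grows.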
+ unsigned OldDelta = 0;
+ unsigned NewDelta = 0;
+ unsigned OldCount = 0;
+ unsigned NewCount = 0;
+ bool FirstRelease = true;
+ bool FirstRetain = true;
+ NewRetains.push_back(Retain);
+ for (;;) {
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+ Instruction *NewRetain = *NI;
+ MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ assert(It != Retains.end());
+ const RRInfo &NewRetainRRI = It->second;
+ KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewRetainRRI.Calls.begin(),
+ LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewRetainRelease = *LI;
+ DenseMap<Value *, RRInfo>::const_iterator Jt =
+ Releases.find(NewRetainRelease);
+ if (Jt == Releases.end())
+ goto next_retain;
+ const RRInfo &NewRetainReleaseRRI = Jt->second;
+ assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+ if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+ OldDelta -=
+ BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // Merge the ReleaseMetadata and IsTailCallRelease values.
+ if (FirstRelease) {
+ ReleasesToMove.ReleaseMetadata =
+ NewRetainReleaseRRI.ReleaseMetadata;
+ ReleasesToMove.IsTailCallRelease =
+ NewRetainReleaseRRI.IsTailCallRelease;
+ FirstRelease = false;
+ } else {
+ if (ReleasesToMove.ReleaseMetadata !=
+ NewRetainReleaseRRI.ReleaseMetadata)
+ ReleasesToMove.ReleaseMetadata = 0;
+ if (ReleasesToMove.IsTailCallRelease !=
+ NewRetainReleaseRRI.IsTailCallRelease)
+ ReleasesToMove.IsTailCallRelease = false;
+ }
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+ RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+ NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ }
+ NewReleases.push_back(NewRetainRelease);
+ }
+ }
+ }
+ NewRetains.clear();
+ if (NewReleases.empty()) break;
+
+ // Back the other way.
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+ Instruction *NewRelease = *NI;
+ DenseMap<Value *, RRInfo>::const_iterator It =
+ Releases.find(NewRelease);
+ assert(It != Releases.end());
+ const RRInfo &NewReleaseRRI = It->second;
+ KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewReleaseRRI.Calls.begin(),
+ LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewReleaseRetain = *LI;
+ MapVector<Value *, RRInfo>::const_iterator Jt =
+ Retains.find(NewReleaseRetain);
+ if (Jt == Retains.end())
+ goto next_retain;
+ const RRInfo &NewReleaseRetainRRI = Jt->second;
+ assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ unsigned PathCount =
+ BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+ OldDelta += PathCount;
+ OldCount += PathCount;
+
+ // Merge the IsRetainBlock values.
+ if (FirstRetain) {
+ RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+ FirstRetain = false;
+ } else if (RetainsToMove.IsRetainBlock !=
+ NewReleaseRetainRRI.IsRetainBlock)
+ // It's not possible to merge the sequences if one uses
+ // objc_retain and the other uses objc_retainBlock.
+ goto next_retain;
+
+ // Collect the optimal insertion points.
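+ // (GetAllPathCount() weighs each block by the number of CFG paths
+ // through it, so the OldDelta/NewDelta bookkeeping below can check
+ // that the rewrite keeps retains and releases balanced on every path.)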
+ if (!KnownSafe) + for (SmallPtrSet<Instruction *, 2>::const_iterator + RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), + RE = NewReleaseRetainRRI.ReverseInsertPts.end(); + RI != RE; ++RI) { + Instruction *RIP = *RI; + if (RetainsToMove.ReverseInsertPts.insert(RIP)) { + PathCount = BBStates[RIP->getParent()].GetAllPathCount(); + NewDelta += PathCount; + NewCount += PathCount; + } + } + NewRetains.push_back(NewReleaseRetain); + } + } + } + NewReleases.clear(); + if (NewRetains.empty()) break; + } + + // If the pointer is known incremented, we can safely delete the pair + // regardless of what's between them. + if (KnownIncrementedTD || KnownIncrementedBU) { + RetainsToMove.ReverseInsertPts.clear(); + ReleasesToMove.ReverseInsertPts.clear(); + NewCount = 0; + } + + // Determine whether the original call points are balanced in the retain and + // release calls through the program. If not, conservatively don't touch + // them. + // TODO: It's theoretically possible to do code motion in this case, as + // long as the existing imbalances are maintained. + if (OldDelta != 0) + goto next_retain; + + // Determine whether the new insertion points we computed preserve the + // balance of retain and release calls through the program. + // TODO: If the fully aggressive solution isn't valid, try to find a + // less aggressive solution which is. + if (NewDelta != 0) + goto next_retain; + + // Ok, everything checks out and we're all set. Let's move some code! + Changed = true; + AnyPairsCompletelyEliminated = NewCount == 0; + NumRRs += OldCount - NewCount; + MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts); + + next_retain: + NewReleases.clear(); + NewRetains.clear(); + RetainsToMove.clear(); + ReleasesToMove.clear(); + } + + // Now that we're done moving everything, we can delete the newly dead + // instructions, as we no longer need them as insert points. + while (!DeadInsts.empty()) + EraseInstruction(DeadInsts.pop_back_val()); + + return AnyPairsCompletelyEliminated; +} + +/// OptimizeWeakCalls - Weak pointer optimizations. +void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + // First, do memdep-style RLE and S2L optimizations. We can't use memdep + // itself because it uses AliasAnalysis and we need to do provenance + // queries instead. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + InstructionClass Class = GetBasicInstructionClass(Inst); + if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) + continue; + + // Delete objc_loadWeak calls with no users. + if (Class == IC_LoadWeak && Inst->use_empty()) { + Inst->eraseFromParent(); + continue; + } + + // TODO: For now, just look for an earlier available version of this value + // within the same block. Theoretically, we could do memdep-style non-local + // analysis too, but that would want caching. A better approach would be to + // use the technique that EarlyCSE uses. + inst_iterator Current = llvm::prior(I); + BasicBlock *CurrentBB = Current.getBasicBlockIterator(); + for (BasicBlock::iterator B = CurrentBB->begin(), + J = Current.getInstructionIterator(); + J != B; --J) { + Instruction *EarlierInst = &*llvm::prior(J); + InstructionClass EarlierClass = GetInstructionClass(EarlierInst); + switch (EarlierClass) { + case IC_LoadWeak: + case IC_LoadWeakRetained: { + // If this is loading from the same pointer, replace this load's value + // with that one. 
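+ // For instance (an illustrative sketch):
+ //   %x = call i8* @objc_loadWeak(i8** %p)
+ //   %y = call i8* @objc_loadWeak(i8** %p)
+ // The second load can reuse %x when %p is provably not clobbered in
+ // between, which the provenance-based alias queries below establish.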
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall);
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_StoreWeak:
+ case IC_InitWeak: {
+ // If this is storing to the same pointer and has the same size etc.,
+ // replace this load's value with the stored value.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_MoveWeak:
+ case IC_CopyWeak:
+ // TODO: Grab the copied value.
+ goto clobbered;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ case IC_User:
+ // Weak pointers are only modified through the weak entry points
+ // (and arbitrary calls, which could call the weak entry points).
+ break;
+ default:
+ // Anything else could modify the weak pointer.
+ goto clobbered;
+ }
+ }
+ clobbered:;
+ }
+
+ // Then, for each destroyWeak with an alloca operand, check to see if
+ // the alloca and all its users can be zapped.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_DestroyWeak)
+ continue;
+
+ CallInst *Call = cast<CallInst>(Inst);
+ Value *Arg = Call->getArgOperand(0);
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ++UI) {
+ Instruction *UserInst = cast<Instruction>(*UI);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ case IC_DestroyWeak:
+ continue;
+ default:
+ goto done;
+ }
+ }
+ Changed = true;
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ) {
+ CallInst *UserInst = cast<CallInst>(*UI++);
+ if (!UserInst->use_empty())
+ UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->eraseFromParent();
+ }
+ Alloca->eraseFromParent();
+ done:;
+ }
+ }
+}
+
+/// OptimizeSequences - Identify program paths which execute sequences of
+/// retains and releases which can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+ /// Releases, Retains - These are used to store the results of the main flow
+ /// analysis.
These use Value* as the key instead of Instruction* so that the
+ /// map stays valid when we get around to rewriting code and calls get
+ /// replaced by arguments.
+ DenseMap<Value *, RRInfo> Releases;
+ MapVector<Value *, RRInfo> Retains;
+
+ /// BBStates - This is used during the traversal of the function to track the
+ /// states for each identified object at each block.
+ DenseMap<const BasicBlock *, BBState> BBStates;
+
+ // Analyze the CFG of the function, and all instructions.
+ bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+ // Transform.
+ return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+}
+
+/// OptimizeReturns - Look for this pattern:
+///
+/// %call = call i8* @something(...)
+/// %2 = call i8* @objc_retain(i8* %call)
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+///
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// convert the autorelease to autoreleaseRV.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+ if (!F.getReturnType()->isPointerTy())
+ return;
+
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ BasicBlock *BB = FI;
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+ if (!Ret) continue;
+
+ const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ BB, Ret, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Autorelease =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ if (!Autorelease)
+ goto next_block;
+ InstructionClass AutoreleaseClass =
+ GetBasicInstructionClass(Autorelease);
+ if (!IsAutorelease(AutoreleaseClass))
+ goto next_block;
+ if (GetObjCArg(Autorelease) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Check that there is nothing that can affect the reference
+ // count between the autorelease and the retain.
+ FindDependencies(CanChangeRetainCount, Arg,
+ BB, Autorelease, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Retain =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that we found a retain with the same argument.
+ if (!Retain ||
+ !IsRetain(GetBasicInstructionClass(Retain)) ||
+ GetObjCArg(Retain) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Convert the autorelease to an autoreleaseRV, since it's
+ // returning the value.
+ if (AutoreleaseClass == IC_Autorelease) {
+ Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+ AutoreleaseClass = IC_AutoreleaseRV;
+ }
+
+ // Check that there is nothing that can affect the reference
+ // count between the retain and the call.
+ FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+ DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Call =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that the pointer is the return value of the call.
+ if (!Call || Arg != Call)
+ goto next_block;
+
+ // Check that the call is a regular call.
+ InstructionClass Class = GetBasicInstructionClass(Call);
+ if (Class != IC_CallOrUser && Class != IC_Call)
+ goto next_block;
+
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
+ }
+ }
+ }
+
+ next_block:
+ DependingInstructions.clear();
+ Visited.clear();
+ }
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // Identify the imprecise release metadata kind.
+ ImpreciseReleaseMDKind =
+ M.getContext().getMDKindID("clang.imprecise_release");
+
+ // Identify the declarations for objc_retain and friends.
+ RetainFunc = M.getFunction("objc_retain");
+ RetainBlockFunc = M.getFunction("objc_retainBlock");
+ RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
+ ReleaseFunc = M.getFunction("objc_release");
+
+ // Intuitively, objc_retain and others are nocapture, however in practice
+ // they are not, because they return their argument value. And objc_release
+ // calls finalizers.
+
+ // These are initialized lazily.
+ RetainRVCallee = 0;
+ AutoreleaseRVCallee = 0;
+ ReleaseCallee = 0;
+ RetainCallee = 0;
+ AutoreleaseCallee = 0;
+
+ return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // This pass performs several distinct transformations. As a compile-time aid
+ // when compiling code that isn't ObjC, skip these if the relevant ObjC
+ // library functions aren't declared.
+
+ // Preliminary optimizations. This also computes UsedInThisFunction.
+ OptimizeIndividualCalls(F);
+
+ // Optimizations for weak pointers.
+ if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+ (1 << IC_LoadWeakRetained) |
+ (1 << IC_StoreWeak) |
+ (1 << IC_InitWeak) |
+ (1 << IC_CopyWeak) |
+ (1 << IC_MoveWeak) |
+ (1 << IC_DestroyWeak)))
+ OptimizeWeakCalls(F);
+
+ // Optimizations for retain+release pairs.
+ if (UsedInThisFunction & ((1 << IC_Retain) |
+ (1 << IC_RetainRV) |
+ (1 << IC_RetainBlock)))
+ if (UsedInThisFunction & (1 << IC_Release))
+ // Run OptimizeSequences until it either stops making changes or
+ // no retain+release pair nesting is detected.
+ while (OptimizeSequences(F)) {}
+
+ // Optimizations if objc_autorelease is used.
+ if (UsedInThisFunction &
+ ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+ OptimizeReturns(F);
+
+ return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// ARC contraction.
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Operator.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/Dominators.h"
+
+STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
+
+namespace {
+ /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
+ /// that makes it difficult for ObjCARCOpt to analyze, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ bool Changed;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// StoreStrongCallee, etc.
- Declarations for ObjC runtime + /// functions, for use in creating calls to them. These are initialized + /// lazily to avoid cluttering up the Module with unused declarations. + Constant *StoreStrongCallee, + *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee; + + /// RetainRVMarker - The inline asm string to insert between calls and + /// RetainRV calls to make the optimization work on targets which need it. + const MDString *RetainRVMarker; + + Constant *getStoreStrongCallee(Module *M); + Constant *getRetainAutoreleaseCallee(Module *M); + Constant *getRetainAutoreleaseRVCallee(Module *M); + + bool ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet<Instruction *, 4> + &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> + &Visited); + + void ContractRelease(Instruction *Release, + inst_iterator &Iter); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + public: + static char ID; + ObjCARCContract() : FunctionPass(ID) { + initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCContract::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) + +Pass *llvm::createObjCARCContractPass() { + return new ObjCARCContract(); +} + +void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); +} + +Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { + if (!StoreStrongCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + std::vector<Type *> Params; + Params.push_back(I8XX); + Params.push_back(I8X); + + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + Attributes.addAttr(1, Attribute::NoCapture); + + StoreStrongCallee = + M->getOrInsertFunction( + "objc_storeStrong", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attributes); + } + return StoreStrongCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { + if (!RetainAutoreleaseCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + const FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainAutoreleaseCallee = + M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); + } + return RetainAutoreleaseCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { + if (!RetainAutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; + Params.push_back(I8X); + const FunctionType *FTy = + FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes; + Attributes.addAttr(~0u, Attribute::NoUnwind); + RetainAutoreleaseRVCallee = + M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, + Attributes); + } + return RetainAutoreleaseRVCallee; +} + +/// ContractAutorelease - Merge an autorelease with a retain into a fused 
+/// call. +bool +ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet<Instruction *, 4> + &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> + &Visited) { + const Value *Arg = GetObjCArg(Autorelease); + + // Check that there are no instructions between the retain and the autorelease + // (such as an autorelease_pop) which may change the count. + CallInst *Retain = 0; + if (Class == IC_AutoreleaseRV) + FindDependencies(RetainAutoreleaseRVDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + else + FindDependencies(RetainAutoreleaseDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + + Visited.clear(); + if (DependingInstructions.size() != 1) { + DependingInstructions.clear(); + return false; + } + + Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + DependingInstructions.clear(); + + if (!Retain || + GetBasicInstructionClass(Retain) != IC_Retain || + GetObjCArg(Retain) != Arg) + return false; + + Changed = true; + ++NumPeeps; + + if (Class == IC_AutoreleaseRV) + Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); + else + Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); + + EraseInstruction(Autorelease); + return true; +} + +/// ContractRelease - Attempt to merge an objc_release with a store, load, and +/// objc_retain to form an objc_storeStrong. This can be a little tricky because +/// the instructions don't always appear in order, and there may be unrelated +/// intervening instructions. +void ObjCARCContract::ContractRelease(Instruction *Release, + inst_iterator &Iter) { + LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); + if (!Load || Load->isVolatile()) return; + + // For now, require everything to be in one basic block. + BasicBlock *BB = Release->getParent(); + if (Load->getParent() != BB) return; + + // Walk down to find the store. + BasicBlock::iterator I = Load, End = BB->end(); + ++I; + AliasAnalysis::Location Loc = AA->getLocation(Load); + while (I != End && + (&*I == Release || + IsRetain(GetBasicInstructionClass(I)) || + !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod))) + ++I; + StoreInst *Store = dyn_cast<StoreInst>(I); + if (!Store || Store->isVolatile()) return; + if (Store->getPointerOperand() != Loc.Ptr) return; + + Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); + + // Walk up to find the retain. 
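+ // For reference, the shape being matched is roughly (unrelated
+ // instructions may appear in between):
+ //   %old = load i8** %p
+ //   call i8* @objc_retain(i8* %new)
+ //   store i8* %new, i8** %p
+ //   call void @objc_release(i8* %old)
+ // which is contracted into:
+ //   call void @objc_storeStrong(i8** %p, i8* %new)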
+ I = Store; + BasicBlock::iterator Begin = BB->begin(); + while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) + --I; + Instruction *Retain = I; + if (GetBasicInstructionClass(Retain) != IC_Retain) return; + if (GetObjCArg(Retain) != New) return; + + Changed = true; + ++NumStoreStrongs; + + LLVMContext &C = Release->getContext(); + const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + const Type *I8XX = PointerType::getUnqual(I8X); + + Value *Args[] = { Load->getPointerOperand(), New }; + if (Args[0]->getType() != I8XX) + Args[0] = new BitCastInst(Args[0], I8XX, "", Store); + if (Args[1]->getType() != I8X) + Args[1] = new BitCastInst(Args[1], I8X, "", Store); + CallInst *StoreStrong = + CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), + Args, "", Store); + StoreStrong->setDoesNotThrow(); + StoreStrong->setDebugLoc(Store->getDebugLoc()); + + if (&*Iter == Store) ++Iter; + Store->eraseFromParent(); + Release->eraseFromParent(); + EraseInstruction(Retain); + if (Load->use_empty()) + Load->eraseFromParent(); +} + +bool ObjCARCContract::doInitialization(Module &M) { + Run = ModuleHasARC(M); + if (!Run) + return false; + + // These are initialized lazily. + StoreStrongCallee = 0; + RetainAutoreleaseCallee = 0; + RetainAutoreleaseRVCallee = 0; + + // Initialize RetainRVMarker. + RetainRVMarker = 0; + if (NamedMDNode *NMD = + M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) + if (NMD->getNumOperands() == 1) { + const MDNode *N = NMD->getOperand(0); + if (N->getNumOperands() == 1) + if (const MDString *S = dyn_cast<MDString>(N->getOperand(0))) + RetainRVMarker = S; + } + + return false; +} + +bool ObjCARCContract::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + Changed = false; + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + + PA.setAA(&getAnalysis<AliasAnalysis>()); + + // For ObjC library calls which return their argument, replace uses of the + // argument with uses of the call return value, if it dominates the use. This + // reduces register pressure. + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + // Only these library routines return their argument. In particular, + // objc_retainBlock does not necessarily return its argument. + InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + break; + case IC_Autorelease: + case IC_AutoreleaseRV: + if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) + continue; + break; + case IC_RetainRV: { + // If we're compiling for a target which needs a special inline-asm + // marker to do the retainAutoreleasedReturnValue optimization, + // insert it now. 
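+ // (The marker is a target-specific no-op assembly string, e.g.
+ // "mov r7, r7" on ARM, which the runtime looks for between the call
+ // and the objc_retainAutoreleasedReturnValue call; it is read from
+ // module metadata in doInitialization above.)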
+ if (!RetainRVMarker) + break; + BasicBlock::iterator BBI = Inst; + --BBI; + while (isNoopInstruction(BBI)) --BBI; + if (&*BBI == GetObjCArg(Inst)) { + InlineAsm *IA = + InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), + /*isVarArg=*/false), + RetainRVMarker->getString(), + /*Constraints=*/"", /*hasSideEffects=*/true); + CallInst::Create(IA, "", Inst); + } + break; + } + case IC_InitWeak: { + // objc_initWeak(p, null) => *p = null + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(1))) { + Value *Null = + ConstantPointerNull::get(cast<PointerType>(CI->getType())); + Changed = true; + new StoreInst(Null, CI->getArgOperand(0), CI); + CI->replaceAllUsesWith(Null); + CI->eraseFromParent(); + } + continue; + } + case IC_Release: + ContractRelease(Inst, I); + continue; + default: + continue; + } + + // Don't use GetObjCArg because we don't want to look through bitcasts + // and such; to do the replacement, the argument must have type i8*. + const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); + for (;;) { + // If we're compiling bugpointed code, don't get in trouble. + if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) + break; + // Look through the uses of the pointer. + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ) { + Use &U = UI.getUse(); + unsigned OperandNo = UI.getOperandNo(); + ++UI; // Increment UI now, because we may unlink its element. + if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser())) + if (Inst != UserInst && DT->dominates(Inst, UserInst)) { + Changed = true; + Instruction *Replacement = Inst; + const Type *UseTy = U.get()->getType(); + if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { + // For PHI nodes, insert the bitcast in the predecessor block. + unsigned ValNo = + PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = + PHI->getIncomingBlock(ValNo); + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + &BB->back()); + for (unsigned i = 0, e = PHI->getNumIncomingValues(); + i != e; ++i) + if (PHI->getIncomingBlock(i) == BB) { + // Keep the UI iterator valid. + if (&PHI->getOperandUse( + PHINode::getOperandNumForIncomingValue(i)) == + &UI.getUse()) + ++UI; + PHI->setIncomingValue(i, Replacement); + } + } else { + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", UserInst); + U.set(Replacement); + } + } + } + + // If Arg is a no-op casted pointer, strip one level of casts and + // iterate. + if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) + Arg = BI->getOperand(0); + else if (isa<GEPOperator>(Arg) && + cast<GEPOperator>(Arg)->hasAllZeroIndices()) + Arg = cast<GEPOperator>(Arg)->getPointerOperand(); + else if (isa<GlobalAlias>(Arg) && + !cast<GlobalAlias>(Arg)->mayBeOverridden()) + Arg = cast<GlobalAlias>(Arg)->getAliasee(); + else + break; + } + } + + return Changed; +} diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index c1dfe154ae3f..e6341ae3071f 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -812,7 +812,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // because we can percolate the negate out. Watch for minint, which // cannot be positivified. 
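// (For example, for i32 the minimum value -2147483648 negates to itself
// under two's complement wraparound, so it has no positive counterpart
// and must be skipped here.)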
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) - if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) { + if (CI->isNegative() && !CI->isMinValue(true)) { Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); assert(!Duplicates.count(Factor) && "Shouldn't have two constant factors, missed a canonicalize"); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 32a050617432..302c287d3cbd 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -48,7 +48,12 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopUnswitchPass(Registry); initializeLoopIdiomRecognizePass(Registry); initializeLowerAtomicPass(Registry); + initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); + initializeObjCARCAliasAnalysisPass(Registry); + initializeObjCARCExpandPass(Registry); + initializeObjCARCContractPass(Registry); + initializeObjCARCOptPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); initializeSCCPPass(Registry); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 8938b287a840..7d6349cf4e77 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -30,6 +30,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" @@ -152,7 +153,8 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); + static MemTransferInst *isOnlyCopiedFromConstantGlobal( + AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); }; // SROA_DT - SROA that uses DominatorTree. @@ -228,16 +230,30 @@ class ConvertToScalarInfo { /// which means that mem2reg can't promote it. bool IsNotTrivial; + /// ScalarKind - Tracks the kind of alloca being considered for promotion, + /// computed based on the uses of the alloca rather than the LLVM type system. + enum { + Unknown, + + // Accesses via GEPs that are consistent with element access of a vector + // type. This will not be converted into a vector unless there is a later + // access using an actual vector type. + ImplicitVector, + + // Accesses via vector operations and GEPs that are consistent with the + // layout of a vector type. + Vector, + + // An integer bag-of-bits with bitwise operations for insertion and + // extraction. Any combination of types can be converted into this kind + // of scalar. + Integer + } ScalarKind; + /// VectorTy - This tracks the type that we should promote the vector to if /// it is possible to turn it into a vector. This starts out null, and if it /// isn't possible to turn into a vector type, it gets set to VoidTy. - const Type *VectorTy; - - /// HadAVector - True if there is at least one vector access to the alloca. - /// We don't want to turn random arrays into vectors and use vector element - /// insert/extract, but if there are element accesses to something that is - /// also declared as a vector, we do want to promote to a vector. - bool HadAVector; + const VectorType *VectorTy; /// HadNonMemTransferAccess - True if there is at least one access to the /// alloca that is not a MemTransferInst. 
We don't want to turn structs into @@ -246,14 +262,14 @@ class ConvertToScalarInfo { public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), - HadAVector(false), HadNonMemTransferAccess(false) { } + : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown), + VectorTy(0), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset); bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); @@ -274,6 +290,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { if (!CanConvertToScalar(AI, 0) || !IsNotTrivial) return 0; + // If an alloca has only memset / memcpy uses, it may still have an Unknown + // ScalarKind. Treat it as an Integer below. + if (ScalarKind == Unknown) + ScalarKind = Integer; + + // FIXME: It should be possible to promote the vector type up to the alloca's + // size. + if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8) + ScalarKind = Integer; + // If we were able to find a vector type that can handle this with // insert/extract elements, and if there was at least one use that had // a vector type, promote this to a vector. We don't want to promote @@ -281,14 +307,15 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + if (ScalarKind == Vector) { + assert(VectorTy && "Missing type for vector scalar."); DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { unsigned BitWidth = AllocaSize * 8; - if (!HadAVector && !HadNonMemTransferAccess && - !TD.fitsInLegalInteger(BitWidth)) + if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && + !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth)) return 0; DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); @@ -300,8 +327,9 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { return NewAI; } -/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) -/// so far at the offset specified by Offset (which is specified in bytes). +/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type +/// (VectorTy) so far at the offset specified by Offset (which is specified in +/// bytes). /// /// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. @@ -316,11 +344,11 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, - bool IsLoadOrStore) { +void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, + uint64_t Offset) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. 
- if (VectorTy && VectorTy->isVoidTy())
+ if (ScalarKind == Integer)
return;
// If this could be contributing to a vector, analyze it.
@@ -336,7 +364,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
// Full width accesses can be ignored, because they can always be turned
// into bitcasts.
unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (IsLoadOrStore && EltSize == AllocaSize)
+ if (EltSize == AllocaSize)
return;
// If we're accessing something that could be an element of a vector, see
@@ -345,11 +373,12 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
(!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
if (!VectorTy) {
+ ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
return;
}
- unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+ unsigned CurrentEltSize = VectorTy->getElementType()
->getPrimitiveSizeInBits()/8;
if (EltSize == CurrentEltSize)
return;
@@ -361,16 +390,13 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
- VectorTy = Type::getVoidTy(In->getContext());
+ ScalarKind = Integer;
}
-/// MergeInVectorType - Handles the vector case of MergeInType, returning true
-/// if the type was successfully merged and false otherwise.
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
uint64_t Offset) {
- // Remember if we saw a vector type.
- HadAVector = true;
-
// TODO: Support nonzero offsets?
if (Offset != 0)
return false;
@@ -382,19 +408,22 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
// If this is the first vector we see, remember the type so that we know the
// element size.
if (!VectorTy) {
+ ScalarKind = Vector;
VectorTy = VInTy;
return true;
}
- unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned BitWidth = VectorTy->getBitWidth();
unsigned InBitWidth = VInTy->getBitWidth();
// Vectors of the same size can be converted using a simple bitcast.
- if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
+ ScalarKind = Vector;
return true;
+ }
- const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
- const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+ const Type *ElementTy = VectorTy->getElementType();
+ const Type *InElementTy = VInTy->getElementType();
// Do not allow mixed integer and floating-point accesses from vectors of
// different sizes.
@@ -429,6 +458,7 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
}
// Pick the largest of the two vector types.
+ ScalarKind = Vector; if (InBitWidth > BitWidth) VectorTy = VInTy; @@ -456,7 +486,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (LI->getType()->isX86_MMXTy()) return false; HadNonMemTransferAccess = true; - MergeInType(LI->getType(), Offset, true); + MergeInTypeForLoadOrStore(LI->getType(), Offset); continue; } @@ -467,7 +497,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; HadNonMemTransferAccess = true; - MergeInType(SI->getOperand(0)->getType(), Offset, true); + MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset); continue; } @@ -498,10 +528,22 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // If this is a constant sized memset of a constant value (e.g. 0) we can // handle it. if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { - // Store of constant value and constant size. - if (!isa<ConstantInt>(MSI->getValue()) || - !isa<ConstantInt>(MSI->getLength())) + // Store of constant value. + if (!isa<ConstantInt>(MSI->getValue())) + return false; + + // Store of constant size. + ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength()); + if (!Len) return false; + + // If the size differs from the alloca, we can only convert the alloca to + // an integer bag-of-bits. + // FIXME: This should handle all of the cases that are currently accepted + // as vector element insertions. + if (Len->getZExtValue() != AllocaSize || Offset != 0) + ScalarKind = Integer; + IsNotTrivial = true; // Can't be mem2reg'd. HadNonMemTransferAccess = true; continue; @@ -1053,16 +1095,37 @@ bool SROA::runOnFunction(Function &F) { namespace { class AllocaPromoter : public LoadAndStorePromoter { AllocaInst *AI; + DIBuilder *DIB; + SmallVector<DbgDeclareInst *, 4> DDIs; + SmallVector<DbgValueInst *, 4> DVIs; public: AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, - DbgDeclareInst *DD, DIBuilder *&DB) - : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {} + DIBuilder *DB) + : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { // Remember which alloca we're promoting (for isInstInList). 
this->AI = AI;
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI);
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI);
+
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ DDI->eraseFromParent();
+ }
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ DVI->eraseFromParent();
+ }
}
virtual bool isInstInList(Instruction *I,
@@ -1071,6 +1134,45 @@ public:
return LI->getOperand(0) == AI;
return cast<StoreInst>(I)->getPointerOperand() == AI;
}
+
+ virtual void updateDebugInfo(Instruction *Inst) const {
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
+ }
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Instruction *DbgVal = NULL;
+ // If an argument is zero extended then use the argument directly. The
+ // ZExt may be zapped by an optimization pass in the future.
+ Argument *ExtendedArg = NULL;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg)
+ DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ else
+ DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Instruction *DbgVal =
+ DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()), LI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ }
+ }
+ }
};
} // end anon namespace
@@ -1262,7 +1364,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
LoadInst *TrueLoad =
Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
LoadInst *FalseLoad =
- Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+ Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
// Transfer alignment and TBAA info if present.
TrueLoad->setAlignment(LI->getAlignment()); @@ -1340,10 +1442,9 @@ bool SROA::performPromotion(Function &F) { DT = &getAnalysis<DominatorTree>(); BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function - + DIBuilder DIB(*F.getParent()); bool Changed = false; SmallVector<Instruction*, 64> Insts; - DIBuilder *DIB = 0; while (1) { Allocas.clear(); @@ -1367,11 +1468,7 @@ bool SROA::performPromotion(Function &F) { for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) Insts.push_back(cast<Instruction>(*UI)); - - DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); - if (DDI && !DIB) - DIB = new DIBuilder(*AI->getParent()->getParent()->getParent()); - AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts); + AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts); Insts.clear(); } } @@ -1379,10 +1476,6 @@ bool SROA::performPromotion(Function &F) { Changed = true; } - // FIXME: Is there a better way to handle the lazy initialization of DIB - // so that there doesn't need to be an explicit delete? - delete DIB; - return Changed; } @@ -1403,8 +1496,8 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { // performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the malloc/alloca instructions in the function, removing -// them if they are only used by getelementptr instructions. +// which runs on all of the alloca instructions in the function, removing them +// if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; @@ -1438,12 +1531,15 @@ bool SROA::performScalarRepl(Function &F) { // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. - if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); - Constant *TheSrc = cast<Constant>(TheCopy->getSource()); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + ToDelete[i]->eraseFromParent(); + Constant *TheSrc = cast<Constant>(Copy->getSource()); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - TheCopy->eraseFromParent(); // Don't mutate the global. + Copy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); ++NumGlobals; Changed = true; @@ -2467,8 +2563,14 @@ static bool PointsToConstantGlobal(Value *V) { /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to /// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. -static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - bool isOffset) { +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + bool isOffset, + SmallVector<Instruction *, 4> &LifetimeMarkers) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. 
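+ // (Lifetime markers are calls to the @llvm.lifetime.start and
+ // @llvm.lifetime.end intrinsics on a pointer derived from the alloca;
+ // they produce no value, so they can simply be deleted once the alloca
+ // is replaced by the global.)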
+
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     User *U = cast<Instruction>(*UI);
@@ -2480,7 +2582,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       // If uses of the bitcast are ok, we are ok.
-      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+                                          LifetimeMarkers))
         return false;
       continue;
     }
@@ -2488,7 +2591,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
       // If the GEP has all zero indices, it doesn't offset the pointer.  If it
       // doesn't, it does.
       if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
-                                          isOffset || !GEP->hasAllZeroIndices()))
+                                          isOffset || !GEP->hasAllZeroIndices(),
+                                          LifetimeMarkers))
         return false;
       continue;
     }
@@ -2514,6 +2618,16 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
       continue;
     }

+    // Lifetime intrinsics can be handled by the caller.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+          II->getIntrinsicID() == Intrinsic::lifetime_end) {
+        assert(II->use_empty() && "Lifetime markers have no result to use!");
+        LifetimeMarkers.push_back(II);
+        continue;
+      }
+    }
+
     // If this is isn't our memcpy/memmove, reject it as something we can't
     // handle.
     MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
@@ -2550,9 +2664,11 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
 /// modified by a copy from a constant global.  If we can prove this, we can
 /// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+                                     SmallVector<Instruction*, 4> &ToDelete) {
   MemTransferInst *TheCopy = 0;
-  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
     return TheCopy;
   return 0;
 }
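For context, the front-end pattern the performScalarRepl comment refers to, now with the lifetime markers that the new ToDelete list lets the caller erase up front; an illustrative example only (not taken from the patch):

    // Clang lowers 'A' to an alloca initialized by a memcpy from a private
    // constant global, bracketed by llvm.lifetime.start/end markers. Because
    // 'A' is only read afterwards, SROA can delete the markers and the copy
    // and rewrite every use to point at the constant global directly.
    int sum(void) {
      int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
      int S = 0;
      for (unsigned i = 0; i != sizeof(A) / sizeof(A[0]); ++i)
        S += A[i];
      return S;
    }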
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7e9cc807b214..a66b3e38258f 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -91,8 +91,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
 static void ChangeToCall(InvokeInst *II) {
   BasicBlock *BB = II->getParent();
   SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
-                                       Args.end(), "", II);
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6247b0348f14..7c415e5150dc 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization {
     }

     // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
-    const Type *ArgType = Op->getType();
+    Type *ArgType = Op->getType();
     Value *F = Intrinsic::getDeclaration(Callee->getParent(),
-                                         Intrinsic::cttz, &ArgType, 1);
+                                         Intrinsic::cttz, ArgType);
     Value *V = B.CreateCall(F, Op, "cttz");
     V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
     V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 92464e8cf130..b4f74f97e978 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -153,13 +153,13 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   // Delete the unconditional branch from the predecessor...
   PredBB->getInstList().pop_back();

-  // Move all definitions in the successor to the predecessor...
-  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(PredBB);

+  // Move all definitions in the successor to the predecessor...
+  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
   // Inherit predecessors name if it exists.
   if (!PredBB->hasName())
     PredBB->takeName(BB);
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index d6206a3f3326..92ce50030a5d 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -193,44 +193,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
   // If there are any PHI nodes in DestBB, we need to update them so that they
   // merge incoming values from NewBB instead of from TIBB.
-  if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
-    // This conceptually does:
-    //  foreach (PHINode *PN in DestBB)
-    //    PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
-    // but is optimized for two cases.
-
-    if (APHI->getNumIncomingValues() <= 8) {  // Small # preds case.
-      unsigned BBIdx = 0;
-      for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
-        // We no longer enter through TIBB, now we come in through NewBB.
-        // Revector exactly one entry in the PHI node that used to come from
-        // TIBB to come from NewBB.
-        PHINode *PN = cast<PHINode>(I);
-
-        // Reuse the previous value of BBIdx if it lines up.  In cases where we
-        // have multiple phi nodes with *lots* of predecessors, this is a speed
-        // win because we don't have to scan the PHI looking for TIBB.  This
-        // happens because the BB list of PHI nodes are usually in the same
-        // order.
-        if (PN->getIncomingBlock(BBIdx) != TIBB)
-          BBIdx = PN->getBasicBlockIndex(TIBB);
-        PN->setIncomingBlock(BBIdx, NewBB);
-      }
-    } else {
-      // However, the foreach loop is slow for blocks with lots of predecessors
-      // because PHINode::getIncomingBlock is O(n) in # preds.  Instead, walk
-      // the user list of TIBB to find the PHI nodes.
-      SmallPtrSet<PHINode*, 16> UpdatedPHIs;
-
-      for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
-           UI != E; ) {
-        Value::use_iterator Use = UI++;
-        if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
-          // Remove one entry from each PHI.
-          if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
-            PN->setOperand(Use.getOperandNo(), NewBB);
-        }
-      }
+  {
+    unsigned BBIdx = 0;
+    for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+      // We no longer enter through TIBB, now we come in through NewBB.
+      // Revector exactly one entry in the PHI node that used to come from
+      // TIBB to come from NewBB.
+      PHINode *PN = cast<PHINode>(I);
+
+      // Reuse the previous value of BBIdx if it lines up.  In cases where we
+      // have multiple phi nodes with *lots* of predecessors, this is a speed
+      // win because we don't have to scan the PHI looking for TIBB.  This
+      // happens because the BB list of PHI nodes are usually in the same
+      // order.
+      if (PN->getIncomingBlock(BBIdx) != TIBB)
+        BBIdx = PN->getBasicBlockIndex(TIBB);
+      PN->setIncomingBlock(BBIdx, NewBB);
     }
   }
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 5b76bb26e404..204c2c63e1a5 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,7 +5,6 @@ add_llvm_library(LLVMTransformUtils
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
   CloneFunction.cpp
-  CloneLoop.cpp
   CloneModule.cpp
   CodeExtractor.cpp
   DemoteRegToStack.cpp
@@ -15,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
   Local.cpp
   LoopSimplify.cpp
   LoopUnroll.cpp
+  LowerExpectIntrinsic.cpp
  LowerInvoke.cpp
   LowerSwitch.cpp
   Mem2Reg.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d967ceb96856..6ea831f5345b 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -140,7 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
                               bool ModuleLevelChanges,
                               ClonedCodeInfo *CodeInfo) {
-  std::vector<const Type*> ArgTypes;
+  std::vector<Type*> ArgTypes;

   // The user might be deleting arguments to the function by specifying them in
   // the VMap.  If so, we need to not add the arguments to the arg ty vector
@@ -342,18 +342,6 @@ ConstantFoldMappedInstruction(const Instruction *I) {
                                   Ops.size(), TD);
 }

-static DebugLoc
-UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
-                    LLVMContext &Ctx) {
-  DebugLoc NewLoc = TheCallDL;
-  if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
-    NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
-                                 Ctx);
-
-  return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
-                       InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
-}
-
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
 /// except that it does some simple constant prop and DCE on the fly.  The
 /// effect of this is to copy significantly less code in cases where (for
@@ -418,50 +406,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     if (PHINode *PN = dyn_cast<PHINode>(I)) {
       // Skip over all PHI nodes, remembering them for later.
       BasicBlock::const_iterator OldI = BI->begin();
-      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
-        if (I->hasMetadata()) {
-          if (!TheCallDL.isUnknown()) {
-            DebugLoc IDL = I->getDebugLoc();
-            if (!IDL.isUnknown()) {
-              DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
-                                                   I->getContext());
-              I->setDebugLoc(NewDL);
-            }
-          } else {
-            // The cloned instruction has dbg info but the call instruction
-            // does not have dbg info. Remove dbg info from cloned instruction.
-            I->setDebugLoc(DebugLoc());
-          }
-        }
+      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
         PHIToResolve.push_back(cast<PHINode>(OldI));
-      }
     }

-    // FIXME:
-    // FIXME:
-    // FIXME: Unclone all this metadata stuff.
-    // FIXME:
-    // FIXME:
-
     // Otherwise, remap the rest of the instructions normally.
-    for (; I != NewBB->end(); ++I) {
-      if (I->hasMetadata()) {
-        if (!TheCallDL.isUnknown()) {
-          DebugLoc IDL = I->getDebugLoc();
-          if (!IDL.isUnknown()) {
-            DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
-                                                 I->getContext());
-            I->setDebugLoc(NewDL);
-          }
-        } else {
-          // The cloned instruction has dbg info but the call instruction
-          // does not have dbg info. Remove dbg info from cloned instruction.
-          I->setDebugLoc(DebugLoc());
-        }
-      }
+    for (; I != NewBB->end(); ++I)
       RemapInstruction(I, VMap,
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
-    }
   }

   // Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -572,12 +524,12 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     // removed, so we just need to splice the blocks.
     BI->eraseFromParent();

-    // Move all the instructions in the succ to the pred.
-    I->getInstList().splice(I->end(), Dest->getInstList());
-
     // Make all PHI nodes that referred to Dest now refer to I as their source.
     Dest->replaceAllUsesWith(I);

+    // Move all the instructions in the succ to the pred.
+    I->getInstList().splice(I->end(), Dest->getInstList());
+
     // Remove the dest block.
     Dest->eraseFromParent();
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
deleted file mode 100644
index 87dd14153a19..000000000000
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CloneLoop interface which makes a copy of a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
-
-
-using namespace llvm;
-
-/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
-/// that the basic block is already cloned.
-static void CloneDominatorInfo(BasicBlock *BB,
-                               ValueToValueMapTy &VMap,
-                               DominatorTree *DT) {
-
-  assert (DT && "DominatorTree is not available");
-  ValueToValueMapTy::iterator BI = VMap.find(BB);
-  assert (BI != VMap.end() && "BasicBlock clone is missing");
-  BasicBlock *NewBB = cast<BasicBlock>(BI->second);
-
-  // NewBB already got dominator info.
-  if (DT->getNode(NewBB))
-    return;
-
-  assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
-  // Entry block is not expected here. Infinite loops are not to cloned.
-  assert (DT->getNode(BB)->getIDom() && "BasicBlock does not have immediate dominator");
-  BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
-
-  // NewBB's dominator is either BB's dominator or BB's dominator's clone.
-  BasicBlock *NewBBDom = BBDom;
-  ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
-  if (BBDomI != VMap.end()) {
-    NewBBDom = cast<BasicBlock>(BBDomI->second);
-    if (!DT->getNode(NewBBDom))
-      CloneDominatorInfo(BBDom, VMap, DT);
-  }
-  DT->addNewBlock(NewBB, NewBBDom);
-}
-
-/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
-/// using old blocks to new blocks mapping.
-Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
-                      ValueToValueMapTy &VMap, Pass *P) {
-
-  DominatorTree *DT = NULL;
-  if (P)
-    DT = P->getAnalysisIfAvailable<DominatorTree>();
-
-  SmallVector<BasicBlock *, 16> NewBlocks;
-
-  // Populate loop nest.
-  SmallVector<Loop *, 8> LoopNest;
-  LoopNest.push_back(OrigL);
-
-
-  Loop *NewParentLoop = NULL;
-  do {
-    Loop *L = LoopNest.pop_back_val();
-    Loop *NewLoop = new Loop();
-
-    if (!NewParentLoop)
-      NewParentLoop = NewLoop;
-
-    LPM->insertLoop(NewLoop, L->getParentLoop());
-
-    // Clone Basic Blocks.
-    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-         I != E; ++I) {
-      BasicBlock *BB = *I;
-      BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
-      VMap[BB] = NewBB;
-      if (P)
-        LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
-      NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
-      NewBlocks.push_back(NewBB);
-    }
-
-    // Clone dominator info.
-    if (DT)
-      for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-           I != E; ++I) {
-        BasicBlock *BB = *I;
-        CloneDominatorInfo(BB, VMap, DT);
-      }
-
-    // Process sub loops
-    for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-      LoopNest.push_back(*I);
-  } while (!LoopNest.empty());
-
-  // Remap instructions to reference operands from VMap.
-  for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
-      NBE = NewBlocks.end();  NBItr != NBE; ++NBItr) {
-    BasicBlock *NB = *NBItr;
-    for(BasicBlock::iterator BI = NB->begin(), BE = NB->end();
-        BI != BE; ++BI) {
-      Instruction *Insn = BI;
-      for (unsigned index = 0, num_ops = Insn->getNumOperands();
-           index != num_ops; ++index) {
-        Value *Op = Insn->getOperand(index);
-        ValueToValueMapTy::iterator OpItr = VMap.find(Op);
-        if (OpItr != VMap.end())
-          Insn->setOperand(index, OpItr->second);
-      }
-    }
-  }
-
-  BasicBlock *Latch = OrigL->getLoopLatch();
-  Function *F = Latch->getParent();
-  F->getBasicBlockList().insert(OrigL->getHeader(),
-                                NewBlocks.begin(), NewBlocks.end());
-
-
-  return NewParentLoop;
-}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1046c38ec01d..a08fa35065cc 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Module.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Constant.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 using namespace llvm;
@@ -32,20 +31,13 @@ Module *llvm::CloneModule(const Module *M) {
   return CloneModule(M, VMap);
 }

-Module *llvm::CloneModule(const Module *M,
-                          ValueToValueMapTy &VMap) {
-  // First off, we need to create the new module...
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+  // First off, we need to create the new module.
   Module *New = new Module(M->getModuleIdentifier(), M->getContext());
   New->setDataLayout(M->getDataLayout());
   New->setTargetTriple(M->getTargetTriple());
   New->setModuleInlineAsm(M->getModuleInlineAsm());
-
-  // Copy all of the type symbol table entries over.
-  const TypeSymbolTable &TST = M->getTypeSymbolTable();
-  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
-       TI != TE; ++TI)
-    New->addTypeName(TI->first, TI->second);
-
+
   // Copy all of the dependent libraries over.
   for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
     New->addLibrary(*I);
@@ -88,8 +80,7 @@ Module *llvm::CloneModule(const Module *M,
        I != E; ++I) {
     GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
     if (I->hasInitializer())
-      GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 VMap, RF_None)));
+      GV->setInitializer(MapValue(I->getInitializer(), VMap));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -119,8 +110,8 @@ Module *llvm::CloneModule(const Module *M,
        I != E; ++I) {
     GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
     GA->setLinkage(I->getLinkage());
-    if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
+    if (const Constant *C = I->getAliasee())
+      GA->setAliasee(MapValue(C, VMap));
   }

   // And named metadata....
@@ -129,8 +120,7 @@ Module *llvm::CloneModule(const Module *M,
     const NamedMDNode &NMD = *I;
     NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
     for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
-      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
-                                               RF_None)));
+      NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
   }

   return New;
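With the simplified signature above, callers recover the clone of any global through the shared value map. A minimal usage sketch against the post-change API (the helper name is invented):

    #include "llvm/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    // Clone M and look up the copy of a known function F; CloneModule seeds
    // every global (including functions) into VMap, so the lookup is direct.
    static Module *cloneAndFind(const Module *M, const Function *F,
                                Function *&NewF) {
      ValueToValueMapTy VMap;
      Module *NewM = CloneModule(M, VMap);
      NewF = cast<Function>(VMap[F]);
      return NewM;
    }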
"targetBlock" : ""); codeReplacer->getInstList().push_back(call); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 8416170d9032..d5b382e55e5c 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -449,11 +449,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i) NewSelector.push_back(Outer->getArgOperand(i)); - CallInst *NewInner = CallInst::Create(Inner->getCalledValue(), - NewSelector.begin(), - NewSelector.end(), - "", - Inner); + CallInst *NewInner = + IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector); // No need to copy attributes, calling convention, etc. NewInner->takeName(Inner); Inner->replaceAllUsesWith(NewInner); @@ -489,8 +486,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, Invoke.getOuterUnwindDest(), - InvokeArgs.begin(), InvokeArgs.end(), - CI->getName(), BB); + InvokeArgs, CI->getName(), BB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -664,7 +660,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, LLVMContext &Context = Arg->getContext(); - const Type *VoidPtrTy = Type::getInt8PtrTy(Context); + Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; @@ -681,10 +677,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), &*Caller->begin()->begin()); // Emit a memcpy. - const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, - Tys, 3); + Tys); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); @@ -703,7 +699,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, ConstantInt::get(Type::getInt32Ty(Context), 1), ConstantInt::getFalse(Context) // isVolatile }; - CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); + IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); // Uses of the argument in the function should use our new alloca // instead. @@ -734,17 +730,52 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { if (AI->getType() == Int8PtrTy) return isUsedByLifetimeMarker(AI); - // Do a scan to find all the bitcasts to i8*. + // Do a scan to find all the casts to i8*. for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E; ++I) { if (I->getType() != Int8PtrTy) continue; - if (!isa<BitCastInst>(*I)) continue; + if (I->stripPointerCasts() != AI) continue; if (isUsedByLifetimeMarker(*I)) return true; } return false; } +/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively +/// update InlinedAtEntry of a DebugLoc. 
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+                                    const DebugLoc &InlinedAtDL,
+                                    LLVMContext &Ctx) {
+  if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+    DebugLoc NewInlinedAtDL
+      = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+    return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+                         NewInlinedAtDL.getAsMDNode(Ctx));
+  }
+
+  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+                       InlinedAtDL.getAsMDNode(Ctx));
+}
+
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// to encode location where these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+                             Instruction *TheCall) {
+  DebugLoc TheCallDL = TheCall->getDebugLoc();
+  if (TheCallDL.isUnknown())
+    return;
+
+  for (; FI != Fn->end(); ++FI) {
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+         BI != BE; ++BI) {
+      DebugLoc DL = BI->getDebugLoc();
+      if (!DL.isUnknown())
+        BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+    }
+  }
+}
+
 // InlineFunction - This function inlines the called function into the basic
 // block of the caller.  This returns false if it is not possible to inline this
 // call.  The program is still in a well defined state if this occurs though.
@@ -847,6 +878,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
     // Update the callgraph if requested.
     if (IFI.CG)
       UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+    // Update inlined instructions' line number information.
+    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
   }

   // If there are any alloca instructions in the block that used to be the entry
@@ -920,13 +954,13 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
     Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

     // Insert the llvm.stacksave.
-    CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
-                                          FirstNewBlock->begin());
+    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+      .CreateCall(StackSave, "savedstack");

     // Insert a call to llvm.stackrestore before any return instructions in the
     // inlined function.
     for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
-      CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
+      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
     }

     // Count the number of StackRestore calls we insert.
@@ -938,7 +972,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
     for (Function::iterator BB = FirstNewBlock, E = Caller->end();
          BB != E; ++BB)
       if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-        CallInst::Create(StackRestore, SavedPtr, "", UI);
+        IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
         ++NumStackRestores;
       }
   }
@@ -1098,15 +1132,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
     }

+    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+    BasicBlock *ReturnBB = Returns[0]->getParent();
+    ReturnBB->replaceAllUsesWith(AfterCallBB);
+
     // Splice the code from the return block into the block that it will return
     // to, which contains the code that was after the call.
-    BasicBlock *ReturnBB = Returns[0]->getParent();
     AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                       ReturnBB->getInstList());

-    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
-    ReturnBB->replaceAllUsesWith(AfterCallBB);
-
     // Delete the return instruction now and empty ReturnBB now.
     Returns[0]->eraseFromParent();
     ReturnBB->eraseFromParent();
@@ -1126,8 +1160,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {

   // Splice the code entry block into calling block, right before the
   // unconditional branch.
-  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
   CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
+  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

   // Remove the unconditional branch.
   OrigBB->getInstList().erase(Br);
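Because updateInlinedAtInfo threads the call site through the InlinedAtEntry chain, the complete inline stack of any cloned instruction can later be recovered by walking that chain with the same accessors. A small sketch (hypothetical debugging helper, not part of the patch):

    #include "llvm/Metadata.h"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/DebugLoc.h"
    using namespace llvm;

    // Print one line per inlining level, innermost location first.
    static void printInlineStack(const DebugLoc &DL, LLVMContext &Ctx) {
      dbgs() << "  at line " << DL.getLine() << ", col " << DL.getCol() << "\n";
      if (MDNode *IA = DL.getInlinedAt(Ctx))
        printInlineStack(DebugLoc::getFromDILocation(IA), Ctx);
    }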
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 3bdbaa5c09db..0f6d9ae99d66 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -427,10 +427,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   BasicBlock *PredBB = DestBB->getSinglePredecessor();
   assert(PredBB && "Block doesn't have a single predecessor!");

-  // Splice all the instructions from PredBB to DestBB.
-  PredBB->getTerminator()->eraseFromParent();
-  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
-
   // Zap anything that took the address of DestBB.  Not doing this will give the
   // address an invalid value.
   if (DestBB->hasAddressTaken()) {
@@ -445,6 +441,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   // Anything that branched to PredBB now branches to DestBB.
   PredBB->replaceAllUsesWith(DestBB);

+  // Splice all the instructions from PredBB to DestBB.
+  PredBB->getTerminator()->eraseFromParent();
+  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
   if (P) {
     DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
     if (DT) {
@@ -536,9 +536,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {

 /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
 /// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential debug intrinsics and the branch.  If possible, eliminate BB by
-/// rewriting all the predecessors to branch to the successor block and return
-/// true.  If we can't transform, return false.
+/// potential side-effect free intrinsics and the branch.  If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true.  If we can't transform, return false.
 bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
   assert(BB != &BB->getParent()->getEntryBlock() &&
          "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -613,13 +613,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
     }
   }

-  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-    if (Succ->getSinglePredecessor()) {
-      // BB is the only predecessor of Succ, so Succ will end up with exactly
-      // the same predecessors BB had.
-      Succ->getInstList().splice(Succ->begin(),
-                                 BB->getInstList(), BB->begin());
-    } else {
+  if (Succ->getSinglePredecessor()) {
+    // BB is the only predecessor of Succ, so Succ will end up with exactly
+    // the same predecessors BB had.
+
+    // Copy over any phi, debug or lifetime instruction.
+    BB->getTerminator()->eraseFromParent();
+    Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+  } else {
+    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
       // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
       assert(PN->use_empty() && "There shouldn't be any uses here!");
       PN->eraseFromParent();
@@ -642,7 +644,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
   bool Changed = false;

   // This implementation doesn't currently consider undef operands
-  // specially. Theroetically, two phis which are identical except for
+  // specially. Theoretically, two phis which are identical except for
   // one having an undef where the other doesn't could be collapsed.

   // Map from PHI hash values to PHI nodes. If multiple PHIs have
@@ -660,12 +662,17 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
     // them, which helps expose duplicates, but we have to check all the
     // operands to be safe in case instcombine hasn't run.
     uintptr_t Hash = 0;
+    // This hash algorithm is quite weak as hash functions go, but it seems
+    // to do a good enough job for this particular purpose, and is very quick.
     for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
-      // This hash algorithm is quite weak as hash functions go, but it seems
-      // to do a good enough job for this particular purpose, and is very quick.
       Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
       Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
     }
+    for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+         I != E; ++I) {
+      Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+    }
     // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
     Hash >>= 1;
     // If we've never seen this hash value before, it's a unique PHI.
@@ -706,39 +713,15 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
 ///
 static unsigned enforceKnownAlignment(Value *V, unsigned Align,
                                       unsigned PrefAlign) {
+  V = V->stripPointerCasts();

-  User *U = dyn_cast<User>(V);
-  if (!U) return Align;
-
-  switch (Operator::getOpcode(U)) {
-  default: break;
-  case Instruction::BitCast:
-    return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
-  case Instruction::GetElementPtr: {
-    // If all indexes are zero, it is just the alignment of the base pointer.
-    bool AllZeroOperands = true;
-    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
-      if (!isa<Constant>(*i) ||
-          !cast<Constant>(*i)->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
-
-    if (AllZeroOperands) {
-      // Treat this like a bitcast.
-      return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
-    }
-    return Align;
-  }
-  case Instruction::Alloca: {
-    AllocaInst *AI = cast<AllocaInst>(V);
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
     // If there is a requested alignment and if this is an alloca, round up.
     if (AI->getAlignment() >= PrefAlign)
       return AI->getAlignment();
     AI->setAlignment(PrefAlign);
     return PrefAlign;
   }
-  }

   if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
     // If there is a large requested alignment and we can, bump up the alignment
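The duplicate-PHI hash now mixes in the incoming blocks as well as the incoming values: two PHIs with the same values arriving from different blocks are not duplicates. The mixing step itself is a xor followed by a 7-bit rotate, isolated below for clarity (illustrative restatement of the scheme above, not new functionality):

    #include <climits>
    #include <stdint.h>

    // Fold one pointer into the running hash: xor it in, then rotate left
    // by 7 bits so that operand order influences the final value.
    static uintptr_t hashPointer(uintptr_t Hash, const void *P) {
      Hash ^= (uintptr_t)P;
      return (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
    }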
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f02ffd20bca9..e79fb5ac21b4 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -375,6 +375,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
     SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
                            ".preheader", this);

+  NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
   DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
                << "\n");

diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 7da7271e642c..6772511b5d5a 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -47,6 +47,14 @@ static inline void RemapInstruction(Instruction *I,
     if (It != VMap.end())
       I->setOperand(op, It->second);
   }
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+      if (It != VMap.end())
+        PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+    }
+  }
 }

 /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
@@ -75,13 +83,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
   // Delete the unconditional branch from the predecessor...
   OnlyPred->getInstList().pop_back();

-  // Move all definitions in the successor to the predecessor...
-  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(OnlyPred);

+  // Move all definitions in the successor to the predecessor...
+  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
   std::string OldName = BB->getName();

   // Erase basic block from the function...
@@ -247,16 +255,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
       // the successor of the latch block.  The successor of the exit block will
       // be updated specially after unrolling all the way.
       if (*BB != LatchBlock)
-        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
-             UI != UE;) {
-          Instruction *UseInst = cast<Instruction>(*UI);
-          ++UI;
-          if (isa<PHINode>(UseInst) && !L->contains(UseInst)) {
-            PHINode *phi = cast<PHINode>(UseInst);
-            Value *Incoming = phi->getIncomingValueForBlock(*BB);
-            phi->addIncoming(Incoming, New);
-          }
-        }
+        for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
+             ++SI)
+          if (!L->contains(*SI))
+            for (BasicBlock::iterator BBI = (*SI)->begin();
+                 PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+              Value *Incoming = phi->getIncomingValueForBlock(*BB);
+              phi->addIncoming(Incoming, New);
+            }

       // Keep track of new headers and latches as we create them, so that
       // we can insert the proper branches later.
@@ -288,24 +294,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
   // successor blocks, update them to use the appropriate values computed as the
   // last iteration of the loop.
   if (Count != 1) {
-    SmallPtrSet<PHINode*, 8> Users;
-    for (Value::use_iterator UI = LatchBlock->use_begin(),
-         UE = LatchBlock->use_end(); UI != UE; ++UI)
-      if (PHINode *phi = dyn_cast<PHINode>(*UI))
-        Users.insert(phi);
-
     BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
-    for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
+    for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
          SI != SE; ++SI) {
-      PHINode *PN = *SI;
-      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
-      // If this value was defined in the loop, take the value defined by the
-      // last iteration of the loop.
-      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
-        if (L->contains(InValI))
-          InVal = LastValueMap[InVal];
+      for (BasicBlock::iterator BBI = (*SI)->begin();
+           PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+        Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+        // If this value was defined in the loop, take the value defined by the
+        // last iteration of the loop.
+        if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+          if (L->contains(InValI))
+            InVal = LastValueMap[InVal];
+        }
+        PN->addIncoming(InVal, LastIterationBB);
       }
-      PN->addIncoming(InVal, LastIterationBB);
     }
   }
@@ -352,11 +354,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
       // Replace the conditional branch with an unconditional one.
       BranchInst::Create(Dest, Term);
       Term->eraseFromParent();
-      // Merge adjacent basic blocks, if possible.
-      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+    }
+  }
+
+  // Merge adjacent basic blocks, if possible.
+  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+    if (Term->isUnconditional()) {
+      BasicBlock *Dest = Term->getSuccessor(0);
+      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
-        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
-      }
     }
   }
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 000000000000..c1213fac7bc7
--- /dev/null
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+                   cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+                     cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+  class LowerExpectIntrinsic : public FunctionPass {
+
+    bool HandleSwitchExpect(SwitchInst *SI);
+
+    bool HandleIfExpect(BranchInst *BI);
+
+  public:
+    static char ID;
+    LowerExpectIntrinsic() : FunctionPass(ID) {
+      initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+  };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+  CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+
+  unsigned caseNo = SI->findCaseValue(ExpectedValue);
+  std::vector<Value *> Vec;
+  unsigned n = SI->getNumCases();
+  Vec.resize(n + 1); // +1 for MDString
+
+  Vec[0] = MDString::get(Context, "branch_weights");
+  for (unsigned i = 0; i < n; ++i) {
+    Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+  }
+
+  MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+  SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  SI->setCondition(ArgValue);
+  return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+  if (BI->isUnconditional())
+    return false;
+
+  // Handle non-optimized IR code like:
+  //   %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+  //   %tobool = icmp ne i64 %expval, 0
+  //   br i1 %tobool, label %if.then, label %if.end
+
+  ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+    return false;
+
+  CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+  bool Likely = ExpectedValue->isOne();
+
+  // If expect value is equal to 1 it means that we are more likely to take
+  // branch 0, in other case more likely is branch 1.
+  Value *Ops[] = {
+    MDString::get(Context, "branch_weights"),
+    ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+    ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+  };
+
+  MDNode *WeightsNode = MDNode::get(Context, Ops);
+  BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  CmpI->setOperand(0, ArgValue);
+  return true;
+}
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+    BasicBlock *BB = I++;
+
+    // Create "block_weights" metadata.
+    if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+      if (HandleIfExpect(BI))
+        IfHandled++;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+      if (HandleSwitchExpect(SI))
+        IfHandled++;
+    }
+
+    // remove llvm.expect intrinsics.
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+         BI != BE; ) {
+      CallInst *CI = dyn_cast<CallInst>(BI++);
+      if (!CI)
+        continue;
+
+      Function *Fn = CI->getCalledFunction();
+      if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+        Value *Exp = CI->getArgOperand(0);
+        CI->replaceAllUsesWith(Exp);
+        CI->eraseFromParent();
+      }
+    }
+  }
+
+  return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+                "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+  return new LowerExpectIntrinsic();
+}
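A minimal driver for the new pass, assuming the standard PassManager setup of this period (sketch only; createLowerExpectIntrinsicPass is the factory defined above):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    // Rewrite llvm.expect calls in M into branch_weights metadata and then
    // strip the intrinsic calls themselves, as runOnFunction does per block.
    static void lowerExpect(Module &M) {
      PassManager PM;
      PM.add(createLowerExpectIntrinsicPass());
      PM.run(M);
    }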
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 025ae0d61696..f77d19de900d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -66,7 +66,7 @@ namespace {
     Constant *AbortFn;

     // Used for expensive EH support.
-    const Type *JBLinkTy;
+    StructType *JBLinkTy;
     GlobalVariable *JBListHead;
     Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
     bool useExpensiveEHSupport;
@@ -120,24 +120,16 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
 // doInitialization - Make sure that there is a prototype for abort in the
 // current module.
 bool LowerInvoke::doInitialization(Module &M) {
-  const Type *VoidPtrTy =
-          Type::getInt8PtrTy(M.getContext());
+  const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
   if (useExpensiveEHSupport) {
     // Insert a type for the linked list of jump buffers.
     unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
     JBSize = JBSize ? JBSize : 200;
-    const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
-
-    { // The type is recursive, so use a type holder.
-      std::vector<const Type*> Elements;
-      Elements.push_back(JmpBufTy);
-      OpaqueType *OT = OpaqueType::get(M.getContext());
-      Elements.push_back(PointerType::getUnqual(OT));
-      PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
-      OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
-      JBLinkTy = JBLType.get();
-      M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
-    }
+    Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+    JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+    Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+    JBLinkTy->setBody(Elts);

     const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
@@ -184,8 +176,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
       SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
       // Insert a normal call instruction...
       CallInst *NewCall = CallInst::Create(II->getCalledValue(),
-                                           CallArgs.begin(), CallArgs.end(),
-                                           "",II);
+                                           CallArgs, "", II);
       NewCall->takeName(II);
       NewCall->setCallingConv(II->getCallingConv());
       NewCall->setAttributes(II->getAttributes());
@@ -265,8 +256,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
   // Insert a normal call instruction.
   SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
   CallInst *NewCall = CallInst::Create(II->getCalledValue(),
-                                       CallArgs.begin(), CallArgs.end(), "",
-                                       II);
+                                       CallArgs, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
@@ -573,7 +563,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
                                Type::getInt8PtrTy(F.getContext()),
                                "tmp", UnwindBlock);
   Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-  CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
+  CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
   new UnreachableInst(F.getContext(), UnwindBlock);

   // Set up the term block ("throw without a catch").
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a1736b931fb4..e5a00f4e9774 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -92,6 +93,22 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
         return false; // Don't allow a store OF the AI, only INTO the AI.
       if (SI->isVolatile())
         return false;
+    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+          II->getIntrinsicID() != Intrinsic::lifetime_end)
+        return false;
+    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!onlyUsedByLifetimeMarkers(BCI))
+        return false;
+    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!GEPI->hasAllZeroIndices())
+        return false;
+      if (!onlyUsedByLifetimeMarkers(GEPI))
+        return false;
     } else {
       return false;
     }
@@ -335,6 +352,31 @@ namespace {
 };
 }  // end of anonymous namespace

+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+  // Knowing that this alloca is promotable, we know that it's safe to kill all
+  // instructions except for load and store.
+
+  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+       UI != UE;) {
+    Instruction *I = cast<Instruction>(*UI);
+    ++UI;
+    if (isa<LoadInst>(I) || isa<StoreInst>(I))
+      continue;
+
+    if (!I->getType()->isVoidTy()) {
+      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+      // Follow the use/def chain to erase them now instead of leaving it for
+      // dead code elimination later.
+      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+           UI != UE;) {
+        Instruction *Inst = cast<Instruction>(*UI);
+        ++UI;
+        Inst->eraseFromParent();
+      }
+    }
+    I->eraseFromParent();
+  }
+}

 void PromoteMem2Reg::run() {
   Function &F = *DT.getRoot()->getParent();
@@ -353,6 +395,8 @@ void PromoteMem2Reg::run() {
     assert(AI->getParent()->getParent() == &F &&
            "All allocas should be in the same function, which is same as DF!");

+    removeLifetimeIntrinsicUsers(AI);
+
     if (AI->use_empty()) {
       // If there are no uses of the alloca, just delete it now.
       if (AST) AST->deleteValue(AI);
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b336194a35e3..b47a7ccd80ba 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
@@ -358,8 +357,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {

 LoadAndStorePromoter::
 LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
-                     SSAUpdater &S, DbgDeclareInst *DD, DIBuilder *DB,
-                     StringRef BaseName) : SSA(S), DDI(DD), DIB(DB) {
+                     SSAUpdater &S, StringRef BaseName) : SSA(S) {
   if (Insts.empty()) return;

   Value *SomeVal;
@@ -407,8 +405,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
     if (BlockUses.size() == 1) {
       // If it is a store, it is a trivial def of the value in the block.
       if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-        if (DDI)
-          ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+        updateDebugInfo(SI);
         SSA.AddAvailableValue(BB, SI->getOperand(0));
       } else
         // Otherwise it is a load, queue it to rewrite as a live-in load.
@@ -462,9 +459,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
       if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
         // If this is a store to an unrelated pointer, ignore it.
         if (!isInstInList(SI, Insts)) continue;
-
-        if (DDI)
-          ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+        updateDebugInfo(SI);

         // Remember that this is the active value in the block.
         StoredValue = SI->getOperand(0);
@@ -522,7 +517,4 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
     instructionDeleted(User);
     User->eraseFromParent();
   }
-
-  if (DDI)
-    DDI->eraseFromParent();
 }
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6df846cbd18f..9d9c324b8468 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2211,8 +2211,7 @@ bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
         SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
         Builder.SetInsertPoint(BI);
         CallInst *CI = Builder.CreateCall(II->getCalledValue(),
-                                          Args.begin(), Args.end(),
-                                          II->getName());
+                                          Args, II->getName());
         CI->setCallingConv(II->getCallingConv());
         CI->setAttributes(II->getAttributes());
         // If the invoke produced a value, the Call now does instead.
@@ -2355,8 +2354,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
         Builder.SetInsertPoint(BI);
         CallInst *CI = Builder.CreateCall(II->getCalledValue(),
-                                          Args.begin(), Args.end(),
-                                          II->getName());
+                                          Args, II->getName());
         CI->setCallingConv(II->getCallingConv());
         CI->setAttributes(II->getAttributes());
         // If the invoke produced a value, the call does now instead.
@@ -2450,6 +2448,77 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
   return !DeadCases.empty();
 }

+/// FindPHIForConditionForwarding - If BB would be eligible for simplification
+/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+                                              BasicBlock *BB,
+                                              int *PhiIndex) {
+  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+    return NULL; // BB must be empty to be a candidate for simplification.
+  if (!BB->getSinglePredecessor())
+    return NULL; // BB must be dominated by the switch.
+
+  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!Branch || !Branch->isUnconditional())
+    return NULL; // Terminator must be unconditional branch.
+
+  BasicBlock *Succ = Branch->getSuccessor(0);
+
+  BasicBlock::iterator I = Succ->begin();
+  while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+    int Idx = PHI->getBasicBlockIndex(BB);
+    assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+    Value *InValue = PHI->getIncomingValue(Idx);
+    if (InValue != CaseValue) continue;
+
+    *PhiIndex = Idx;
+    return PHI;
+  }
+
+  return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+/// instruction to a phi node dominated by the switch, if that would mean that
+/// some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+  typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+  ForwardingNodesMap ForwardingNodes;
+
+  for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
+    ConstantInt *CaseValue = SI->getCaseValue(I);
+    BasicBlock *CaseDest = SI->getSuccessor(I);
+
+    int PhiIndex;
+    PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
+                                                 &PhiIndex);
+    if (!PHI) continue;
+
+    ForwardingNodes[PHI].push_back(PhiIndex);
+  }
+
+  bool Changed = false;
+
+  for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+       E = ForwardingNodes.end(); I != E; ++I) {
+    PHINode *Phi = I->first;
+    SmallVector<int,4> &Indexes = I->second;
+
+    if (Indexes.size() < 2) continue;
+
+    for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+      Phi->setIncomingValue(Indexes[I], SI->getCondition());
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   // If this switch is too complex to want to look at, ignore it.
   if (!isValueEqualityComparison(SI))
@@ -2486,6 +2555,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   if (EliminateDeadSwitchCases(SI))
     return SimplifyCFG(BB) | true;

+  if (ForwardSwitchConditionToPHI(SI))
+    return SimplifyCFG(BB) | true;
+
   return false;
 }
@@ -2530,7 +2602,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
   BasicBlock *BB = BI->getParent();

   // If the Terminator is the only non-phi instruction, simplify the block.
-  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+  BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
   if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
       TryToSimplifyUncondBranchFromEmptyBlock(BB))
     return true;
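The source-level shape ForwardSwitchConditionToPHI targets: every case block is empty except for handing the case value itself to the join point, so the PHI entries can read the switch condition instead and the case blocks fold away. An illustrative example (not taken from the patch):

    // After forwarding, the PHI merging 'R' reads X directly for the first
    // three cases, leaving their blocks empty and removable by SimplifyCFG.
    static int identityOrDefault(int X) {
      int R;
      switch (X) {
      case 0: R = 0; break;
      case 1: R = 1; break;
      case 2: R = 2; break;
      default: R = -1; break;
      }
      return R;
    }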
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a73bf0449813..973b105a1cbb 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,15 +13,18 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Type.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
 #include "llvm/Metadata.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;

-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
-                      RemapFlags Flags) {
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::Anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+                      ValueMapTypeRemapper *TypeMapper) {
   ValueToValueMapTy::iterator I = VM.find(V);

   // If the value already exists in the map, use it.
@@ -29,8 +32,23 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,

   // Global values do not need to be seeded into the VM if they
   // are using the identity mapping.
-  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+  if (isa<GlobalValue>(V) || isa<MDString>(V))
     return VM[V] = const_cast<Value*>(V);
+
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+    // Inline asm may need *type* remapping.
+    FunctionType *NewTy = IA->getFunctionType();
+    if (TypeMapper) {
+      NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+      if (NewTy != IA->getFunctionType())
+        V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+                           IA->hasSideEffects(), IA->isAlignStack());
+    }
+
+    return VM[V] = const_cast<Value*>(V);
+  }
+

   if (const MDNode *MD = dyn_cast<MDNode>(V)) {
     // If this is a module-level metadata and we know that nothing at the module
@@ -45,14 +63,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     // Check all operands to see if any need to be remapped.
     for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
       Value *OP = MD->getOperand(i);
-      if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
+      if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;

       // Ok, at least one operand needs remapping.
       SmallVector<Value*, 4> Elts;
       Elts.reserve(MD->getNumOperands());
       for (i = 0; i != e; ++i) {
         Value *Op = MD->getOperand(i);
-        Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+        Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
       }
       MDNode *NewMD = MDNode::get(V->getContext(), Elts);
       Dummy->replaceAllUsesWith(NewMD);
@@ -75,51 +93,75 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     return 0;

   if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
-    Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+    Function *F =
+      cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
     BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
-                                                       Flags));
+                                                       Flags, TypeMapper));
     return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
   }

-  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-    Value *Op = C->getOperand(i);
-    Value *Mapped = MapValue(Op, VM, Flags);
-    if (Mapped == C) continue;
-
We've already processed some or all - // of the operands, set them up now. - std::vector<Constant*> Ops; - Ops.reserve(C->getNumOperands()); - for (unsigned j = 0; j != i; ++j) - Ops.push_back(cast<Constant>(C->getOperand(i))); + // Otherwise, we have some other constant to remap. Start by checking to see + // if all operands have an identity remapping. + unsigned OpNo = 0, NumOperands = C->getNumOperands(); + Value *Mapped = 0; + for (; OpNo != NumOperands; ++OpNo) { + Value *Op = C->getOperand(OpNo); + Mapped = MapValue(Op, VM, Flags, TypeMapper); + if (Mapped != C) break; + } + + // See if the type mapper wants to remap the type as well. + Type *NewTy = C->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + + // If the result type and all operands match up, then just insert an identity + // mapping. + if (OpNo == NumOperands && NewTy == C->getType()) + return VM[V] = C; + + // Okay, we need to create a new constant. We've already processed some or + // all of the operands, set them all up now. + SmallVector<Constant*, 8> Ops; + Ops.reserve(NumOperands); + for (unsigned j = 0; j != OpNo; ++j) + Ops.push_back(cast<Constant>(C->getOperand(j))); + + // If one of the operands mismatch, push it and the other mapped operands. + if (OpNo != NumOperands) { Ops.push_back(cast<Constant>(Mapped)); - + // Map the rest of the operands that aren't processed yet. - for (++i; i != e; ++i) - Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags))); - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - return VM[V] = CE->getWithOperands(Ops); - if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) - return VM[V] = ConstantArray::get(CA->getType(), Ops); - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) - return VM[V] = ConstantStruct::get(CS->getType(), Ops); - assert(isa<ConstantVector>(C) && "Unknown mapped constant type"); - return VM[V] = ConstantVector::get(Ops); + for (++OpNo; OpNo != NumOperands; ++OpNo) + Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, + Flags, TypeMapper)); } - - // If we reach here, all of the operands of the constant match. - return VM[V] = C; + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return VM[V] = CE->getWithOperands(Ops, NewTy); + if (isa<ConstantArray>(C)) + return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + if (isa<ConstantStruct>(C)) + return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + if (isa<ConstantVector>(C)) + return VM[V] = ConstantVector::get(Ops); + // If this is a no-operand constant, it must be because the type was remapped. + if (isa<UndefValue>(C)) + return VM[V] = UndefValue::get(NewTy); + if (isa<ConstantAggregateZero>(C)) + return VM[V] = ConstantAggregateZero::get(NewTy); + assert(isa<ConstantPointerNull>(C)); + return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); } /// RemapInstruction - Convert the instruction operands from referencing the /// current values into those specified by VMap. /// void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, - RemapFlags Flags) { + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){ // Remap operands. for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, VMap, Flags); + Value *V = MapValue(*op, VMap, Flags, TypeMapper); // If we aren't ignoring missing entries, assert that something happened. 
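The constant-remapping path above uses a scan-then-copy idiom: walk the operands until the first one whose mapping differs, and only then materialize a fresh operand vector, reusing the untouched prefix verbatim. Here is a minimal standalone sketch of that idiom with plain integers standing in for operands and a toy remap() standing in for MapValue; all names and values are illustrative.

#include <cstddef>
#include <iostream>
#include <vector>

// Toy stand-in for MapValue: remap 3 -> 30, leave everything else alone.
static int remap(int V) { return V == 3 ? 30 : V; }

int main() {
  std::vector<int> Ops = {1, 2, 3, 4};

  // Scan for the first operand whose mapping differs (the "OpNo" scan).
  std::size_t OpNo = 0;
  for (; OpNo != Ops.size(); ++OpNo)
    if (remap(Ops[OpNo]) != Ops[OpNo])
      break;

  if (OpNo == Ops.size()) {
    std::cout << "identity mapping: reuse the original constant\n";
    return 0;
  }

  // Otherwise copy the unchanged prefix, then map the remainder.
  std::vector<int> NewOps(Ops.begin(), Ops.begin() + OpNo);
  for (; OpNo != Ops.size(); ++OpNo)
    NewOps.push_back(remap(Ops[OpNo]));

  for (int V : NewOps)
    std::cout << V << ' '; // prints: 1 2 30 4
  std::cout << '\n';
  return 0;
}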
if (V != 0) *op = V; @@ -128,14 +170,32 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, "Referenced value not in value map!"); } - // Remap attached metadata. + // Remap phi nodes' incoming blocks. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); + // If we aren't ignoring missing entries, assert that something happened. + if (V != 0) + PN->setIncomingBlock(i, cast<BasicBlock>(V)); + else + assert((Flags & RF_IgnoreMissingEntries) && + "Referenced block not in value map!"); + } + } + + // Remap attached metadata. Don't bother remapping DebugLoc, it can never + // have mappings to do. SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - I->getAllMetadata(MDs); + I->getAllMetadataOtherThanDebugLoc(MDs); for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { - Value *Old = MI->second; - Value *New = MapValue(Old, VMap, Flags); + MDNode *Old = MI->second; + MDNode *New = MapValue(Old, VMap, Flags, TypeMapper); if (New != Old) - I->setMetadata(MI->first, cast<MDNode>(New)); + I->setMetadata(MI->first, New); } + + // If the instruction's type is being remapped, do so now. + if (TypeMapper) + I->mutateType(TypeMapper->remapType(I->getType())); } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 844284d09c72..94794c35fe0b 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -26,13 +26,11 @@ #include "llvm/Operator.h" #include "llvm/Module.h" #include "llvm/ValueSymbolTable.h" -#include "llvm/TypeSymbolTable.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -42,11 +40,6 @@ #include <cctype> using namespace llvm; -static cl::opt<bool> -EnableDebugInfoComment("enable-debug-info-comment", cl::Hidden, - cl::desc("Enable debug info comments")); - - // Make virtual table appear in this compilation unit. AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {} @@ -140,60 +133,60 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) { // TypePrinting Class: Type printing machinery //===----------------------------------------------------------------------===// -static DenseMap<const Type *, std::string> &getTypeNamesMap(void *M) { - return *static_cast<DenseMap<const Type *, std::string>*>(M); -} - -void TypePrinting::clear() { - getTypeNamesMap(TypeNames).clear(); -} +/// TypePrinting - Type printing machinery. +namespace { +class TypePrinting { + TypePrinting(const TypePrinting &); // DO NOT IMPLEMENT + void operator=(const TypePrinting&); // DO NOT IMPLEMENT +public: -bool TypePrinting::hasTypeName(const Type *Ty) const { - return getTypeNamesMap(TypeNames).count(Ty); -} + /// NamedTypes - The named types that are used by the current module. + std::vector<StructType*> NamedTypes; + + /// NumberedTypes - The numbered types, along with their value. 
+ DenseMap<StructType*, unsigned> NumberedTypes; + -void TypePrinting::addTypeName(const Type *Ty, const std::string &N) { - getTypeNamesMap(TypeNames).insert(std::make_pair(Ty, N)); -} + TypePrinting() {} + ~TypePrinting() {} + + void incorporateTypes(const Module &M); + + void print(Type *Ty, raw_ostream &OS); + + void printStructBody(StructType *Ty, raw_ostream &OS); +}; +} // end anonymous namespace. -TypePrinting::TypePrinting() { - TypeNames = new DenseMap<const Type *, std::string>(); +void TypePrinting::incorporateTypes(const Module &M) { + M.findUsedStructTypes(NamedTypes); + + // The list of struct types we got back includes all the struct types, split + // the unnamed ones out to a numbering and remove the anonymous structs. + unsigned NextNumber = 0; + + std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E; + for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) { + StructType *STy = *I; + + // Ignore anonymous types. + if (STy->isAnonymous()) + continue; + + if (STy->getName().empty()) + NumberedTypes[STy] = NextNumber++; + else + *NextToUse++ = STy; + } + + NamedTypes.erase(NextToUse, NamedTypes.end()); } -TypePrinting::~TypePrinting() { - delete &getTypeNamesMap(TypeNames); -} /// CalcTypeName - Write the specified type to the specified raw_ostream, making /// use of type names or up references to shorten the type name where possible. -void TypePrinting::CalcTypeName(const Type *Ty, - SmallVectorImpl<const Type *> &TypeStack, - raw_ostream &OS, bool IgnoreTopLevelName) { - // Check to see if the type is named. - if (!IgnoreTopLevelName) { - DenseMap<const Type *, std::string> &TM = getTypeNamesMap(TypeNames); - DenseMap<const Type *, std::string>::iterator I = TM.find(Ty); - if (I != TM.end()) { - OS << I->second; - return; - } - } - - // Check to see if the Type is already on the stack... - unsigned Slot = 0, CurSize = TypeStack.size(); - while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type - - // This is another base case for the recursion. In this case, we know - // that we have looped back to a type that we have previously visited. - // Generate the appropriate upreference to handle this. - if (Slot < CurSize) { - OS << '\\' << unsigned(CurSize-Slot); // Here's the upreference - return; - } - - TypeStack.push_back(Ty); // Recursive case: Add us to the stack.. 
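incorporateTypes above peels the unnamed struct types off into a dense numbering while compacting the named ones toward the front of the same vector through the NextToUse output iterator. The same two-pointer idiom works on any vector; below is a standalone sketch with plain strings standing in for struct types, where an empty string plays the role of an unnamed type. All data is illustrative.

#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Types = {"%Foo", "", "%Bar", ""};

  unsigned NextNumber = 0;
  std::vector<std::string>::iterator NextToUse = Types.begin();
  for (std::vector<std::string>::iterator I = Types.begin(), E = Types.end();
       I != E; ++I) {
    if (I->empty())
      std::cout << "unnamed type becomes %" << NextNumber++ << '\n';
    else
      *NextToUse++ = *I; // keep named types, compacted in place
  }
  Types.erase(NextToUse, Types.end());

  for (const std::string &N : Types)
    std::cout << "named type " << N << '\n'; // %Foo, then %Bar
  return 0;
}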
- +void TypePrinting::print(Type *Ty, raw_ostream &OS) { switch (Ty->getTypeID()) { case Type::VoidTyID: OS << "void"; break; case Type::FloatTyID: OS << "float"; break; @@ -206,259 +199,96 @@ void TypePrinting::CalcTypeName(const Type *Ty, case Type::X86_MMXTyID: OS << "x86_mmx"; break; case Type::IntegerTyID: OS << 'i' << cast<IntegerType>(Ty)->getBitWidth(); - break; + return; case Type::FunctionTyID: { - const FunctionType *FTy = cast<FunctionType>(Ty); - CalcTypeName(FTy->getReturnType(), TypeStack, OS); + FunctionType *FTy = cast<FunctionType>(Ty); + print(FTy->getReturnType(), OS); OS << " ("; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I) { if (I != FTy->param_begin()) OS << ", "; - CalcTypeName(*I, TypeStack, OS); + print(*I, OS); } if (FTy->isVarArg()) { if (FTy->getNumParams()) OS << ", "; OS << "..."; } OS << ')'; - break; + return; } case Type::StructTyID: { - const StructType *STy = cast<StructType>(Ty); - if (STy->isPacked()) - OS << '<'; - OS << '{'; - for (StructType::element_iterator I = STy->element_begin(), - E = STy->element_end(); I != E; ++I) { - OS << ' '; - CalcTypeName(*I, TypeStack, OS); - if (llvm::next(I) == STy->element_end()) - OS << ' '; - else - OS << ','; - } - OS << '}'; - if (STy->isPacked()) - OS << '>'; - break; + StructType *STy = cast<StructType>(Ty); + + if (STy->isAnonymous()) + return printStructBody(STy, OS); + + if (!STy->getName().empty()) + return PrintLLVMName(OS, STy->getName(), LocalPrefix); + + DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy); + if (I != NumberedTypes.end()) + OS << '%' << I->second; + else // Not enumerated, print the hex address. + OS << "%\"type 0x" << STy << '\"'; + return; } case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(Ty); - CalcTypeName(PTy->getElementType(), TypeStack, OS); + PointerType *PTy = cast<PointerType>(Ty); + print(PTy->getElementType(), OS); if (unsigned AddressSpace = PTy->getAddressSpace()) OS << " addrspace(" << AddressSpace << ')'; OS << '*'; - break; + return; } case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); + ArrayType *ATy = cast<ArrayType>(Ty); OS << '[' << ATy->getNumElements() << " x "; - CalcTypeName(ATy->getElementType(), TypeStack, OS); + print(ATy->getElementType(), OS); OS << ']'; - break; + return; } case Type::VectorTyID: { - const VectorType *PTy = cast<VectorType>(Ty); + VectorType *PTy = cast<VectorType>(Ty); OS << "<" << PTy->getNumElements() << " x "; - CalcTypeName(PTy->getElementType(), TypeStack, OS); + print(PTy->getElementType(), OS); OS << '>'; - break; + return; } - case Type::OpaqueTyID: - OS << "opaque"; - break; default: OS << "<unrecognized-type>"; - break; + return; } - - TypeStack.pop_back(); // Remove self from stack. } -/// printTypeInt - The internal guts of printing out a type that has a -/// potentially named portion. -/// -void TypePrinting::print(const Type *Ty, raw_ostream &OS, - bool IgnoreTopLevelName) { - // Check to see if the type is named. - DenseMap<const Type*, std::string> &TM = getTypeNamesMap(TypeNames); - if (!IgnoreTopLevelName) { - DenseMap<const Type*, std::string>::iterator I = TM.find(Ty); - if (I != TM.end()) { - OS << I->second; - return; - } +void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { + if (STy->isOpaque()) { + OS << "opaque"; + return; } - - // Otherwise we have a type that has not been named but is a derived type. 
- // Carefully recurse the type hierarchy to print out any contained symbolic - // names. - SmallVector<const Type *, 16> TypeStack; - std::string TypeName; - - raw_string_ostream TypeOS(TypeName); - CalcTypeName(Ty, TypeStack, TypeOS, IgnoreTopLevelName); - OS << TypeOS.str(); - - // Cache type name for later use. - if (!IgnoreTopLevelName) - TM.insert(std::make_pair(Ty, TypeOS.str())); -} - -namespace { - class TypeFinder { - // To avoid walking constant expressions multiple times and other IR - // objects, we keep several helper maps. - DenseSet<const Value*> VisitedConstants; - DenseSet<const Type*> VisitedTypes; - - TypePrinting &TP; - std::vector<const Type*> &NumberedTypes; - public: - TypeFinder(TypePrinting &tp, std::vector<const Type*> &numberedTypes) - : TP(tp), NumberedTypes(numberedTypes) {} - - void Run(const Module &M) { - // Get types from the type symbol table. This gets opaque types referened - // only through derived named types. - const TypeSymbolTable &ST = M.getTypeSymbolTable(); - for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end(); - TI != E; ++TI) - IncorporateType(TI->second); - - // Get types from global variables. - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - IncorporateType(I->getType()); - if (I->hasInitializer()) - IncorporateValue(I->getInitializer()); - } - - // Get types from aliases. - for (Module::const_alias_iterator I = M.alias_begin(), - E = M.alias_end(); I != E; ++I) { - IncorporateType(I->getType()); - IncorporateValue(I->getAliasee()); - } - - // Get types from functions. - for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { - IncorporateType(FI->getType()); - - for (Function::const_iterator BB = FI->begin(), E = FI->end(); - BB != E;++BB) - for (BasicBlock::const_iterator II = BB->begin(), - E = BB->end(); II != E; ++II) { - const Instruction &I = *II; - // Incorporate the type of the instruction and all its operands. - IncorporateType(I.getType()); - for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); - OI != OE; ++OI) - IncorporateValue(*OI); - } - } - } - - private: - void IncorporateType(const Type *Ty) { - // Check to see if we're already visited this type. - if (!VisitedTypes.insert(Ty).second) - return; - - // If this is a structure or opaque type, add a name for the type. - if (((Ty->isStructTy() && cast<StructType>(Ty)->getNumElements()) - || Ty->isOpaqueTy()) && !TP.hasTypeName(Ty)) { - TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size()))); - NumberedTypes.push_back(Ty); - } - - // Recursively walk all contained types. - for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) - IncorporateType(*I); - } - - /// IncorporateValue - This method is used to walk operand lists finding - /// types hiding in constant expressions and other operands that won't be - /// walked in other ways. GlobalValues, basic blocks, instructions, and - /// inst operands are all explicitly enumerated. - void IncorporateValue(const Value *V) { - if (V == 0 || !isa<Constant>(V) || isa<GlobalValue>(V)) return; - - // Already visited? - if (!VisitedConstants.insert(V).second) - return; - - // Check this type. - IncorporateType(V->getType()); - - // Look in operands for types. 
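The deleted machinery above (CalcTypeName with its TypeStack, TypeFinder, and the per-module name cache) existed largely to cope with structural, possibly cyclic types: a recursive type had to be printed with '\N' up-references. With identified structs, a reference is just the struct's name and the body is printed once at its type definition, so recursion terminates by construction. A minimal standalone model of that split, with a hypothetical self-referential node type and pre-rendered element strings:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Struct {
  std::string Name;
  std::vector<std::string> Elements; // element types, already rendered
};

// A reference to a named struct prints as just its name...
static void printRef(const Struct &S) { std::cout << S.Name; }

// ...and the body is printed exactly once, at the "%name = type ..." line.
static void printBody(const Struct &S) {
  std::cout << "{ ";
  for (std::size_t i = 0; i != S.Elements.size(); ++i)
    std::cout << (i ? ", " : "") << S.Elements[i];
  std::cout << " }";
}

int main() {
  // %node = type { i32, %node* }: the self-reference stays a name, no '\1'.
  Struct Node = {"%node", {"i32", "%node*"}};
  printRef(Node);
  std::cout << " = type ";
  printBody(Node);
  std::cout << '\n';
  return 0;
}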
- const Constant *C = cast<Constant>(V); - for (Constant::const_op_iterator I = C->op_begin(), - E = C->op_end(); I != E;++I) - IncorporateValue(*I); - } - }; -} // end anonymous namespace - - -/// AddModuleTypesToPrinter - Add all of the symbolic type names for types in -/// the specified module to the TypePrinter and all numbered types to it and the -/// NumberedTypes table. -static void AddModuleTypesToPrinter(TypePrinting &TP, - std::vector<const Type*> &NumberedTypes, - const Module *M) { - if (M == 0) return; - - // If the module has a symbol table, take all global types and stuff their - // names into the TypeNames map. - const TypeSymbolTable &ST = M->getTypeSymbolTable(); - for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end(); - TI != E; ++TI) { - const Type *Ty = cast<Type>(TI->second); - - // As a heuristic, don't insert pointer to primitive types, because - // they are used too often to have a single useful name. - if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { - const Type *PETy = PTy->getElementType(); - if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) && - !PETy->isOpaqueTy()) - continue; + + if (STy->isPacked()) + OS << '<'; + + if (STy->getNumElements() == 0) { + OS << "{}"; + } else { + StructType::element_iterator I = STy->element_begin(); + OS << "{ "; + print(*I++, OS); + for (StructType::element_iterator E = STy->element_end(); I != E; ++I) { + OS << ", "; + print(*I, OS); } - - // Likewise don't insert primitives either. - if (Ty->isIntegerTy() || Ty->isPrimitiveType()) - continue; - - // Get the name as a string and insert it into TypeNames. - std::string NameStr; - raw_string_ostream NameROS(NameStr); - formatted_raw_ostream NameOS(NameROS); - PrintLLVMName(NameOS, TI->first, LocalPrefix); - NameOS.flush(); - TP.addTypeName(Ty, NameStr); + + OS << " }"; } - - // Walk the entire module to find references to unnamed structure and opaque - // types. This is required for correctness by opaque types (because multiple - // uses of an unnamed opaque type needs to be referred to by the same ID) and - // it shrinks complex recursive structure types substantially in some cases. - TypeFinder(TP, NumberedTypes).Run(*M); + if (STy->isPacked()) + OS << '>'; } -/// WriteTypeSymbolic - This attempts to write the specified type as a symbolic -/// type, iff there is an entry in the modules symbol table for the specified -/// type or one of it's component types. -/// -void llvm::WriteTypeSymbolic(raw_ostream &OS, const Type *Ty, const Module *M) { - TypePrinting Printer; - std::vector<const Type*> NumberedTypes; - AddModuleTypesToPrinter(Printer, NumberedTypes, M); - Printer.print(Ty, OS); -} //===----------------------------------------------------------------------===// // SlotTracker Class: Enumerate slot numbers for unnamed values @@ -481,11 +311,11 @@ private: const Function* TheFunction; bool FunctionProcessed; - /// mMap - The TypePlanes map for the module level data. + /// mMap - The slot map for the module level data. ValueMap mMap; unsigned mNext; - /// fMap - The TypePlanes map for the function level data. + /// fMap - The slot map for the function level data. ValueMap fMap; unsigned fNext; @@ -706,7 +536,7 @@ int SlotTracker::getGlobalSlot(const GlobalValue *V) { // Check for uninitialized state and do lazy initialization. initialize(); - // Find the type plane in the module map + // Find the value in the module map ValueMap::iterator MI = mMap.find(V); return MI == mMap.end() ? 
-1 : (int)MI->second; } @@ -716,7 +546,7 @@ int SlotTracker::getMetadataSlot(const MDNode *N) { // Check for uninitialized state and do lazy initialization. initialize(); - // Find the type plane in the module map + // Find the MDNode in the module map mdn_iterator MI = mdnMap.find(N); return MI == mdnMap.end() ? -1 : (int)MI->second; } @@ -978,7 +808,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // As a special case, print the array as a string if it is an array of // i8 with ConstantInt values. // - const Type *ETy = CA->getType()->getElementType(); + Type *ETy = CA->getType()->getElementType(); if (CA->isString()) { Out << "c\""; PrintEscapedString(CA->getAsString(), Out); @@ -1035,7 +865,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { - const Type *ETy = CP->getType()->getElementType(); + Type *ETy = CP->getType()->getElementType(); assert(CP->getNumOperands() > 0 && "Number of operands for a PackedConst must be > 0"); Out << '<'; @@ -1233,8 +1063,8 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, if (Context == 0) Context = getModuleFromVal(V); TypePrinting TypePrinter; - std::vector<const Type*> NumberedTypes; - AddModuleTypesToPrinter(TypePrinter, NumberedTypes, Context); + if (Context) + TypePrinter.incorporateTypes(*Context); if (PrintType) { TypePrinter.print(V->getType(), Out); Out << ' '; @@ -1251,14 +1081,14 @@ class AssemblyWriter { const Module *TheModule; TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; - std::vector<const Type*> NumberedTypes; public: inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M, AssemblyAnnotationWriter *AAW) : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { - AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); + if (M) + TypePrinter.incorporateTypes(*M); } void printMDNodeBody(const MDNode *MD); @@ -1271,7 +1101,7 @@ public: void writeAllMDNodes(); - void printTypeSymbolTable(const TypeSymbolTable &ST); + void printTypeIdentities(); void printGlobal(const GlobalVariable *GV); void printAlias(const GlobalAlias *GV); void printFunction(const Function *F); @@ -1366,9 +1196,7 @@ void AssemblyWriter::printModule(const Module *M) { Out << " ]"; } - // Loop over the symbol table, emitting all id'd types. - if (!M->getTypeSymbolTable().empty() || !NumberedTypes.empty()) Out << '\n'; - printTypeSymbolTable(M->getTypeSymbolTable()); + printTypeIdentities(); // Output all globals. if (!M->global_empty()) Out << '\n'; @@ -1401,7 +1229,25 @@ void AssemblyWriter::printModule(const Module *M) { } void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { - Out << "!" << NMD->getName() << " = !{"; + Out << '!'; + StringRef Name = NMD->getName(); + if (Name.empty()) { + Out << "<empty name> "; + } else { + if (isalpha(Name[0]) || Name[0] == '-' || Name[0] == '$' || + Name[0] == '.' || Name[0] == '_') + Out << Name[0]; + else + Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F); + for (unsigned i = 1, e = Name.size(); i != e; ++i) { + unsigned char C = Name[i]; + if (isalnum(C) || C == '-' || C == '$' || C == '.' 
|| C == '_') + Out << C; + else + Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); + } + } + Out << " = !{"; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { if (i) Out << ", "; int Slot = Machine.getMetadataSlot(NMD->getOperand(i)); @@ -1508,7 +1354,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { const Constant *Aliasee = GA->getAliasee(); - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) { + if (Aliasee == 0) { + TypePrinter.print(GA->getType(), Out); + Out << " <<NULL ALIASEE>>"; + } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) { TypePrinter.print(GV->getType(), Out); Out << ' '; PrintLLVMName(Out, GV); @@ -1534,26 +1383,40 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { Out << '\n'; } -void AssemblyWriter::printTypeSymbolTable(const TypeSymbolTable &ST) { +void AssemblyWriter::printTypeIdentities() { + if (TypePrinter.NumberedTypes.empty() && + TypePrinter.NamedTypes.empty()) + return; + + Out << '\n'; + + // We know all the numbers that each type is used and we know that it is a + // dense assignment. Convert the map to an index table. + std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size()); + for (DenseMap<StructType*, unsigned>::iterator I = + TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end(); + I != E; ++I) { + assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?"); + NumberedTypes[I->second] = I->first; + } + // Emit all numbered types. for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) { Out << '%' << i << " = type "; - + // Make sure we print out at least one level of the type structure, so // that we do not get %2 = type %2 - TypePrinter.printAtLeastOneLevel(NumberedTypes[i], Out); + TypePrinter.printStructBody(NumberedTypes[i], Out); Out << '\n'; } - - // Print the named types. - for (TypeSymbolTable::const_iterator TI = ST.begin(), TE = ST.end(); - TI != TE; ++TI) { - PrintLLVMName(Out, TI->first, LocalPrefix); + + for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) { + PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix); Out << " = type "; // Make sure we print out at least one level of the type structure, so // that we do not get %FILE = type %FILE - TypePrinter.printAtLeastOneLevel(TI->second, Out); + TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out); Out << '\n'; } } @@ -1735,18 +1598,6 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } -/// printDebugLoc - Print DebugLoc. -static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { - OS << DL.getLine() << ":" << DL.getCol(); - if (MDNode *N = DL.getInlinedAt(getGlobalContext())) { - DebugLoc IDL = DebugLoc::getFromDILocation(N); - if (!IDL.isUnknown()) { - OS << "@"; - printDebugLoc(IDL,OS); - } - } -} - /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. 
/// @@ -1754,43 +1605,6 @@ void AssemblyWriter::printInfoComment(const Value &V) { if (AnnotationWriter) { AnnotationWriter->printInfoComment(V, Out); return; - } else if (EnableDebugInfoComment) { - bool Padded = false; - if (const Instruction *I = dyn_cast<Instruction>(&V)) { - const DebugLoc &DL = I->getDebugLoc(); - if (!DL.isUnknown()) { - if (!Padded) { - Out.PadToColumn(50); - Padded = true; - Out << ";"; - } - Out << " [debug line = "; - printDebugLoc(DL,Out); - Out << "]"; - } - if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { - const MDNode *Var = DDI->getVariable(); - if (!Padded) { - Out.PadToColumn(50); - Padded = true; - Out << ";"; - } - if (Var && Var->getNumOperands() >= 2) - if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2))) - Out << " [debug variable = " << MDS->getString() << "]"; - } - else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { - const MDNode *Var = DVI->getVariable(); - if (!Padded) { - Out.PadToColumn(50); - Padded = true; - Out << ";"; - } - if (Var && Var->getNumOperands() >= 2) - if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2))) - Out << " [debug variable = " << MDS->getString() << "]"; - } - } } } @@ -1873,16 +1687,16 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(I.getOperand(i), true); } Out << ']'; - } else if (isa<PHINode>(I)) { + } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) { Out << ' '; TypePrinter.print(I.getType(), Out); Out << ' '; - for (unsigned op = 0, Eop = I.getNumOperands(); op < Eop; op += 2) { + for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) { if (op) Out << ", "; Out << "[ "; - writeOperand(I.getOperand(op ), false); Out << ", "; - writeOperand(I.getOperand(op+1), false); Out << " ]"; + writeOperand(PN->getIncomingValue(op), false); Out << ", "; + writeOperand(PN->getIncomingBlock(op), false); Out << " ]"; } } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) { Out << ' '; @@ -1916,9 +1730,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } Operand = CI->getCalledValue(); - const PointerType *PTy = cast<PointerType>(Operand->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - const Type *RetTy = FTy->getReturnType(); + PointerType *PTy = cast<PointerType>(Operand->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + Type *RetTy = FTy->getReturnType(); const AttrListPtr &PAL = CI->getAttributes(); if (PAL.getRetAttributes() != Attribute::None) @@ -1949,9 +1763,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << ' ' << Attribute::getAsString(PAL.getFnAttributes()); } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { Operand = II->getCalledValue(); - const PointerType *PTy = cast<PointerType>(Operand->getType()); - const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - const Type *RetTy = FTy->getReturnType(); + PointerType *PTy = cast<PointerType>(Operand->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + Type *RetTy = FTy->getReturnType(); const AttrListPtr &PAL = II->getAttributes(); // Print the calling convention being used. @@ -2034,7 +1848,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // omit the type from all but the first operand. If the instruction has // different type operands (for example br), then they are all printed. 
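The PrintAllTypes heuristic mentioned above is worth seeing in isolation: if every operand shares one type, print that type once up front; otherwise fall back to a type per operand (as for br). A standalone sketch with toy (type, name) pairs; the operand data is illustrative and the helper assumes at least one operand.

#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

static void printOperands(
    const std::vector<std::pair<std::string, std::string>> &Ops) {
  bool PrintAllTypes = false;
  const std::string &TheType = Ops.front().first;
  for (const auto &Op : Ops)
    if (Op.first != TheType) {
      PrintAllTypes = true; // mixed operand types, e.g. a conditional br
      break;
    }

  if (!PrintAllTypes)
    std::cout << TheType << ' ';
  for (std::size_t i = 0; i != Ops.size(); ++i) {
    if (i)
      std::cout << ", ";
    if (PrintAllTypes)
      std::cout << Ops[i].first << ' ';
    std::cout << Ops[i].second;
  }
  std::cout << '\n';
}

int main() {
  printOperands({{"i32", "%a"}, {"i32", "%b"}});        // i32 %a, %b
  printOperands({{"i1", "%cond"}, {"label", "%then"}}); // i1 %cond, label %then
  return 0;
}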
bool PrintAllTypes = false; - const Type *TheType = Operand->getType(); + Type *TheType = Operand->getType(); // Select, Store and ShuffleVector always print all types. if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I) @@ -2154,7 +1968,15 @@ void Type::print(raw_ostream &OS) const { OS << "<null Type>"; return; } - TypePrinting().print(this, OS); + TypePrinting TP; + TP.print(const_cast<Type*>(this), OS); + + // If the type is a named struct type, print the body as well. + if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this))) + if (!STy->isAnonymous()) { + OS << " = type "; + TP.printStructBody(STy, OS); + } } void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { @@ -2210,14 +2032,7 @@ void Value::printCustom(raw_ostream &OS) const { void Value::dump() const { print(dbgs()); dbgs() << '\n'; } // Type::dump - allow easy printing of Types from the debugger. -// This one uses type names from the given context module -void Type::dump(const Module *Context) const { - WriteTypeSymbolic(dbgs(), this, Context); - dbgs() << '\n'; -} - -// Type::dump - allow easy printing of Types from the debugger. -void Type::dump() const { dump(0); } +void Type::dump() const { print(dbgs()); } // Module::dump() - Allow printing of Modules from the debugger. void Module::dump() const { print(dbgs(), 0); } diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index ee257dbde5f3..bf6efa1645a2 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -74,6 +74,8 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "naked "; if (Attrs & Attribute::Hotpatch) Result += "hotpatch "; + if (Attrs & Attribute::NonLazyBind) + Result += "nonlazybind "; if (Attrs & Attribute::StackAlignment) { Result += "alignstack("; Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs)); diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index f8f15caec91d..9e93ff370e25 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -28,558 +28,77 @@ using namespace llvm; static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { assert(F && "Illegal to upgrade a non-existent Function."); - // Get the Function's name. - const std::string& Name = F->getName(); - - // Convenience - const FunctionType *FTy = F->getFunctionType(); - // Quickly eliminate it, if it's not a candidate. - if (Name.length() <= 8 || Name[0] != 'l' || Name[1] != 'l' || - Name[2] != 'v' || Name[3] != 'm' || Name[4] != '.') + StringRef Name = F->getName(); + if (Name.size() <= 8 || !Name.startswith("llvm.")) return false; + Name = Name.substr(5); // Strip off "llvm." + const FunctionType *FTy = F->getFunctionType(); Module *M = F->getParent(); - switch (Name[5]) { + + switch (Name[0]) { default: break; - case 'a': - // This upgrades the llvm.atomic.lcs, llvm.atomic.las, llvm.atomic.lss, - // and atomics with default address spaces to their new names to their new - // function name (e.g. 
llvm.atomic.add.i32 => llvm.atomic.add.i32.p0i32) - if (Name.compare(5,7,"atomic.",7) == 0) { - if (Name.compare(12,3,"lcs",3) == 0) { - std::string::size_type delim = Name.find('.',12); - F->setName("llvm.atomic.cmp.swap" + Name.substr(delim) + - ".p0" + Name.substr(delim+1)); - NewFn = F; - return true; - } - else if (Name.compare(12,3,"las",3) == 0) { - std::string::size_type delim = Name.find('.',12); - F->setName("llvm.atomic.load.add"+Name.substr(delim) - + ".p0" + Name.substr(delim+1)); - NewFn = F; - return true; - } - else if (Name.compare(12,3,"lss",3) == 0) { - std::string::size_type delim = Name.find('.',12); - F->setName("llvm.atomic.load.sub"+Name.substr(delim) - + ".p0" + Name.substr(delim+1)); - NewFn = F; - return true; - } - else if (Name.rfind(".p") == std::string::npos) { - // We don't have an address space qualifier so this has be upgraded - // to the new name. Copy the type name at the end of the intrinsic - // and add to it - std::string::size_type delim = Name.find_last_of('.'); - assert(delim != std::string::npos && "can not find type"); - F->setName(Name + ".p0" + Name.substr(delim+1)); - NewFn = F; - return true; - } - } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) { - if (((Name.compare(14, 5, "vmovl", 5) == 0 || - Name.compare(14, 5, "vaddl", 5) == 0 || - Name.compare(14, 5, "vsubl", 5) == 0 || - Name.compare(14, 5, "vaddw", 5) == 0 || - Name.compare(14, 5, "vsubw", 5) == 0 || - Name.compare(14, 5, "vmlal", 5) == 0 || - Name.compare(14, 5, "vmlsl", 5) == 0 || - Name.compare(14, 5, "vabdl", 5) == 0 || - Name.compare(14, 5, "vabal", 5) == 0) && - (Name.compare(19, 2, "s.", 2) == 0 || - Name.compare(19, 2, "u.", 2) == 0)) || - - (Name.compare(14, 4, "vaba", 4) == 0 && - (Name.compare(18, 2, "s.", 2) == 0 || - Name.compare(18, 2, "u.", 2) == 0)) || - - (Name.compare(14, 6, "vmovn.", 6) == 0)) { - - // Calls to these are transformed into IR without intrinsics. - NewFn = 0; - return true; - } - // Old versions of NEON ld/st intrinsics are missing alignment arguments. - bool isVLd = (Name.compare(14, 3, "vld", 3) == 0); - bool isVSt = (Name.compare(14, 3, "vst", 3) == 0); - if (isVLd || isVSt) { - unsigned NumVecs = Name.at(17) - '0'; - if (NumVecs == 0 || NumVecs > 4) - return false; - bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0); - if (!isLaneOp && Name.at(18) != '.') - return false; - unsigned ExpectedArgs = 2; // for the address and alignment - if (isVSt || isLaneOp) - ExpectedArgs += NumVecs; - if (isLaneOp) - ExpectedArgs += 1; // for the lane number - unsigned NumP = FTy->getNumParams(); - if (NumP != ExpectedArgs - 1) - return false; - - // Change the name of the old (bad) intrinsic, because - // its type is incorrect, but we cannot overload that name. - F->setName(""); - - // One argument is missing: add the alignment argument. - std::vector<const Type*> NewParams; - for (unsigned p = 0; p < NumP; ++p) - NewParams.push_back(FTy->getParamType(p)); - NewParams.push_back(Type::getInt32Ty(F->getContext())); - FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), - NewParams, false); - NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy)); - return true; - } - } - break; - case 'b': - // This upgrades the name of the llvm.bswap intrinsic function to only use - // a single type name for overloading. We only care about the old format - // 'llvm.bswap.i*.i*', so check for 'bswap.' and then for there being - // a '.' after 'bswap.' 
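These legacy renames are pure string splices on the intrinsic name: locate the delimiting '.' and drop or duplicate the type token as needed. A standalone sketch of the bswap case, using the same find/substr arithmetic as the removed code below; the helper name is illustrative.

#include <iostream>
#include <string>

// 'llvm.bswap.i32.i32' -> 'llvm.bswap.i32': keep "llvm.bswap" (10 chars) and
// splice the trailing ".i*" back on, as in Name.substr(0,10)+Name.substr(delim).
static std::string upgradeBswapName(const std::string &Name) {
  std::string::size_type Delim = Name.find('.', 11);
  if (Delim == std::string::npos)
    return Name; // already in the new single-type form
  return Name.substr(0, 10) + Name.substr(Delim);
}

int main() {
  std::cout << upgradeBswapName("llvm.bswap.i32.i32") << '\n'; // llvm.bswap.i32
  std::cout << upgradeBswapName("llvm.bswap.i16") << '\n';     // unchanged
  return 0;
}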
- if (Name.compare(5,6,"bswap.",6) == 0) { - std::string::size_type delim = Name.find('.',11); - - if (delim != std::string::npos) { - // Construct the new name as 'llvm.bswap' + '.i*' - F->setName(Name.substr(0,10)+Name.substr(delim)); - NewFn = F; - return true; - } - } - break; + case 'p': + // This upgrades the llvm.prefetch intrinsic to accept one more parameter, + // which is a instruction / data cache identifier. The old version only + // implicitly accepted the data version. + if (Name == "prefetch") { + // Don't do anything if it has the correct number of arguments already + if (FTy->getNumParams() == 4) + break; - case 'c': - // We only want to fix the 'llvm.ct*' intrinsics which do not have the - // correct return type, so we check for the name, and then check if the - // return type does not match the parameter type. - if ( (Name.compare(5,5,"ctpop",5) == 0 || - Name.compare(5,4,"ctlz",4) == 0 || - Name.compare(5,4,"cttz",4) == 0) && - FTy->getReturnType() != FTy->getParamType(0)) { - // We first need to change the name of the old (bad) intrinsic, because - // its type is incorrect, but we cannot overload that name. We - // arbitrarily unique it here allowing us to construct a correctly named + assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!"); + // We first need to change the name of the old (bad) intrinsic, because + // its type is incorrect, but we cannot overload that name. We + // arbitrarily unique it here allowing us to construct a correctly named // and typed function below. + std::string NameTmp = F->getName(); F->setName(""); - - // Now construct the new intrinsic with the correct name and type. We - // leave the old function around in order to query its type, whatever it - // may be, and correctly convert up to the new type. - NewFn = cast<Function>(M->getOrInsertFunction(Name, + NewFn = cast<Function>(M->getOrInsertFunction(NameTmp, + FTy->getReturnType(), FTy->getParamType(0), - FTy->getParamType(0), - (Type *)0)); + FTy->getParamType(1), + FTy->getParamType(2), + FTy->getParamType(2), + (Type*)0)); return true; } - break; - case 'e': - // The old llvm.eh.selector.i32 is equivalent to the new llvm.eh.selector. - if (Name.compare("llvm.eh.selector.i32") == 0) { - F->setName("llvm.eh.selector"); - NewFn = F; - return true; - } - // The old llvm.eh.typeid.for.i32 is equivalent to llvm.eh.typeid.for. - if (Name.compare("llvm.eh.typeid.for.i32") == 0) { - F->setName("llvm.eh.typeid.for"); - NewFn = F; - return true; - } - // Convert the old llvm.eh.selector.i64 to a call to llvm.eh.selector. - if (Name.compare("llvm.eh.selector.i64") == 0) { - NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_selector); - return true; - } - // Convert the old llvm.eh.typeid.for.i64 to a call to llvm.eh.typeid.for. 
- if (Name.compare("llvm.eh.typeid.for.i64") == 0) { - NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_typeid_for); - return true; - } break; - - case 'm': { - // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset to the - // new format that allows overloading the pointer for different address - // space (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16) - const char* NewFnName = NULL; - if (Name.compare(5,8,"memcpy.i",8) == 0) { - if (Name[13] == '8') - NewFnName = "llvm.memcpy.p0i8.p0i8.i8"; - else if (Name.compare(13,2,"16") == 0) - NewFnName = "llvm.memcpy.p0i8.p0i8.i16"; - else if (Name.compare(13,2,"32") == 0) - NewFnName = "llvm.memcpy.p0i8.p0i8.i32"; - else if (Name.compare(13,2,"64") == 0) - NewFnName = "llvm.memcpy.p0i8.p0i8.i64"; - } else if (Name.compare(5,9,"memmove.i",9) == 0) { - if (Name[14] == '8') - NewFnName = "llvm.memmove.p0i8.p0i8.i8"; - else if (Name.compare(14,2,"16") == 0) - NewFnName = "llvm.memmove.p0i8.p0i8.i16"; - else if (Name.compare(14,2,"32") == 0) - NewFnName = "llvm.memmove.p0i8.p0i8.i32"; - else if (Name.compare(14,2,"64") == 0) - NewFnName = "llvm.memmove.p0i8.p0i8.i64"; - } - else if (Name.compare(5,8,"memset.i",8) == 0) { - if (Name[13] == '8') - NewFnName = "llvm.memset.p0i8.i8"; - else if (Name.compare(13,2,"16") == 0) - NewFnName = "llvm.memset.p0i8.i16"; - else if (Name.compare(13,2,"32") == 0) - NewFnName = "llvm.memset.p0i8.i32"; - else if (Name.compare(13,2,"64") == 0) - NewFnName = "llvm.memset.p0i8.i64"; - } + case 'x': { + const char *NewFnName = NULL; + // This fixes the poorly named crc32 intrinsics. + if (Name == "x86.sse42.crc32.8") + NewFnName = "llvm.x86.sse42.crc32.32.8"; + else if (Name == "x86.sse42.crc32.16") + NewFnName = "llvm.x86.sse42.crc32.32.16"; + else if (Name == "x86.sse42.crc32.32") + NewFnName = "llvm.x86.sse42.crc32.32.32"; + else if (Name == "x86.sse42.crc64.8") + NewFnName = "llvm.x86.sse42.crc32.64.8"; + else if (Name == "x86.sse42.crc64.64") + NewFnName = "llvm.x86.sse42.crc32.64.64"; + if (NewFnName) { - NewFn = cast<Function>(M->getOrInsertFunction(NewFnName, - FTy->getReturnType(), - FTy->getParamType(0), - FTy->getParamType(1), - FTy->getParamType(2), - FTy->getParamType(3), - Type::getInt1Ty(F->getContext()), - (Type *)0)); + F->setName(NewFnName); + NewFn = F; return true; } - break; - } - case 'p': - // This upgrades the llvm.part.select overloaded intrinsic names to only - // use one type specifier in the name. We only care about the old format - // 'llvm.part.select.i*.i*', and solve as above with bswap. - if (Name.compare(5,12,"part.select.",12) == 0) { - std::string::size_type delim = Name.find('.',17); - - if (delim != std::string::npos) { - // Construct a new name as 'llvm.part.select' + '.i*' - F->setName(Name.substr(0,16)+Name.substr(delim)); - NewFn = F; - return true; - } - break; - } - // This upgrades the llvm.part.set intrinsics similarly as above, however - // we care about 'llvm.part.set.i*.i*.i*', but only the first two types - // must match. There is an additional type specifier after these two - // matching types that we must retain when upgrading. Thus, we require - // finding 2 periods, not just one, after the intrinsic name. 
- if (Name.compare(5,9,"part.set.",9) == 0) { - std::string::size_type delim = Name.find('.',14); - - if (delim != std::string::npos && - Name.find('.',delim+1) != std::string::npos) { - // Construct a new name as 'llvm.part.select' + '.i*.i*' - F->setName(Name.substr(0,13)+Name.substr(delim)); - NewFn = F; - return true; - } - break; - } - - break; - case 'x': - // This fixes the poorly named crc32 intrinsics - if (Name.compare(5, 13, "x86.sse42.crc", 13) == 0) { - const char* NewFnName = NULL; - if (Name.compare(18, 2, "32", 2) == 0) { - if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) { - NewFnName = "llvm.x86.sse42.crc32.32.8"; - } else if (Name.compare(20, 3, ".16") == 0 && Name.length() == 23) { - NewFnName = "llvm.x86.sse42.crc32.32.16"; - } else if (Name.compare(20, 3, ".32") == 0 && Name.length() == 23) { - NewFnName = "llvm.x86.sse42.crc32.32.32"; - } - } - else if (Name.compare(18, 2, "64", 2) == 0) { - if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) { - NewFnName = "llvm.x86.sse42.crc32.64.8"; - } else if (Name.compare(20, 3, ".64") == 0 && Name.length() == 23) { - NewFnName = "llvm.x86.sse42.crc32.64.64"; - } - } - if (NewFnName) { - F->setName(NewFnName); - NewFn = F; - return true; - } - } - - // This fixes all MMX shift intrinsic instructions to take a - // x86_mmx instead of a v1i64, v2i32, v4i16, or v8i8. - if (Name.compare(5, 8, "x86.mmx.", 8) == 0) { - const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext()); - - if (Name.compare(13, 4, "padd", 4) == 0 || - Name.compare(13, 4, "psub", 4) == 0 || - Name.compare(13, 4, "pmul", 4) == 0 || - Name.compare(13, 5, "pmadd", 5) == 0 || - Name.compare(13, 4, "pand", 4) == 0 || - Name.compare(13, 3, "por", 3) == 0 || - Name.compare(13, 4, "pxor", 4) == 0 || - Name.compare(13, 4, "pavg", 4) == 0 || - Name.compare(13, 4, "pmax", 4) == 0 || - Name.compare(13, 4, "pmin", 4) == 0 || - Name.compare(13, 4, "psad", 4) == 0 || - Name.compare(13, 4, "psll", 4) == 0 || - Name.compare(13, 4, "psrl", 4) == 0 || - Name.compare(13, 4, "psra", 4) == 0 || - Name.compare(13, 4, "pack", 4) == 0 || - Name.compare(13, 6, "punpck", 6) == 0 || - Name.compare(13, 4, "pcmp", 4) == 0) { - assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!"); - const Type *SecondParamTy = X86_MMXTy; - - if (Name.compare(13, 5, "pslli", 5) == 0 || - Name.compare(13, 5, "psrli", 5) == 0 || - Name.compare(13, 5, "psrai", 5) == 0) - SecondParamTy = FTy->getParamType(1); - - // Don't do anything if it has the correct types. - if (FTy->getReturnType() == X86_MMXTy && - FTy->getParamType(0) == X86_MMXTy && - FTy->getParamType(1) == SecondParamTy) - break; - - // We first need to change the name of the old (bad) intrinsic, because - // its type is incorrect, but we cannot overload that name. We - // arbitrarily unique it here allowing us to construct a correctly named - // and typed function below. - F->setName(""); - - // Now construct the new intrinsic with the correct name and type. We - // leave the old function around in order to query its type, whatever it - // may be, and correctly convert up to the new type. - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, X86_MMXTy, - SecondParamTy, (Type*)0)); - return true; - } - - if (Name.compare(13, 8, "maskmovq", 8) == 0) { - // Don't do anything if it has the correct types. 
- if (FTy->getParamType(0) == X86_MMXTy && - FTy->getParamType(1) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - FTy->getReturnType(), - X86_MMXTy, - X86_MMXTy, - FTy->getParamType(2), - (Type*)0)); - return true; - } - - if (Name.compare(13, 8, "pmovmskb", 8) == 0) { - if (FTy->getParamType(0) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - FTy->getReturnType(), - X86_MMXTy, - (Type*)0)); - return true; - } - - if (Name.compare(13, 5, "movnt", 5) == 0) { - if (FTy->getParamType(1) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - FTy->getReturnType(), - FTy->getParamType(0), - X86_MMXTy, - (Type*)0)); - return true; - } - - if (Name.compare(13, 7, "palignr", 7) == 0) { - if (FTy->getReturnType() == X86_MMXTy && - FTy->getParamType(0) == X86_MMXTy && - FTy->getParamType(1) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, - X86_MMXTy, - X86_MMXTy, - FTy->getParamType(2), - (Type*)0)); - return true; - } - - if (Name.compare(13, 5, "pextr", 5) == 0) { - if (FTy->getParamType(0) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - FTy->getReturnType(), - X86_MMXTy, - FTy->getParamType(1), - (Type*)0)); - return true; - } - - if (Name.compare(13, 5, "pinsr", 5) == 0) { - if (FTy->getReturnType() == X86_MMXTy && - FTy->getParamType(0) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, - X86_MMXTy, - FTy->getParamType(1), - FTy->getParamType(2), - (Type*)0)); - return true; - } - - if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) { - if (FTy->getReturnType() == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, - FTy->getParamType(0), - (Type*)0)); - return true; - } - - if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) { - if (FTy->getParamType(0) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - FTy->getReturnType(), - X86_MMXTy, - (Type*)0)); - return true; - } - - if (Name.compare(13, 8, "vec.init", 8) == 0) { - if (FTy->getReturnType() == X86_MMXTy) - break; - - F->setName(""); - - if (Name.compare(21, 2, ".b", 2) == 0) - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, - FTy->getParamType(0), - FTy->getParamType(1), - FTy->getParamType(2), - FTy->getParamType(3), - FTy->getParamType(4), - FTy->getParamType(5), - FTy->getParamType(6), - FTy->getParamType(7), - (Type*)0)); - else if (Name.compare(21, 2, ".w", 2) == 0) - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, - FTy->getParamType(0), - FTy->getParamType(1), - FTy->getParamType(2), - FTy->getParamType(3), - (Type*)0)); - else if (Name.compare(21, 2, ".d", 2) == 0) - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, - FTy->getParamType(0), - FTy->getParamType(1), - (Type*)0)); - return true; - } - - - if (Name.compare(13, 9, "vec.ext.d", 9) == 0) { - if (FTy->getReturnType() == X86_MMXTy && - FTy->getParamType(0) == X86_MMXTy) - break; - - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(Name, - X86_MMXTy, - X86_MMXTy, - FTy->getParamType(1), - (Type*)0)); - return true; - } - - if (Name.compare(13, 9, "emms", 4) == 0 || - Name.compare(13, 9, "femms", 5) == 0) { - NewFn = 0; - break; - } - - // We really shouldn't get here ever. 
- assert(0 && "Invalid MMX intrinsic!"); - break; - } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 || - Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 || - Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 || - Name.compare(5,15,"x86.sse2.movs.d",15) == 0 || - Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 || - Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 || - Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 || - Name.compare(5,20,"x86.sse2.punpckh.qdq",20) == 0 || - Name.compare(5,20,"x86.sse2.punpckl.qdq",20) == 0) { - // Calls to these intrinsics are transformed into ShuffleVector's. - NewFn = 0; - return true; - } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) { - // Calls to these intrinsics are transformed into vector multiplies. - NewFn = 0; + // Calls to these instructions are transformed into unaligned loads. + if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" || + Name == "x86.sse2.loadu.pd") return true; - } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 || - Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) { - // Calls to these intrinsics are transformed into vector shuffles, shifts, - // or 0. - NewFn = 0; - return true; - } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 || - Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 || - Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) { - // Calls to these instructions are transformed into unaligned loads. - NewFn = 0; - return true; - } else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 || - Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 || - Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 || - Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) { - // Calls to these instructions are transformed into nontemporal stores. - NewFn = 0; - return true; - } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) { - // This is an SSE/MMX instruction. - const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext()); - NewFn = - cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w", - X86_MMXTy, - X86_MMXTy, - Type::getInt8Ty(F->getContext()), - (Type*)0)); + + // Calls to these instructions are transformed into nontemporal stores. + if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" || + Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i") return true; - } break; } + } // This may not belong here. This function is effectively being overloaded // to both detect an intrinsic which needs upgrading, and to provide the @@ -601,105 +120,10 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { } bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { - StringRef Name(GV->getName()); - - // We are only upgrading one symbol here. - if (Name == ".llvm.eh.catch.all.value") { - GV->setName("llvm.eh.catch.all.value"); - return true; - } - + // Nothing to do yet. return false; } -/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results -/// have vector elements twice as big as one or both source operands, do the -/// sign- or zero-extension that used to be handled by intrinsics. The -/// extended values are returned via V0 and V1. 
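The removed ExtendNEONArgs helper decodes the operation from fixed character positions in the intrinsic name: after the 14-character "llvm.arm.neon." prefix, the fifth opcode character (index 18) distinguishes 'long' from 'wide' forms, and the next one (index 19) gives signedness. A standalone sketch of that decoding; the sample names are illustrative.

#include <iostream>
#include <string>

int main() {
  const std::string Names[] = {"llvm.arm.neon.vaddls.v4i32",
                               "llvm.arm.neon.vaddwu.v4i32"};
  for (const std::string &Name : Names) {
    bool isLong = (Name.at(18) == 'l');   // 'l' = long form, 'w' = wide form
    bool isSigned = (Name.at(19) == 's'); // 's' = signed, 'u' = unsigned
    std::cout << Name << ": " << (isSigned ? "sign" : "zero") << "-extend "
              << (isLong ? "both operands" : "the second operand only")
              << '\n';
  }
  return 0;
}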
-static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1, - Value *&V0, Value *&V1) { - Function *F = CI->getCalledFunction(); - const std::string& Name = F->getName(); - bool isLong = (Name.at(18) == 'l'); - bool isSigned = (Name.at(19) == 's'); - - if (isSigned) { - if (isLong) - V0 = new SExtInst(Arg0, CI->getType(), "", CI); - else - V0 = Arg0; - V1 = new SExtInst(Arg1, CI->getType(), "", CI); - } else { - if (isLong) - V0 = new ZExtInst(Arg0, CI->getType(), "", CI); - else - V0 = Arg0; - V1 = new ZExtInst(Arg1, CI->getType(), "", CI); - } -} - -/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba, -/// or vabal intrinsics, construct a call to a vabd intrinsic. Examine the -/// name of the old intrinsic to determine whether to use a signed or unsigned -/// vabd intrinsic. Get the type from the old call instruction, adjusted for -/// half-size vector elements if the old intrinsic was vabdl or vabal. -static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) { - Function *F = CI->getCalledFunction(); - const std::string& Name = F->getName(); - bool isLong = (Name.at(18) == 'l'); - bool isSigned = (Name.at(isLong ? 19 : 18) == 's'); - - Intrinsic::ID intID; - if (isSigned) - intID = Intrinsic::arm_neon_vabds; - else - intID = Intrinsic::arm_neon_vabdu; - - const Type *Ty = CI->getType(); - if (isLong) - Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty)); - - Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1); - Value *Operands[2]; - Operands[0] = Arg0; - Operands[1] = Arg1; - return CallInst::Create(VABD, Operands, Operands+2, - "upgraded."+CI->getName(), CI); -} - -/// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn. -static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI, - Value **Operands, unsigned NumOps, - bool AssignName = true) { - // Construct a new CallInst. - CallInst *NewCI = - CallInst::Create(NewFn, Operands, Operands + NumOps, - AssignName ? "upgraded." + OldCI->getName() : "", OldCI); - - NewCI->setTailCall(OldCI->isTailCall()); - NewCI->setCallingConv(OldCI->getCallingConv()); - - // Handle any uses of the old CallInst. If the type has changed, add a cast. - if (!OldCI->use_empty()) { - if (OldCI->getType() != NewCI->getType()) { - Function *OldFn = OldCI->getCalledFunction(); - CastInst *RetCast = - CastInst::Create(CastInst::getCastOpcode(NewCI, true, - OldFn->getReturnType(), true), - NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI); - - // Replace all uses of the old call with the new cast which has the - // correct type. - OldCI->replaceAllUsesWith(RetCast); - } else { - OldCI->replaceAllUsesWith(NewCI); - } - } - - // Clean up the old call now that it has been completely upgraded. - OldCI->eraseFromParent(); -} - // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the // upgraded intrinsic. All argument and return casting must be provided in // order to seamlessly integrate with existing context. @@ -711,284 +135,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { assert(F && "CallInst has no function associated with it."); if (!NewFn) { - // Get the Function's name. - const std::string& Name = F->getName(); - - // Upgrade ARM NEON intrinsics. - if (Name.compare(5, 9, "arm.neon.", 9) == 0) { - Instruction *NewI; - Value *V0, *V1; - if (Name.compare(14, 7, "vmovls.", 7) == 0) { - NewI = new SExtInst(CI->getArgOperand(0), CI->getType(), - "upgraded." 
+ CI->getName(), CI); - } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) { - NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(), - "upgraded." + CI->getName(), CI); - } else if (Name.compare(14, 4, "vadd", 4) == 0) { - ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1); - NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI); - } else if (Name.compare(14, 4, "vsub", 4) == 0) { - ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1); - NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI); - } else if (Name.compare(14, 4, "vmul", 4) == 0) { - ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1); - NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI); - } else if (Name.compare(14, 4, "vmla", 4) == 0) { - ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1); - Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI); - NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI, - "upgraded."+CI->getName(), CI); - } else if (Name.compare(14, 4, "vmls", 4) == 0) { - ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1); - Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI); - NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI, - "upgraded."+CI->getName(), CI); - } else if (Name.compare(14, 4, "vabd", 4) == 0) { - NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1)); - NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI); - } else if (Name.compare(14, 4, "vaba", 4) == 0) { - NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2)); - if (Name.at(18) == 'l') - NewI = new ZExtInst(NewI, CI->getType(), "", CI); - NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI, - "upgraded."+CI->getName(), CI); - } else if (Name.compare(14, 6, "vmovn.", 6) == 0) { - NewI = new TruncInst(CI->getArgOperand(0), CI->getType(), - "upgraded." + CI->getName(), CI); - } else { - llvm_unreachable("Unknown arm.neon function for CallInst upgrade."); - } - // Replace any uses of the old CallInst. 
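Each of the NEON upgrades above lowers one intrinsic call to a short chain of plain IR; a signed vmlal, for example, becomes sign-extend, multiply, accumulate. A standalone scalar model of that SExt/Mul/Add chain, with illustrative widths and values:

#include <cstdint>
#include <iostream>

int main() {
  // Scalar model of upgrading a signed vmlal: i16 operands are sign-extended
  // to i32, multiplied, then added to the accumulator.
  std::int32_t Acc = 100;       // CI->getArgOperand(0)
  std::int16_t A = -3, B = 7;   // narrow operands to be extended
  std::int32_t V0 = A;          // SExtInst on the first multiplicand
  std::int32_t V1 = B;          // SExtInst on the second multiplicand
  std::int32_t Mul = V0 * V1;   // BinaryOperator::CreateMul
  std::int32_t Res = Acc + Mul; // BinaryOperator::CreateAdd
  std::cout << Res << '\n';     // 100 + (-21) = 79
  return 0;
}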
- if (!CI->use_empty()) - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - bool isLoadH = false, isLoadL = false, isMovL = false; - bool isMovSD = false, isShufPD = false; - bool isUnpckhPD = false, isUnpcklPD = false; - bool isPunpckhQPD = false, isPunpcklQPD = false; - if (F->getName() == "llvm.x86.sse2.loadh.pd") - isLoadH = true; - else if (F->getName() == "llvm.x86.sse2.loadl.pd") - isLoadL = true; - else if (F->getName() == "llvm.x86.sse2.movl.dq") - isMovL = true; - else if (F->getName() == "llvm.x86.sse2.movs.d") - isMovSD = true; - else if (F->getName() == "llvm.x86.sse2.shuf.pd") - isShufPD = true; - else if (F->getName() == "llvm.x86.sse2.unpckh.pd") - isUnpckhPD = true; - else if (F->getName() == "llvm.x86.sse2.unpckl.pd") - isUnpcklPD = true; - else if (F->getName() == "llvm.x86.sse2.punpckh.qdq") - isPunpckhQPD = true; - else if (F->getName() == "llvm.x86.sse2.punpckl.qdq") - isPunpcklQPD = true; - - if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD || - isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) { - std::vector<Constant*> Idxs; - Value *Op0 = CI->getArgOperand(0); - ShuffleVectorInst *SI = NULL; - if (isLoadH || isLoadL) { - Value *Op1 = UndefValue::get(Op0->getType()); - Value *Addr = new BitCastInst(CI->getArgOperand(1), - Type::getDoublePtrTy(C), - "upgraded.", CI); - Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI); - Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0); - Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI); - - if (isLoadH) { - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2)); - } else { - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); - } - Value *Mask = ConstantVector::get(Idxs); - SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI); - } else if (isMovL) { - Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0); - Idxs.push_back(Zero); - Idxs.push_back(Zero); - Idxs.push_back(Zero); - Idxs.push_back(Zero); - Value *ZeroV = ConstantVector::get(Idxs); - - Idxs.clear(); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3)); - Value *Mask = ConstantVector::get(Idxs); - SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI); - } else if (isMovSD || - isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) { - Value *Op1 = CI->getArgOperand(1); - if (isMovSD) { - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); - } else if (isUnpckhPD || isPunpckhQPD) { - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3)); - } else { - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2)); - } - Value *Mask = ConstantVector::get(Idxs); - SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI); - } else if (isShufPD) { - Value *Op1 = CI->getArgOperand(1); - unsigned MaskVal = - cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1)); - Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), - ((MaskVal >> 1) & 1)+2)); - Value *Mask = ConstantVector::get(Idxs); - SI = new ShuffleVectorInst(Op0, Op1, Mask, 
"upgraded.", CI); - } - - assert(SI && "Unexpected!"); - - // Handle any uses of the old CallInst. - if (!CI->use_empty()) - // Replace all uses of the old call with the new cast which has the - // correct type. - CI->replaceAllUsesWith(SI); - - // Clean up the old call now that it has been completely upgraded. - CI->eraseFromParent(); - } else if (F->getName() == "llvm.x86.sse41.pmulld") { - // Upgrade this set of intrinsics into vector multiplies. - Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0), - CI->getArgOperand(1), - CI->getName(), - CI); - // Fix up all the uses with our new multiply. - if (!CI->use_empty()) - CI->replaceAllUsesWith(Mul); - - // Remove upgraded multiply. - CI->eraseFromParent(); - } else if (F->getName() == "llvm.x86.ssse3.palign.r") { - Value *Op1 = CI->getArgOperand(0); - Value *Op2 = CI->getArgOperand(1); - Value *Op3 = CI->getArgOperand(2); - unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue(); - Value *Rep; - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - - // If palignr is shifting the pair of input vectors less than 9 bytes, - // emit a shuffle instruction. - if (shiftVal <= 8) { - const Type *IntTy = Type::getInt32Ty(C); - const Type *EltTy = Type::getInt8Ty(C); - const Type *VecTy = VectorType::get(EltTy, 8); - - Op2 = Builder.CreateBitCast(Op2, VecTy); - Op1 = Builder.CreateBitCast(Op1, VecTy); - - llvm::SmallVector<llvm::Constant*, 8> Indices; - for (unsigned i = 0; i != 8; ++i) - Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); - - Value *SV = ConstantVector::get(Indices); - Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr"); - Rep = Builder.CreateBitCast(Rep, F->getReturnType()); - } - - // If palignr is shifting the pair of input vectors more than 8 but less - // than 16 bytes, emit a logical right shift of the destination. - else if (shiftVal < 16) { - // MMX has these as 1 x i64 vectors for some odd optimization reasons. - const Type *EltTy = Type::getInt64Ty(C); - const Type *VecTy = VectorType::get(EltTy, 1); - - Op1 = Builder.CreateBitCast(Op1, VecTy, "cast"); - Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8); - - // create i32 constant - Function *I = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q); - Rep = Builder.CreateCall2(I, Op1, Op2, "palignr"); - } - - // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. - else { - Rep = Constant::getNullValue(F->getReturnType()); - } - - // Replace any uses with our new instruction. - if (!CI->use_empty()) - CI->replaceAllUsesWith(Rep); - - // Remove upgraded instruction. - CI->eraseFromParent(); - - } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") { - Value *Op1 = CI->getArgOperand(0); - Value *Op2 = CI->getArgOperand(1); - Value *Op3 = CI->getArgOperand(2); - unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue(); - Value *Rep; - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - - // If palignr is shifting the pair of input vectors less than 17 bytes, - // emit a shuffle instruction. 
- if (shiftVal <= 16) { - const Type *IntTy = Type::getInt32Ty(C); - const Type *EltTy = Type::getInt8Ty(C); - const Type *VecTy = VectorType::get(EltTy, 16); - - Op2 = Builder.CreateBitCast(Op2, VecTy); - Op1 = Builder.CreateBitCast(Op1, VecTy); - - llvm::SmallVector<llvm::Constant*, 16> Indices; - for (unsigned i = 0; i != 16; ++i) - Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); - - Value *SV = ConstantVector::get(Indices); - Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr"); - Rep = Builder.CreateBitCast(Rep, F->getReturnType()); - } - - // If palignr is shifting the pair of input vectors more than 16 but less - // than 32 bytes, emit a logical right shift of the destination. - else if (shiftVal < 32) { - const Type *EltTy = Type::getInt64Ty(C); - const Type *VecTy = VectorType::get(EltTy, 2); - const Type *IntTy = Type::getInt32Ty(C); - - Op1 = Builder.CreateBitCast(Op1, VecTy, "cast"); - Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8); - - // create i32 constant - Function *I = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq); - Rep = Builder.CreateCall2(I, Op1, Op2, "palignr"); - } - - // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. - else { - Rep = Constant::getNullValue(F->getReturnType()); - } - - // Replace any uses with our new instruction. - if (!CI->use_empty()) - CI->replaceAllUsesWith(Rep); - - // Remove upgraded instruction. - CI->eraseFromParent(); - - } else if (F->getName() == "llvm.x86.sse.loadu.ps" || - F->getName() == "llvm.x86.sse2.loadu.dq" || - F->getName() == "llvm.x86.sse2.loadu.pd") { + if (F->getName() == "llvm.x86.sse.loadu.ps" || + F->getName() == "llvm.x86.sse2.loadu.dq" || + F->getName() == "llvm.x86.sse2.loadu.pd") { // Convert to a native, unaligned load. const Type *VecTy = CI->getType(); const Type *IntTy = IntegerType::get(C, 128); @@ -1040,306 +189,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } switch (NewFn->getIntrinsicID()) { - default: llvm_unreachable("Unknown function for CallInst upgrade."); - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vst1: - case Intrinsic::arm_neon_vst2: - case Intrinsic::arm_neon_vst3: - case Intrinsic::arm_neon_vst4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: - case Intrinsic::arm_neon_vst2lane: - case Intrinsic::arm_neon_vst3lane: - case Intrinsic::arm_neon_vst4lane: { - // Add a default alignment argument of 1. - SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end()); - Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); - CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(), - CI->getName(), CI); - NewCI->setTailCall(CI->isTailCall()); - NewCI->setCallingConv(CI->getCallingConv()); - - // Handle any uses of the old CallInst. - if (!CI->use_empty()) - // Replace all uses of the old call with the new cast which has the - // correct type. - CI->replaceAllUsesWith(NewCI); - - // Clean up the old call now that it has been completely upgraded. 
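// The (now removed) vld/vst path above did one mechanical thing: append a
// conservative alignment operand of 1 to the call. A sketch, with the
// aggregate return type elided and value names hypothetical:
//
//   %r = call ... @llvm.arm.neon.vld2.v8i8(i8* %p)        ; old form
//   %r = call ... @llvm.arm.neon.vld2.v8i8(i8* %p, i32 1) ; upgraded form
//
// An alignment of 1 is always safe; later optimizations may raise it.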
- CI->eraseFromParent(); - break; - } - - case Intrinsic::x86_mmx_padd_b: - case Intrinsic::x86_mmx_padd_w: - case Intrinsic::x86_mmx_padd_d: - case Intrinsic::x86_mmx_padd_q: - case Intrinsic::x86_mmx_padds_b: - case Intrinsic::x86_mmx_padds_w: - case Intrinsic::x86_mmx_paddus_b: - case Intrinsic::x86_mmx_paddus_w: - case Intrinsic::x86_mmx_psub_b: - case Intrinsic::x86_mmx_psub_w: - case Intrinsic::x86_mmx_psub_d: - case Intrinsic::x86_mmx_psub_q: - case Intrinsic::x86_mmx_psubs_b: - case Intrinsic::x86_mmx_psubs_w: - case Intrinsic::x86_mmx_psubus_b: - case Intrinsic::x86_mmx_psubus_w: - case Intrinsic::x86_mmx_pmulh_w: - case Intrinsic::x86_mmx_pmull_w: - case Intrinsic::x86_mmx_pmulhu_w: - case Intrinsic::x86_mmx_pmulu_dq: - case Intrinsic::x86_mmx_pmadd_wd: - case Intrinsic::x86_mmx_pand: - case Intrinsic::x86_mmx_pandn: - case Intrinsic::x86_mmx_por: - case Intrinsic::x86_mmx_pxor: - case Intrinsic::x86_mmx_pavg_b: - case Intrinsic::x86_mmx_pavg_w: - case Intrinsic::x86_mmx_pmaxu_b: - case Intrinsic::x86_mmx_pmaxs_w: - case Intrinsic::x86_mmx_pminu_b: - case Intrinsic::x86_mmx_pmins_w: - case Intrinsic::x86_mmx_psad_bw: - case Intrinsic::x86_mmx_psll_w: - case Intrinsic::x86_mmx_psll_d: - case Intrinsic::x86_mmx_psll_q: - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrl_w: - case Intrinsic::x86_mmx_psrl_d: - case Intrinsic::x86_mmx_psrl_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psra_w: - case Intrinsic::x86_mmx_psra_d: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: - case Intrinsic::x86_mmx_packsswb: - case Intrinsic::x86_mmx_packssdw: - case Intrinsic::x86_mmx_packuswb: - case Intrinsic::x86_mmx_punpckhbw: - case Intrinsic::x86_mmx_punpckhwd: - case Intrinsic::x86_mmx_punpckhdq: - case Intrinsic::x86_mmx_punpcklbw: - case Intrinsic::x86_mmx_punpcklwd: - case Intrinsic::x86_mmx_punpckldq: - case Intrinsic::x86_mmx_pcmpeq_b: - case Intrinsic::x86_mmx_pcmpeq_w: - case Intrinsic::x86_mmx_pcmpeq_d: - case Intrinsic::x86_mmx_pcmpgt_b: - case Intrinsic::x86_mmx_pcmpgt_w: - case Intrinsic::x86_mmx_pcmpgt_d: { - Value *Operands[2]; - - // Cast the operand to the X86 MMX type. - Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - - switch (NewFn->getIntrinsicID()) { - default: - // Cast to the X86 MMX type. - Operands[1] = new BitCastInst(CI->getArgOperand(1), - NewFn->getFunctionType()->getParamType(1), - "upgraded.", CI); - break; - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: - // These take an i32 as their second parameter. - Operands[1] = CI->getArgOperand(1); - break; - } - - ConstructNewCallInst(NewFn, CI, Operands, 2); - break; - } - case Intrinsic::x86_mmx_maskmovq: { - Value *Operands[3]; - - // Cast the operands to the X86 MMX type. 
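// Every MMX case above follows the same recipe: the old intrinsics took
// ordinary vector operands, the replacements take the opaque x86_mmx type,
// so each vector operand is rewired through a bitcast while scalar shift
// counts and lane indices pass through untouched. Sketched with
// hypothetical values:
//
//   %m0 = bitcast <1 x i64> %a to x86_mmx
//   %m1 = bitcast <1 x i64> %b to x86_mmx
//   %r  = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %m0, x86_mmx %m1)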
- Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - Operands[1] = new BitCastInst(CI->getArgOperand(1), - NewFn->getFunctionType()->getParamType(1), - "upgraded.", CI); - Operands[2] = CI->getArgOperand(2); - - ConstructNewCallInst(NewFn, CI, Operands, 3, false); - break; - } - case Intrinsic::x86_mmx_pmovmskb: { - Value *Operands[1]; - - // Cast the operand to the X86 MMX type. - Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - - ConstructNewCallInst(NewFn, CI, Operands, 1); - break; - } - case Intrinsic::x86_mmx_movnt_dq: { - Value *Operands[2]; - - Operands[0] = CI->getArgOperand(0); - - // Cast the operand to the X86 MMX type. - Operands[1] = new BitCastInst(CI->getArgOperand(1), - NewFn->getFunctionType()->getParamType(1), - "upgraded.", CI); - - ConstructNewCallInst(NewFn, CI, Operands, 2, false); - break; - } - case Intrinsic::x86_mmx_palignr_b: { - Value *Operands[3]; - - // Cast the operands to the X86 MMX type. - Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - Operands[1] = new BitCastInst(CI->getArgOperand(1), - NewFn->getFunctionType()->getParamType(1), - "upgraded.", CI); - Operands[2] = CI->getArgOperand(2); - - ConstructNewCallInst(NewFn, CI, Operands, 3); - break; - } - case Intrinsic::x86_mmx_pextr_w: { - Value *Operands[2]; - - // Cast the operands to the X86 MMX type. - Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - Operands[1] = CI->getArgOperand(1); - - ConstructNewCallInst(NewFn, CI, Operands, 2); - break; - } - case Intrinsic::x86_mmx_pinsr_w: { - Value *Operands[3]; - - // Cast the operands to the X86 MMX type. - Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - Operands[1] = CI->getArgOperand(1); - Operands[2] = CI->getArgOperand(2); - - ConstructNewCallInst(NewFn, CI, Operands, 3); - break; - } - case Intrinsic::x86_sse_pshuf_w: { + case Intrinsic::prefetch: { IRBuilder<> Builder(C); Builder.SetInsertPoint(CI->getParent(), CI); + const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext()); - // Cast the operand to the X86 MMX type. - Value *Operands[2]; - Operands[0] = - Builder.CreateBitCast(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded."); - Operands[1] = - Builder.CreateTrunc(CI->getArgOperand(1), - Type::getInt8Ty(C), - "upgraded."); - - ConstructNewCallInst(NewFn, CI, Operands, 2); - break; - } - - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: { - // Build a small vector of the original arguments. - SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end()); - - // Construct a new CallInst - CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(), - "upgraded."+CI->getName(), CI); - NewCI->setTailCall(CI->isTailCall()); - NewCI->setCallingConv(CI->getCallingConv()); - - // Handle any uses of the old CallInst. - if (!CI->use_empty()) { - // Check for sign extend parameter attributes on the return values. - bool SrcSExt = NewFn->getAttributes().paramHasAttr(0, Attribute::SExt); - bool DestSExt = F->getAttributes().paramHasAttr(0, Attribute::SExt); - - // Construct an appropriate cast from the new return type to the old. 
- CastInst *RetCast = CastInst::Create(
- CastInst::getCastOpcode(NewCI, SrcSExt,
- F->getReturnType(),
- DestSExt),
- NewCI, F->getReturnType(),
- NewCI->getName(), CI);
- NewCI->moveBefore(RetCast);
-
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(RetCast);
- }
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::eh_selector:
- case Intrinsic::eh_typeid_for: {
- // Only the return type changed.
- SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
- CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
- "upgraded." + CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
-
- // Handle any uses of the old CallInst.
- if (!CI->use_empty()) {
- // Construct an appropriate cast from the new return type to the old.
- CastInst *RetCast =
- CastInst::Create(CastInst::getCastOpcode(NewCI, true,
- F->getReturnType(), true),
- NewCI, F->getReturnType(), NewCI->getName(), CI);
- CI->replaceAllUsesWith(RetCast);
- }
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset: {
- // Add isVolatile
- const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
- Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3),
- llvm::ConstantInt::get(I1Ty, 0) };
- CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
+ // Add the extra "data cache" argument
+ Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ llvm::ConstantInt::get(I32Ty, 1) };
+ CallInst *NewCI = CallInst::Create(NewFn, Operands,
CI->getName(), CI);
NewCI->setTailCall(CI->isTailCall());
NewCI->setCallingConv(CI->getCallingConv());
// Handle any uses of the old CallInst.
if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
+ // Replace all uses of the old call with the new cast which has the
// correct type.
CI->replaceAllUsesWith(NewCI);
-
+
// Clean up the old call now that it has been completely upgraded.
CI->eraseFromParent();
break;
@@ -1354,13 +222,13 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
// Upgrade the function and check if it is a totally new function.
- Function* NewFn;
+ Function *NewFn;
if (UpgradeIntrinsicFunction(F, NewFn)) {
if (NewFn != F) {
// Replace all uses of the old function with the new one if necessary.
for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
UI != UE; ) {
- if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ if (CallInst *CI = dyn_cast<CallInst>(*UI++))
UpgradeIntrinsicCall(CI, NewFn);
}
// Remove old function, no longer used, from the module.
@@ -1373,37 +241,27 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
/// strips that use.
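// The stripping below is one two-step pattern repeated per retired debug
// intrinsic: erase every remaining call, then the declaration itself. As a
// minimal standalone sketch (hypothetical helper name):

static void stripRetiredIntrinsic(Function *F) {
  while (!F->use_empty())                           // drop each stale call
    cast<CallInst>(F->use_back())->eraseFromParent();
  F->eraseFromParent();                             // then the declaration
}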
void llvm::CheckDebugInfoIntrinsics(Module *M) { - - if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) { - while (!FuncStart->use_empty()) { - CallInst *CI = cast<CallInst>(FuncStart->use_back()); - CI->eraseFromParent(); - } + while (!FuncStart->use_empty()) + cast<CallInst>(FuncStart->use_back())->eraseFromParent(); FuncStart->eraseFromParent(); } if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) { - while (!StopPoint->use_empty()) { - CallInst *CI = cast<CallInst>(StopPoint->use_back()); - CI->eraseFromParent(); - } + while (!StopPoint->use_empty()) + cast<CallInst>(StopPoint->use_back())->eraseFromParent(); StopPoint->eraseFromParent(); } if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) { - while (!RegionStart->use_empty()) { - CallInst *CI = cast<CallInst>(RegionStart->use_back()); - CI->eraseFromParent(); - } + while (!RegionStart->use_empty()) + cast<CallInst>(RegionStart->use_back())->eraseFromParent(); RegionStart->eraseFromParent(); } if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) { - while (!RegionEnd->use_empty()) { - CallInst *CI = cast<CallInst>(RegionEnd->use_back()); - CI->eraseFromParent(); - } + while (!RegionEnd->use_empty()) + cast<CallInst>(RegionEnd->use_back())->eraseFromParent(); RegionEnd->eraseFromParent(); } diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp index 955a0285b260..70265c899d7e 100644 --- a/lib/VMCore/BasicBlock.cpp +++ b/lib/VMCore/BasicBlock.cpp @@ -147,6 +147,26 @@ Instruction* BasicBlock::getFirstNonPHIOrDbg() { return &*i; } +Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() { + // All valid basic blocks should have a terminator, + // which is not a PHINode. If we have an invalid basic + // block we'll get an assertion failure when dereferencing + // a past-the-end iterator. + BasicBlock::iterator i = begin(); + for (;; ++i) { + if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) + continue; + + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i); + if (!II) + break; + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + break; + } + return &*i; +} + void BasicBlock::dropAllReferences() { for(iterator I = begin(), E = end(); I != E; ++I) I->dropAllReferences(); @@ -227,8 +247,8 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, // If the PHI _HAD_ two uses, replace PHI node with its now *single* value if (max_idx == 2) { - if (PN->getOperand(0) != PN) - PN->replaceAllUsesWith(PN->getOperand(0)); + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); else // We are left with an infinite loop with no entries: kill the PHI. PN->replaceAllUsesWith(UndefValue::get(PN->getType())); @@ -308,3 +328,19 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { return New; } +void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) { + TerminatorInst *TI = getTerminator(); + if (!TI) + // Cope with being called on a BasicBlock that doesn't have a terminator + // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this. 
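// The effect of replaceSuccessorsPhiUsesWith() above, sketched on
// hypothetical blocks: if this block ends in "br label %succ" and %succ
// begins with "%v = phi i32 [ %x, %this ], ...", every such incoming-block
// operand %this is rewritten to point at New, keeping the successors' PHI
// nodes consistent when a block is renamed or split.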
+ return; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = TI->getSuccessor(i); + for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II); + ++II) { + int i; + while ((i = PN->getBasicBlockIndex(this)) >= 0) + PN->setIncomingBlock(i, New); + } + } +} diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 6bde263ce625..f60dd06c98a6 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -29,7 +29,6 @@ add_llvm_library(LLVMCore PassRegistry.cpp PrintModulePass.cpp Type.cpp - TypeSymbolTable.cpp Use.cpp User.cpp Value.cpp diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 9985adaf576e..323e2a280999 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -559,7 +559,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy)); - return ConstantVector::get(DestVecTy, res); + return ConstantVector::get(res); } // We actually have to do a cast now. Perform the cast according to the @@ -730,9 +730,12 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, } + if (isa<UndefValue>(Cond)) { + if (isa<UndefValue>(V1)) return V1; + return V2; + } if (isa<UndefValue>(V1)) return V2; if (isa<UndefValue>(V2)) return V1; - if (isa<UndefValue>(Cond)) return V1; if (V1 == V2) return V1; if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) { @@ -877,42 +880,38 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, } Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg, - const unsigned *Idxs, - unsigned NumIdx) { + ArrayRef<unsigned> Idxs) { // Base case: no indices, so return the entire value. - if (NumIdx == 0) + if (Idxs.empty()) return Agg; if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(), - Idxs, - Idxs + NumIdx)); + Idxs)); if (isa<ConstantAggregateZero>(Agg)) // ev(0, x) -> 0 return Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(), - Idxs, - Idxs + NumIdx)); + Idxs)); // Otherwise recurse. if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) - return ConstantFoldExtractValueInstruction(CS->getOperand(*Idxs), - Idxs+1, NumIdx-1); + return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]), + Idxs.slice(1)); if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) - return ConstantFoldExtractValueInstruction(CA->getOperand(*Idxs), - Idxs+1, NumIdx-1); + return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]), + Idxs.slice(1)); ConstantVector *CV = cast<ConstantVector>(Agg); - return ConstantFoldExtractValueInstruction(CV->getOperand(*Idxs), - Idxs+1, NumIdx-1); + return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]), + Idxs.slice(1)); } Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, - const unsigned *Idxs, - unsigned NumIdx) { + ArrayRef<unsigned> Idxs) { // Base case: no indices, so replace the entire value. - if (NumIdx == 0) + if (Idxs.empty()) return Val; if (isa<UndefValue>(Agg)) { @@ -934,15 +933,15 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, for (unsigned i = 0; i < numOps; ++i) { const Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = - (*Idxs == i) ? + (Idxs[0] == i) ? 
ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy), - Val, Idxs+1, NumIdx-1) : + Val, Idxs.slice(1)) : UndefValue::get(MemberTy); Ops[i] = Op; } if (const StructType* ST = dyn_cast<StructType>(AggTy)) - return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); + return ConstantStruct::get(ST, Ops); return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -965,15 +964,15 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, for (unsigned i = 0; i < numOps; ++i) { const Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = - (*Idxs == i) ? + (Idxs[0] == i) ? ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy), - Val, Idxs+1, NumIdx-1) : + Val, Idxs.slice(1)) : Constant::getNullValue(MemberTy); Ops[i] = Op; } if (const StructType *ST = dyn_cast<StructType>(AggTy)) - return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); + return ConstantStruct::get(ST, Ops); return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -982,13 +981,13 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, std::vector<Constant*> Ops(Agg->getNumOperands()); for (unsigned i = 0; i < Agg->getNumOperands(); ++i) { Constant *Op = cast<Constant>(Agg->getOperand(i)); - if (*Idxs == i) - Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs+1, NumIdx-1); + if (Idxs[0] == i) + Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1)); Ops[i] = Op; } if (const StructType* ST = dyn_cast<StructType>(Agg->getType())) - return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); + return ConstantStruct::get(ST, Ops); return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops); } @@ -1014,20 +1013,38 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::Add: case Instruction::Sub: return UndefValue::get(C1->getType()); - case Instruction::Mul: case Instruction::And: + if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef + return C1; + return Constant::getNullValue(C1->getType()); // undef & X -> 0 + case Instruction::Mul: { + ConstantInt *CI; + // X * undef -> undef if X is odd or undef + if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) || + ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) || + (isa<UndefValue>(C1) && isa<UndefValue>(C2))) + return UndefValue::get(C1->getType()); + + // X * undef -> 0 otherwise return Constant::getNullValue(C1->getType()); + } case Instruction::UDiv: case Instruction::SDiv: + // undef / 1 -> undef + if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv) + if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) + if (CI2->isOne()) + return C1; + // FALL THROUGH case Instruction::URem: case Instruction::SRem: if (!isa<UndefValue>(C2)) // undef / X -> 0 return Constant::getNullValue(C1->getType()); return C2; // X / undef -> undef case Instruction::Or: // X | undef -> -1 - if (const VectorType *PTy = dyn_cast<VectorType>(C1->getType())) - return Constant::getAllOnesValue(PTy); - return Constant::getAllOnesValue(C1->getType()); + if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef + return C1; + return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0 case Instruction::LShr: if (isa<UndefValue>(C2) && isa<UndefValue>(C1)) return C1; // undef lshr undef -> undef @@ -1041,6 +1058,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, else return C1; // X ashr undef --> X case Instruction::Shl: + if (isa<UndefValue>(C2) && isa<UndefValue>(C1)) + return C1; // undef shl undef -> undef 
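// The undef rules added above reward a close read. Multiplying by an odd
// constant is invertible modulo 2^N, so any result is reachable and
// "mul undef, odd" stays undef, while an even factor pins low bits to zero
// and the fold must return a definite value (0 is always consistent).
// Concrete instances, assuming i8 operands and a constant C:
//
//   and  i8 undef, C  -->  0      ; undef may be chosen as 0
//   mul  i8 undef, 3  -->  undef  ; odd multiplier: every value reachable
//   mul  i8 undef, 2  -->  0      ; bit 0 forced clear; 0 is a valid choice
//   udiv i8 undef, 1  -->  undef  ; dividing by one is the identity
//   or   i8 undef, C  -->  -1     ; undef may be chosen as all-ones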
// undef << X -> 0 or X << undef -> 0 return Constant::getNullValue(C1->getType()); } @@ -1443,8 +1462,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, /// isZeroSizedType - This type is zero sized if its an array or structure of /// zero sized types. The only leaf zero sized type is an empty structure. static bool isMaybeZeroSizedType(const Type *Ty) { - if (Ty->isOpaqueTy()) return true; // Can't say. if (const StructType *STy = dyn_cast<StructType>(Ty)) { + if (STy->isOpaque()) return true; // Can't say. // If all of elements have zero size, this does too. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) @@ -1831,7 +1850,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) { // For EQ and NE, we can always pick a value for the undef to make the // predicate pass or fail, so we can return undef. - if (ICmpInst::isEquality(ICmpInst::Predicate(pred))) + // Also, if both operands are undef, we can return undef. + if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) || + (isa<UndefValue>(C1) && isa<UndefValue>(C2))) return UndefValue::get(ResultTy); // Otherwise, pick the same value as the non-undef operand, and fold // it to true or false. @@ -2147,9 +2168,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, bool inBounds, IndexTy const *Idxs, unsigned NumIdx) { + if (NumIdx == 0) return C; Constant *Idx0 = cast<Constant>(Idxs[0]); - if (NumIdx == 0 || - (NumIdx == 1 && Idx0->isNullValue())) + if ((NumIdx == 1 && Idx0->isNullValue())) return C; if (isa<UndefValue>(C)) { diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h index 0ecd7b49a48e..653a1c3f377d 100644 --- a/lib/VMCore/ConstantFold.h +++ b/lib/VMCore/ConstantFold.h @@ -19,6 +19,8 @@ #ifndef CONSTANTFOLDING_H #define CONSTANTFOLDING_H +#include "llvm/ADT/ArrayRef.h" + namespace llvm { class Value; class Constant; @@ -38,11 +40,9 @@ namespace llvm { Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, Constant *Mask); Constant *ConstantFoldExtractValueInstruction(Constant *Agg, - const unsigned *Idxs, - unsigned NumIdx); + ArrayRef<unsigned> Idxs); Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, - const unsigned *Idxs, - unsigned NumIdx); + ArrayRef<unsigned> Idxs); Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2); Constant *ConstantFoldCompareInstruction(unsigned short predicate, diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 15d7793d5893..316c8846f94f 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include <algorithm> #include <cstdarg> using namespace llvm; @@ -39,6 +40,28 @@ using namespace llvm; // Constant Class //===----------------------------------------------------------------------===// +bool Constant::isNegativeZeroValue() const { + // Floating point values have an explicit -0.0 value. + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) + return CFP->isZero() && CFP->isNegative(); + + // Otherwise, just use +0.0. + return isNullValue(); +} + +bool Constant::isNullValue() const { + // 0 is null. + if (const ConstantInt *CI = dyn_cast<ConstantInt>(this)) + return CI->isZero(); + + // +0.0 is null. 
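// The two queries added above differ only for floating point, which has a
// signed zero. On concrete constants:
//
//   ConstantFP  +0.0 : isNullValue() = true,  isNegativeZeroValue() = false
//   ConstantFP  -0.0 : isNullValue() = false, isNegativeZeroValue() = true
//   ConstantInt  0   : isNullValue() = true,  isNegativeZeroValue() = true
//
// Integers, null pointers, and zero aggregates have no -0, so for them
// isNegativeZeroValue() simply falls back to isNullValue().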
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) + return CFP->isZero() && !CFP->isNegative(); + + // constant zero is zero for aggregates and cpnull is null for pointers. + return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this); +} + // Constructor to create a '0' constant of arbitrary type... Constant *Constant::getNullValue(const Type *Ty) { switch (Ty->getTypeID()) { @@ -541,11 +564,7 @@ ConstantFP::ConstantFP(const Type *Ty, const APFloat& V) "FP type Mismatch"); } -bool ConstantFP::isNullValue() const { - return Val.isZero() && !Val.isNegative(); -} - -bool ConstantFP::isExactlyValue(const APFloat& V) const { +bool ConstantFP::isExactlyValue(const APFloat &V) const { return Val.bitwiseIsEqual(V); } @@ -571,8 +590,7 @@ ConstantArray::ConstantArray(const ArrayType *T, } } -Constant *ConstantArray::get(const ArrayType *Ty, - const std::vector<Constant*> &V) { +Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) { for (unsigned i = 0, e = V.size(); i != e; ++i) { assert(V[i]->getType() == Ty->getElementType() && "Wrong type in array element initializer"); @@ -592,13 +610,6 @@ Constant *ConstantArray::get(const ArrayType *Ty, return ConstantAggregateZero::get(Ty); } - -Constant *ConstantArray::get(const ArrayType* T, Constant *const* Vals, - unsigned NumVals) { - // FIXME: make this the primary ctor method. - return get(T, std::vector<Constant*>(Vals, Vals+NumVals)); -} - /// ConstantArray::get(const string&) - Return an array that is initialized to /// contain the specified string. If length is zero then a null terminator is /// added to the specified string so that it may be used in a natural way. @@ -621,63 +632,64 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str, return get(ATy, ElementVals); } +/// getTypeForElements - Return an anonymous struct type to use for a constant +/// with the specified set of elements. The list must not be empty. +StructType *ConstantStruct::getTypeForElements(LLVMContext &Context, + ArrayRef<Constant*> V, + bool Packed) { + SmallVector<Type*, 16> EltTypes; + for (unsigned i = 0, e = V.size(); i != e; ++i) + EltTypes.push_back(V[i]->getType()); + + return StructType::get(Context, EltTypes, Packed); +} + + +StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V, + bool Packed) { + assert(!V.empty() && + "ConstantStruct::getTypeForElements cannot be called on empty list"); + return getTypeForElements(V[0]->getContext(), V, Packed); +} + + ConstantStruct::ConstantStruct(const StructType *T, const std::vector<Constant*> &V) : Constant(T, ConstantStructVal, OperandTraits<ConstantStruct>::op_end(this) - V.size(), V.size()) { - assert(V.size() == T->getNumElements() && + assert((T->isOpaque() || V.size() == T->getNumElements()) && "Invalid initializer vector for constant structure"); Use *OL = OperandList; for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end(); I != E; ++I, ++OL) { Constant *C = *I; - assert(C->getType() == T->getElementType(I-V.begin()) && + assert((T->isOpaque() || C->getType() == T->getElementType(I-V.begin())) && "Initializer for struct element doesn't match struct element type!"); *OL = C; } } // ConstantStruct accessors. -Constant *ConstantStruct::get(const StructType* T, - const std::vector<Constant*>& V) { - LLVMContextImpl* pImpl = T->getContext().pImpl; - - // Create a ConstantAggregateZero value if all elements are zeros... 
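// A short usage sketch for the anonymous-struct helpers above (function and
// values hypothetical): the struct type is inferred from the elements
// instead of being spelled out by the caller.

static Constant *makePair(LLVMContext &Ctx) {
  Constant *Elts[] = {
    ConstantInt::get(Type::getInt32Ty(Ctx), 1),
    ConstantInt::get(Type::getInt8Ty(Ctx), 2)
  };
  ArrayRef<Constant*> V(Elts, 2);
  // getTypeForElements() derives { i32, i8 } from the operands themselves.
  return ConstantStruct::get(ConstantStruct::getTypeForElements(V, false), V);
}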
+Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) { + // Create a ConstantAggregateZero value if all elements are zeros. for (unsigned i = 0, e = V.size(); i != e; ++i) if (!V[i]->isNullValue()) - return pImpl->StructConstants.getOrCreate(T, V); - - return ConstantAggregateZero::get(T); -} - -Constant *ConstantStruct::get(LLVMContext &Context, - const std::vector<Constant*>& V, bool packed) { - std::vector<const Type*> StructEls; - StructEls.reserve(V.size()); - for (unsigned i = 0, e = V.size(); i != e; ++i) - StructEls.push_back(V[i]->getType()); - return get(StructType::get(Context, StructEls, packed), V); -} + return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V); -Constant *ConstantStruct::get(LLVMContext &Context, - Constant *const *Vals, unsigned NumVals, - bool Packed) { - // FIXME: make this the primary ctor method. - return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed); + assert((ST->isOpaque() || ST->getNumElements() == V.size()) && + "Incorrect # elements specified to ConstantStruct::get"); + return ConstantAggregateZero::get(ST); } -Constant* ConstantStruct::get(LLVMContext &Context, bool Packed, - Constant * Val, ...) { +Constant* ConstantStruct::get(const StructType *T, ...) { va_list ap; - std::vector<Constant*> Values; - va_start(ap, Val); - while (Val) { + SmallVector<Constant*, 8> Values; + va_start(ap, T); + while (Constant *Val = va_arg(ap, llvm::Constant*)) Values.push_back(Val); - Val = va_arg(ap, llvm::Constant*); - } va_end(ap); - return get(Context, Values, Packed); + return get(T, Values); } ConstantVector::ConstantVector(const VectorType *T, @@ -696,9 +708,9 @@ ConstantVector::ConstantVector(const VectorType *T, } // ConstantVector accessors. -Constant *ConstantVector::get(const VectorType *T, - const std::vector<Constant*> &V) { +Constant *ConstantVector::get(ArrayRef<Constant*> V) { assert(!V.empty() && "Vectors can't be empty"); + const VectorType *T = VectorType::get(V.front()->getType(), V.size()); LLVMContextImpl *pImpl = T->getContext().pImpl; // If this is an all-undef or all-zero vector, return a @@ -723,12 +735,6 @@ Constant *ConstantVector::get(const VectorType *T, return pImpl->VectorConstants.getOrCreate(T, V); } -Constant *ConstantVector::get(ArrayRef<Constant*> V) { - // FIXME: make this the primary ctor method. - assert(!V.empty() && "Vectors cannot be empty"); - return get(VectorType::get(V.front()->getType(), V.size()), V.vec()); -} - // Utility function for determining if a ConstantExpr is a CastOp or not. This // can't be inline because we don't want to #include Instruction.h into // Constant.h @@ -779,8 +785,7 @@ ArrayRef<unsigned> ConstantExpr::getIndices() const { } unsigned ConstantExpr::getPredicate() const { - assert(getOpcode() == Instruction::FCmp || - getOpcode() == Instruction::ICmp); + assert(isCompare()); return ((const CompareConstantExpr*)this)->predicate; } @@ -851,17 +856,15 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { } /// getWithOperands - This returns the current constant expression with the -/// operands replaced with the specified values. The specified operands must -/// match count and type with the existing ones. +/// operands replaced with the specified values. The specified array must +/// have the same number of operands as our current one. 
Constant *ConstantExpr:: -getWithOperands(ArrayRef<Constant*> Ops) const { +getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const { assert(Ops.size() == getNumOperands() && "Operand count mismatch!"); - bool AnyChange = false; - for (unsigned i = 0; i != Ops.size(); ++i) { - assert(Ops[i]->getType() == getOperand(i)->getType() && - "Operand type mismatch!"); + bool AnyChange = Ty != getType(); + for (unsigned i = 0; i != Ops.size(); ++i) AnyChange |= Ops[i] != getOperand(i); - } + if (!AnyChange) // No operands changed, return self. return const_cast<ConstantExpr*>(this); @@ -878,7 +881,7 @@ getWithOperands(ArrayRef<Constant*> Ops) const { case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: - return ConstantExpr::getCast(getOpcode(), Ops[0], getType()); + return ConstantExpr::getCast(getOpcode(), Ops[0], Ty); case Instruction::Select: return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); case Instruction::InsertElement: @@ -976,14 +979,14 @@ ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) { /// destroyConstant - Remove the constant from the constant table... /// void ConstantAggregateZero::destroyConstant() { - getRawType()->getContext().pImpl->AggZeroConstants.remove(this); + getType()->getContext().pImpl->AggZeroConstants.remove(this); destroyConstantImpl(); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantArray::destroyConstant() { - getRawType()->getContext().pImpl->ArrayConstants.remove(this); + getType()->getContext().pImpl->ArrayConstants.remove(this); destroyConstantImpl(); } @@ -1023,44 +1026,54 @@ bool ConstantArray::isCString() const { } -/// getAsString - If the sub-element type of this array is i8 -/// then this method converts the array to an std::string and returns it. -/// Otherwise, it asserts out. +/// convertToString - Helper function for getAsString() and getAsCString(). +static std::string convertToString(const User *U, unsigned len) { + std::string Result; + Result.reserve(len); + for (unsigned i = 0; i != len; ++i) + Result.push_back((char)cast<ConstantInt>(U->getOperand(i))->getZExtValue()); + return Result; +} + +/// getAsString - If this array is isString(), then this method converts the +/// array to an std::string and returns it. Otherwise, it asserts out. /// std::string ConstantArray::getAsString() const { assert(isString() && "Not a string!"); - std::string Result; - Result.reserve(getNumOperands()); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - Result.push_back((char)cast<ConstantInt>(getOperand(i))->getZExtValue()); - return Result; + return convertToString(this, getNumOperands()); } -//---- ConstantStruct::get() implementation... -// +/// getAsCString - If this array is isCString(), then this method converts the +/// array (without the trailing null byte) to an std::string and returns it. +/// Otherwise, it asserts out. +/// +std::string ConstantArray::getAsCString() const { + assert(isCString() && "Not a string!"); + return convertToString(this, getNumOperands() - 1); +} -namespace llvm { -} +//---- ConstantStruct::get() implementation... +// // destroyConstant - Remove the constant from the constant table... // void ConstantStruct::destroyConstant() { - getRawType()->getContext().pImpl->StructConstants.remove(this); + getType()->getContext().pImpl->StructConstants.remove(this); destroyConstantImpl(); } // destroyConstant - Remove the constant from the constant table... 
// void ConstantVector::destroyConstant() { - getRawType()->getContext().pImpl->VectorConstants.remove(this); + getType()->getContext().pImpl->VectorConstants.remove(this); destroyConstantImpl(); } /// This function will return true iff every element in this vector constant /// is set to all ones. -/// @returns true iff this constant's emements are all set to all ones. +/// @returns true iff this constant's elements are all set to all ones. /// @brief Determine if the value is all ones. bool ConstantVector::isAllOnesValue() const { // Check out first element. @@ -1068,9 +1081,10 @@ bool ConstantVector::isAllOnesValue() const { const ConstantInt *CI = dyn_cast<ConstantInt>(Elt); if (!CI || !CI->isAllOnesValue()) return false; // Then make sure all remaining elements point to the same value. - for (unsigned I = 1, E = getNumOperands(); I < E; ++I) { - if (getOperand(I) != Elt) return false; - } + for (unsigned I = 1, E = getNumOperands(); I < E; ++I) + if (getOperand(I) != Elt) + return false; + return true; } @@ -1081,7 +1095,8 @@ Constant *ConstantVector::getSplatValue() const { Constant *Elt = getOperand(0); // Then make sure all remaining elements point to the same value. for (unsigned I = 1, E = getNumOperands(); I < E; ++I) - if (getOperand(I) != Elt) return 0; + if (getOperand(I) != Elt) + return 0; return Elt; } @@ -1095,7 +1110,7 @@ ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) { // destroyConstant - Remove the constant from the constant table... // void ConstantPointerNull::destroyConstant() { - getRawType()->getContext().pImpl->NullPtrConstants.remove(this); + getType()->getContext().pImpl->NullPtrConstants.remove(this); destroyConstantImpl(); } @@ -1110,7 +1125,7 @@ UndefValue *UndefValue::get(const Type *Ty) { // destroyConstant - Remove the constant from the constant table. // void UndefValue::destroyConstant() { - getRawType()->getContext().pImpl->UndefValueConstants.remove(this); + getType()->getContext().pImpl->UndefValueConstants.remove(this); destroyConstantImpl(); } @@ -1144,7 +1159,7 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB) // destroyConstant - Remove the constant from the constant table. // void BlockAddress::destroyConstant() { - getFunction()->getRawType()->getContext().pImpl + getFunction()->getType()->getContext().pImpl ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock())); getBasicBlock()->AdjustBlockAddressRefCount(-1); destroyConstantImpl(); @@ -1183,7 +1198,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { assert(NewBA != this && "I didn't contain From!"); // Everyone using this now uses the replacement. - uncheckedReplaceAllUsesWith(NewBA); + replaceAllUsesWith(NewBA); destroyConstant(); } @@ -1420,49 +1435,15 @@ Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) { return getFoldedCast(Instruction::BitCast, C, DstTy); } -Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode, - Constant *C1, Constant *C2, - unsigned Flags) { - // Check the operands for consistency first +Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, + unsigned Flags) { + // Check the operands for consistency first. 
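// ConstantExpr::get() below, like every factory that follows it, keeps to a
// two-step discipline: hand the operands to the constant folder first, and
// only if folding fails intern a new expression node in the context-wide
// uniquing table, so structurally identical expressions compare
// pointer-equal. For example (I32 and G hypothetical, G being an i32
// constant not known at IR time, e.g. a ptrtoint of a global):
//
//   ConstantExpr::get(Instruction::Add, ConstantInt::get(I32, 2),
//                     ConstantInt::get(I32, 2));  // folds to i32 4
//   ConstantExpr::get(Instruction::Add, G,
//                     ConstantInt::get(I32, 2));  // uniqued add expression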
assert(Opcode >= Instruction::BinaryOpsBegin && Opcode < Instruction::BinaryOpsEnd && "Invalid opcode in binary constant expression"); assert(C1->getType() == C2->getType() && "Operand types in binary constant expression should match"); - - if (ReqTy == C1->getType() || ReqTy == Type::getInt1Ty(ReqTy->getContext())) - if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2)) - return FC; // Fold a few common cases... - - std::vector<Constant*> argVec(1, C1); argVec.push_back(C2); - ExprMapKeyType Key(Opcode, argVec, 0, Flags); - LLVMContextImpl *pImpl = ReqTy->getContext().pImpl; - return pImpl->ExprConstants.getOrCreate(ReqTy, Key); -} - -Constant *ConstantExpr::getCompareTy(unsigned short predicate, - Constant *C1, Constant *C2) { - switch (predicate) { - default: llvm_unreachable("Invalid CmpInst predicate"); - case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT: - case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE: - case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO: - case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE: - case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE: - case CmpInst::FCMP_TRUE: - return getFCmp(predicate, C1, C2); - - case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT: - case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: - case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT: - case CmpInst::ICMP_SLE: - return getICmp(predicate, C1, C2); - } -} - -Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, - unsigned Flags) { #ifndef NDEBUG switch (Opcode) { case Instruction::Add: @@ -1521,7 +1502,15 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, } #endif - return getTy(C1->getType(), Opcode, C1, C2, Flags); + if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2)) + return FC; // Fold a few common cases. + + std::vector<Constant*> argVec(1, C1); + argVec.push_back(C2); + ExprMapKeyType Key(Opcode, argVec, 0, Flags); + + LLVMContextImpl *pImpl = C1->getContext().pImpl; + return pImpl->ExprConstants.getOrCreate(C1->getType(), Key); } Constant *ConstantExpr::getSizeOf(const Type* Ty) { @@ -1537,8 +1526,8 @@ Constant *ConstantExpr::getSizeOf(const Type* Ty) { Constant *ConstantExpr::getAlignOf(const Type* Ty) { // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1 // Note that a non-inbounds gep is used, as null isn't within any object. 
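// The getAlignOf() gep trick described above mirrors the classic offsetof
// idiom: inside {i1, Ty} the second field must be placed at Ty's natural
// alignment, so its byte offset *is* that alignment. The same idea in plain
// C++, for intuition (Aligner is a hypothetical helper):

#include <cstddef>
template <typename T> struct Aligner { char c; T t; };
// The padding the compiler inserts after 'c' makes offsetof(..., t) equal
// to the alignment of T.
static const size_t DoubleAlign = offsetof(Aligner<double>, t);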
- const Type *AligningTy = StructType::get(Ty->getContext(), - Type::getInt1Ty(Ty->getContext()), Ty, NULL); + const Type *AligningTy = + StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL); Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo()); Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); @@ -1566,41 +1555,55 @@ Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) { Type::getInt64Ty(Ty->getContext())); } -Constant *ConstantExpr::getCompare(unsigned short pred, - Constant *C1, Constant *C2) { +Constant *ConstantExpr::getCompare(unsigned short Predicate, + Constant *C1, Constant *C2) { assert(C1->getType() == C2->getType() && "Op types should be identical!"); - return getCompareTy(pred, C1, C2); + + switch (Predicate) { + default: llvm_unreachable("Invalid CmpInst predicate"); + case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT: + case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO: + case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE: + case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE: + case CmpInst::FCMP_TRUE: + return getFCmp(Predicate, C1, C2); + + case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT: + case CmpInst::ICMP_SLE: + return getICmp(Predicate, C1, C2); + } } -Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C, - Constant *V1, Constant *V2) { +Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) { assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands"); - if (ReqTy == V1->getType()) - if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2)) - return SC; // Fold common cases + if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2)) + return SC; // Fold common cases std::vector<Constant*> argVec(3, C); argVec[1] = V1; argVec[2] = V2; ExprMapKeyType Key(Instruction::Select, argVec); - LLVMContextImpl *pImpl = ReqTy->getContext().pImpl; - return pImpl->ExprConstants.getOrCreate(ReqTy, Key); + LLVMContextImpl *pImpl = C->getContext().pImpl; + return pImpl->ExprConstants.getOrCreate(V1->getType(), Key); } -template<typename IndexTy> -Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, - IndexTy const *Idxs, - unsigned NumIdx, bool InBounds) { - assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs, - Idxs+NumIdx) == - cast<PointerType>(ReqTy)->getElementType() && - "GEP indices invalid!"); - +Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs, + unsigned NumIdx, bool InBounds) { if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx)) return FC; // Fold a few common cases. + // Get the result type of the getelementptr! 
+ const Type *Ty = + GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx); + assert(Ty && "GEP indices invalid!"); + unsigned AS = cast<PointerType>(C->getType())->getAddressSpace(); + Type *ReqTy = Ty->getPointerTo(AS); + assert(C->getType()->isPointerTy() && "Non-pointer type for constant GetElementPtr expression"); // Look up the constant in the table first to ensure uniqueness @@ -1611,32 +1614,11 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, ArgVec.push_back(cast<Constant>(Idxs[i])); const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0, InBounds ? GEPOperator::IsInBounds : 0); - - LLVMContextImpl *pImpl = ReqTy->getContext().pImpl; + + LLVMContextImpl *pImpl = C->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ReqTy, Key); } -template<typename IndexTy> -Constant *ConstantExpr::getGetElementPtrImpl(Constant *C, IndexTy const *Idxs, - unsigned NumIdx, bool InBounds) { - // Get the result type of the getelementptr! - const Type *Ty = - GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx); - assert(Ty && "GEP indices invalid!"); - unsigned As = cast<PointerType>(C->getType())->getAddressSpace(); - return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx,InBounds); -} - -Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs, - unsigned NumIdx, bool InBounds) { - return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds); -} - -Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant *const *Idxs, - unsigned NumIdx, bool InBounds) { - return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds); -} - Constant * ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) { assert(LHS->getType() == RHS->getType()); @@ -1684,39 +1666,22 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { return pImpl->ExprConstants.getOrCreate(ResultTy, Key); } -Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, - Constant *Idx) { - if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx)) - return FC; // Fold a few common cases. - // Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> ArgVec(1, Val); - ArgVec.push_back(Idx); - const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec); - - LLVMContextImpl *pImpl = ReqTy->getContext().pImpl; - return pImpl->ExprConstants.getOrCreate(ReqTy, Key); -} - Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { assert(Val->getType()->isVectorTy() && "Tried to create extractelement operation on non-vector type!"); assert(Idx->getType()->isIntegerTy(32) && "Extractelement index must be i32 type!"); - return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(), - Val, Idx); -} - -Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val, - Constant *Elt, Constant *Idx) { - if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx)) + + if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx)) return FC; // Fold a few common cases. 
+ // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, Val); - ArgVec.push_back(Elt); ArgVec.push_back(Idx); - const ExprMapKeyType Key(Instruction::InsertElement,ArgVec); + const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec); - LLVMContextImpl *pImpl = ReqTy->getContext().pImpl; + LLVMContextImpl *pImpl = Val->getContext().pImpl; + Type *ReqTy = cast<VectorType>(Val->getType())->getElementType(); return pImpl->ExprConstants.getOrCreate(ReqTy, Key); } @@ -1728,21 +1693,17 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, && "Insertelement types must match!"); assert(Idx->getType()->isIntegerTy(32) && "Insertelement index must be i32 type!"); - return getInsertElementTy(Val->getType(), Val, Elt, Idx); -} -Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1, - Constant *V2, Constant *Mask) { - if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask)) - return FC; // Fold a few common cases... + if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx)) + return FC; // Fold a few common cases. // Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> ArgVec(1, V1); - ArgVec.push_back(V2); - ArgVec.push_back(Mask); - const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec); + std::vector<Constant*> ArgVec(1, Val); + ArgVec.push_back(Elt); + ArgVec.push_back(Idx); + const ExprMapKeyType Key(Instruction::InsertElement,ArgVec); - LLVMContextImpl *pImpl = ReqTy->getContext().pImpl; - return pImpl->ExprConstants.getOrCreate(ReqTy, Key); + LLVMContextImpl *pImpl = Val->getContext().pImpl; + return pImpl->ExprConstants.getOrCreate(Val->getType(), Key); } Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, @@ -1750,62 +1711,49 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) && "Invalid shuffle vector constant expr operands!"); + if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask)) + return FC; // Fold a few common cases. 
+ unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements(); const Type *EltTy = cast<VectorType>(V1->getType())->getElementType(); const Type *ShufTy = VectorType::get(EltTy, NElts); - return getShuffleVectorTy(ShufTy, V1, V2, Mask); -} -Constant *ConstantExpr::getInsertValueTy(const Type *ReqTy, Constant *Agg, - Constant *Val, - const unsigned *Idxs, unsigned NumIdx) { - assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs, - Idxs+NumIdx) == Val->getType() && - "insertvalue indices invalid!"); - assert(Agg->getType() == ReqTy && - "insertvalue type invalid!"); - assert(Agg->getType()->isFirstClassType() && - "Non-first-class type for constant InsertValue expression"); - Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs, NumIdx); - assert(FC && "InsertValue constant expr couldn't be folded!"); - return FC; + // Look up the constant in the table first to ensure uniqueness + std::vector<Constant*> ArgVec(1, V1); + ArgVec.push_back(V2); + ArgVec.push_back(Mask); + const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec); + + LLVMContextImpl *pImpl = ShufTy->getContext().pImpl; + return pImpl->ExprConstants.getOrCreate(ShufTy, Key); } Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val, - const unsigned *IdxList, unsigned NumIdx) { - assert(Agg->getType()->isFirstClassType() && - "Tried to create insertelement operation on non-first-class type!"); - - const Type *ReqTy = Agg->getType(); -#ifndef NDEBUG - const Type *ValTy = - ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx); -#endif - assert(ValTy == Val->getType() && "insertvalue indices invalid!"); - return getInsertValueTy(ReqTy, Agg, Val, IdxList, NumIdx); -} - -Constant *ConstantExpr::getExtractValueTy(const Type *ReqTy, Constant *Agg, - const unsigned *Idxs, unsigned NumIdx) { - assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs, - Idxs+NumIdx) == ReqTy && - "extractvalue indices invalid!"); + ArrayRef<unsigned> Idxs) { + assert(ExtractValueInst::getIndexedType(Agg->getType(), + Idxs) == Val->getType() && + "insertvalue indices invalid!"); assert(Agg->getType()->isFirstClassType() && - "Non-first-class type for constant extractvalue expression"); - Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs, NumIdx); - assert(FC && "ExtractValue constant expr couldn't be folded!"); + "Non-first-class type for constant insertvalue expression"); + Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs); + assert(FC && "insertvalue constant expr couldn't be folded!"); return FC; } Constant *ConstantExpr::getExtractValue(Constant *Agg, - const unsigned *IdxList, unsigned NumIdx) { + ArrayRef<unsigned> Idxs) { assert(Agg->getType()->isFirstClassType() && "Tried to create extractelement operation on non-first-class type!"); - const Type *ReqTy = - ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx); + const Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs); + (void)ReqTy; assert(ReqTy && "extractvalue indices invalid!"); - return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx); + + assert(Agg->getType()->isFirstClassType() && + "Non-first-class type for constant extractvalue expression"); + Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs); + assert(FC && "ExtractValue constant expr couldn't be folded!"); + return FC; } Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) { @@ -1918,7 +1866,7 @@ Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) { // 
destroyConstant - Remove the constant from the constant table... // void ConstantExpr::destroyConstant() { - getRawType()->getContext().pImpl->ExprConstants.remove(this); + getType()->getContext().pImpl->ExprConstants.remove(this); destroyConstantImpl(); } @@ -1959,10 +1907,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); Constant *ToC = cast<Constant>(To); - LLVMContextImpl *pImpl = getRawType()->getContext().pImpl; + LLVMContextImpl *pImpl = getType()->getContext().pImpl; std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup; - Lookup.first.first = cast<ArrayType>(getRawType()); + Lookup.first.first = cast<ArrayType>(getType()); Lookup.second = this; std::vector<Constant*> &Values = Lookup.first.second; @@ -1996,7 +1944,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, Constant *Replacement = 0; if (isAllZeros) { - Replacement = ConstantAggregateZero::get(getRawType()); + Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this array type already. bool Exists; @@ -2032,7 +1980,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, assert(Replacement != this && "I didn't contain From!"); // Everyone using this now uses the replacement. - uncheckedReplaceAllUsesWith(Replacement); + replaceAllUsesWith(Replacement); // Delete the old constant! destroyConstant(); @@ -2047,7 +1995,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!"); std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup; - Lookup.first.first = cast<StructType>(getRawType()); + Lookup.first.first = cast<StructType>(getType()); Lookup.second = this; std::vector<Constant*> &Values = Lookup.first.second; Values.reserve(getNumOperands()); // Build replacement struct. @@ -2069,11 +2017,11 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, } Values[OperandToUpdate] = ToC; - LLVMContextImpl *pImpl = getRawType()->getContext().pImpl; + LLVMContextImpl *pImpl = getContext().pImpl; Constant *Replacement = 0; if (isAllZeros) { - Replacement = ConstantAggregateZero::get(getRawType()); + Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this struct type already. bool Exists; @@ -2098,7 +2046,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, assert(Replacement != this && "I didn't contain From!"); // Everyone using this now uses the replacement. - uncheckedReplaceAllUsesWith(Replacement); + replaceAllUsesWith(Replacement); // Delete the old constant! destroyConstant(); @@ -2116,11 +2064,11 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To, Values.push_back(Val); } - Constant *Replacement = get(cast<VectorType>(getRawType()), Values); + Constant *Replacement = get(Values); assert(Replacement != this && "I didn't contain From!"); // Everyone using this now uses the replacement. - uncheckedReplaceAllUsesWith(Replacement); + replaceAllUsesWith(Replacement); // Delete the old constant! 
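The replaceUsesOfWithOnConstant implementations below all rebuild the aggregate around the updated operand and then retire the old node with plain replaceAllUsesWith (the unchecked variant is no longer needed once a constant's type can never change underneath it). From the outside, this is the machinery that runs when one constant is RAUW'd with another; a sketch continuing the earlier hypothetical module:

    GlobalVariable *NewGV =
        new GlobalVariable(M, ATy, /*isConstant=*/true,
                           GlobalValue::ExternalLinkage,
                           ConstantAggregateZero::get(ATy), "g2");
    // Every ConstantExpr/ConstantArray/ConstantStruct mentioning GV is
    // re-uniqued to mention NewGV instead.
    GV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, GV->getType()));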
destroyConstant(); @@ -2151,8 +2099,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, if (Agg == From) Agg = To; ArrayRef<unsigned> Indices = getIndices(); - Replacement = ConstantExpr::getExtractValue(Agg, - &Indices[0], Indices.size()); + Replacement = ConstantExpr::getExtractValue(Agg, Indices); } else if (getOpcode() == Instruction::InsertValue) { Constant *Agg = getOperand(0); Constant *Val = getOperand(1); @@ -2160,11 +2107,10 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, if (Val == From) Val = To; ArrayRef<unsigned> Indices = getIndices(); - Replacement = ConstantExpr::getInsertValue(Agg, Val, - &Indices[0], Indices.size()); + Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices); } else if (isCast()) { assert(getOperand(0) == From && "Cast only has one use!"); - Replacement = ConstantExpr::getCast(getOpcode(), To, getRawType()); + Replacement = ConstantExpr::getCast(getOpcode(), To, getType()); } else if (getOpcode() == Instruction::Select) { Constant *C1 = getOperand(0); Constant *C2 = getOperand(1); @@ -2220,7 +2166,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, assert(Replacement != this && "I didn't contain From!"); // Everyone using this now uses the replacement. - uncheckedReplaceAllUsesWith(Replacement); + replaceAllUsesWith(Replacement); // Delete the old constant! destroyConstant(); diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 13957545786d..bd134d9b892d 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -568,15 +568,13 @@ struct ConstantKeyData<InlineAsm> { } }; -template<class ValType, class TypeClass, class ConstantClass, +template<class ValType, class ValRefType, class TypeClass, class ConstantClass, bool HasLargeKey = false /*true for arrays and structs*/ > -class ConstantUniqueMap : public AbstractTypeUser { +class ConstantUniqueMap { public: typedef std::pair<const TypeClass*, ValType> MapKey; typedef std::map<MapKey, ConstantClass *> MapTy; typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy; - typedef std::map<const DerivedType*, typename MapTy::iterator> - AbstractTypeMapTy; private: /// Map - This is the main map from the element descriptor to the Constants. /// This is the primary way we avoid creating two of the same shape @@ -589,10 +587,6 @@ private: /// through the map with very large keys. InverseMapTy InverseMap; - /// AbstractTypeMap - Map for abstract type constants. - /// - AbstractTypeMapTy AbstractTypeMap; - public: typename MapTy::iterator map_begin() { return Map.begin(); } typename MapTy::iterator map_end() { return Map.end(); } @@ -629,7 +623,7 @@ private: } typename MapTy::iterator I = - Map.find(MapKey(static_cast<const TypeClass*>(CP->getRawType()), + Map.find(MapKey(static_cast<const TypeClass*>(CP->getType()), ConstantKeyData<ConstantClass>::getValType(CP))); if (I == Map.end() || I->second != CP) { // FIXME: This should not use a linear scan. If this gets to be a @@ -639,24 +633,8 @@ private: } return I; } - - void AddAbstractTypeUser(const Type *Ty, typename MapTy::iterator I) { - // If the type of the constant is abstract, make sure that an entry - // exists for it in the AbstractTypeMap. - if (Ty->isAbstract()) { - const DerivedType *DTy = static_cast<const DerivedType *>(Ty); - typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(DTy); - - if (TI == AbstractTypeMap.end()) { - // Add ourselves to the ATU list of the type. 
- cast<DerivedType>(DTy)->addAbstractTypeUser(this); - - AbstractTypeMap.insert(TI, std::make_pair(DTy, I)); - } - } - } - ConstantClass* Create(const TypeClass *Ty, const ValType &V, + ConstantClass *Create(const TypeClass *Ty, ValRefType V, typename MapTy::iterator I) { ConstantClass* Result = ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V); @@ -667,15 +645,13 @@ private: if (HasLargeKey) // Remember the reverse mapping if needed. InverseMap.insert(std::make_pair(Result, I)); - AddAbstractTypeUser(Ty, I); - return Result; } public: /// getOrCreate - Return the specified constant from the map, creating it if /// necessary. - ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) { + ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) { MapKey Lookup(Ty, V); ConstantClass* Result = 0; @@ -692,43 +668,6 @@ public: return Result; } - void UpdateAbstractTypeMap(const DerivedType *Ty, - typename MapTy::iterator I) { - assert(AbstractTypeMap.count(Ty) && - "Abstract type not in AbstractTypeMap?"); - typename MapTy::iterator &ATMEntryIt = AbstractTypeMap[Ty]; - if (ATMEntryIt == I) { - // Yes, we are removing the representative entry for this type. - // See if there are any other entries of the same type. - typename MapTy::iterator TmpIt = ATMEntryIt; - - // First check the entry before this one... - if (TmpIt != Map.begin()) { - --TmpIt; - if (TmpIt->first.first != Ty) // Not the same type, move back... - ++TmpIt; - } - - // If we didn't find the same type, try to move forward... - if (TmpIt == ATMEntryIt) { - ++TmpIt; - if (TmpIt == Map.end() || TmpIt->first.first != Ty) - --TmpIt; // No entry afterwards with the same type - } - - // If there is another entry in the map of the same abstract type, - // update the AbstractTypeMap entry now. - if (TmpIt != ATMEntryIt) { - ATMEntryIt = TmpIt; - } else { - // Otherwise, we are removing the last instance of this type - // from the table. Remove from the ATM, and from user list. - cast<DerivedType>(Ty)->removeAbstractTypeUser(this); - AbstractTypeMap.erase(Ty); - } - } - } - void remove(ConstantClass *CP) { typename MapTy::iterator I = FindExistingElement(CP); assert(I != Map.end() && "Constant not found in constant table!"); @@ -736,12 +675,6 @@ public: if (HasLargeKey) // Remember the reverse mapping if needed. InverseMap.erase(CP); - - // Now that we found the entry, make sure this isn't the entry that - // the AbstractTypeMap points to. - const TypeClass *Ty = I->first.first; - if (Ty->isAbstract()) - UpdateAbstractTypeMap(static_cast<const DerivedType *>(Ty), I); Map.erase(I); } @@ -755,22 +688,7 @@ public: assert(OldI != Map.end() && "Constant not found in constant table!"); assert(OldI->second == C && "Didn't find correct element?"); - // If this constant is the representative element for its abstract type, - // update the AbstractTypeMap so that the representative element is I. - // - // This must use getRawType() because if the type is under refinement, we - // will get the refineAbstractType callback below, and we don't want to - // kick union find in on the constant. - if (C->getRawType()->isAbstract()) { - typename AbstractTypeMapTy::iterator ATI = - AbstractTypeMap.find(cast<DerivedType>(C->getRawType())); - assert(ATI != AbstractTypeMap.end() && - "Abstract type not in AbstractTypeMap?"); - if (ATI->second == OldI) - ATI->second = I; - } - - // Remove the old entry from the map. + // Remove the old entry from the map. 
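With the AbstractTypeUser bookkeeping stripped out above, ConstantUniqueMap reduces to a keyed map plus a getOrCreate. An illustrative reduction of the idea, not the real template (which also maintains the inverse map for large keys):

    #include <map>

    // Toy stand-ins: Key plays TypeClass+ValType, Node plays ConstantClass.
    template <class Key, class Node>
    class SimpleUniqueMap {
      std::map<Key, Node*> Map;
    public:
      // Return the unique Node for Key, creating it on first request.
      Node *getOrCreate(const Key &K) {
        Node *&Slot = Map[K];          // null on first lookup
        if (!Slot)
          Slot = new Node(K);          // assumes a Node(Key) constructor
        return Slot;
      }
      // Mirrors remove() above; the caller owns destruction.
      void remove(const Key &K) { Map.erase(K); }
    };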
Map.erase(OldI); // Update the inverse map so that we know that this constant is now @@ -780,58 +698,6 @@ public: InverseMap[C] = I; } } - - void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) { - typename AbstractTypeMapTy::iterator I = AbstractTypeMap.find(OldTy); - - assert(I != AbstractTypeMap.end() && - "Abstract type not in AbstractTypeMap?"); - - // Convert a constant at a time until the last one is gone. The last one - // leaving will remove() itself, causing the AbstractTypeMapEntry to be - // eliminated eventually. - do { - ConstantClass *C = I->second->second; - MapKey Key(cast<TypeClass>(NewTy), - ConstantKeyData<ConstantClass>::getValType(C)); - - std::pair<typename MapTy::iterator, bool> IP = - Map.insert(std::make_pair(Key, C)); - if (IP.second) { - // The map didn't previously have an appropriate constant in the - // new type. - - // Remove the old entry. - typename MapTy::iterator OldI = - Map.find(MapKey(cast<TypeClass>(OldTy), IP.first->first.second)); - assert(OldI != Map.end() && "Constant not in map!"); - UpdateAbstractTypeMap(OldTy, OldI); - Map.erase(OldI); - - // Set the constant's type. This is done in place! - setType(C, NewTy); - - // Update the inverse map so that we know that this constant is now - // located at descriptor I. - if (HasLargeKey) - InverseMap[C] = IP.first; - - AddAbstractTypeUser(NewTy, IP.first); - } else { - // The map already had an appropriate constant in the new type, so - // there's no longer a need for the old constant. - C->uncheckedReplaceAllUsesWith(IP.first->second); - C->destroyConstant(); // This constant is now dead, destroy it. - } - I = AbstractTypeMap.find(OldTy); - } while (I != AbstractTypeMap.end()); - } - - // If the type became concrete without being refined to any other existing - // type, we just remove ourselves from the ATU list. - void typeBecameConcrete(const DerivedType *AbsTy) { - AbsTy->removeAbstractTypeUser(this); - } void dump() const { DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n"); diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 92f944027a7c..2a816e123a61 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -19,7 +19,6 @@ #include "llvm/GlobalVariable.h" #include "llvm/GlobalAlias.h" #include "llvm/LLVMContext.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" #include "llvm/PassManager.h" @@ -111,27 +110,6 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple) { unwrap(M)->setTargetTriple(Triple); } -/*--.. 
Type names ..........................................................--*/ -LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) { - return unwrap(M)->addTypeName(Name, unwrap(Ty)); -} - -void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name) { - TypeSymbolTable &TST = unwrap(M)->getTypeSymbolTable(); - - TypeSymbolTable::iterator I = TST.find(Name); - if (I != TST.end()) - TST.remove(I); -} - -LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) { - return wrap(unwrap(M)->getTypeByName(Name)); -} - -const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty) { - return unwrap(M)->getTypeName(unwrap(Ty)).c_str(); -} - void LLVMDumpModule(LLVMModuleRef M) { unwrap(M)->dump(); } @@ -182,8 +160,6 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMArrayTypeKind; case Type::PointerTyID: return LLVMPointerTypeKind; - case Type::OpaqueTyID: - return LLVMOpaqueTypeKind; case Type::VectorTyID: return LLVMVectorTypeKind; case Type::X86_MMXTyID: @@ -284,10 +260,7 @@ LLVMTypeRef LLVMX86MMXType(void) { LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, LLVMTypeRef *ParamTypes, unsigned ParamCount, LLVMBool IsVarArg) { - std::vector<const Type*> Tys; - for (LLVMTypeRef *I = ParamTypes, *E = ParamTypes + ParamCount; I != E; ++I) - Tys.push_back(unwrap(*I)); - + ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount); return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0)); } @@ -314,11 +287,7 @@ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) { LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed) { - std::vector<const Type*> Tys; - for (LLVMTypeRef *I = ElementTypes, - *E = ElementTypes + ElementCount; I != E; ++I) - Tys.push_back(unwrap(*I)); - + ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount); return wrap(StructType::get(*unwrap(C), Tys, Packed != 0)); } @@ -328,6 +297,16 @@ LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, ElementCount, Packed); } +LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) +{ + return wrap(StructType::createNamed(*unwrap(C), Name)); +} + +void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes, + unsigned ElementCount, LLVMBool Packed) { + ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount); + unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0); +} unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->getNumElements(); @@ -344,6 +323,14 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } +LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) { + return unwrap<StructType>(StructTy)->isOpaque(); +} + +LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) { + return wrap(unwrap(M)->getTypeByName(Name)); +} + /*--.. 
Operations on array, pointer, and vector types (sequence types) .....--*/ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { @@ -382,9 +369,6 @@ LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) { LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) { return wrap(Type::getLabelTy(*unwrap(C))); } -LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C) { - return wrap(OpaqueType::get(*unwrap(C))); -} LLVMTypeRef LLVMVoidType(void) { return LLVMVoidTypeInContext(LLVMGetGlobalContext()); @@ -392,28 +376,6 @@ LLVMTypeRef LLVMVoidType(void) { LLVMTypeRef LLVMLabelType(void) { return LLVMLabelTypeInContext(LLVMGetGlobalContext()); } -LLVMTypeRef LLVMOpaqueType(void) { - return LLVMOpaqueTypeInContext(LLVMGetGlobalContext()); -} - -/*--.. Operations on type handles ..........................................--*/ - -LLVMTypeHandleRef LLVMCreateTypeHandle(LLVMTypeRef PotentiallyAbstractTy) { - return wrap(new PATypeHolder(unwrap(PotentiallyAbstractTy))); -} - -void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle) { - delete unwrap(TypeHandle); -} - -LLVMTypeRef LLVMResolveTypeHandle(LLVMTypeHandleRef TypeHandle) { - return wrap(unwrap(TypeHandle)->get()); -} - -void LLVMRefineType(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy) { - unwrap<DerivedType>(AbstractTy)->refineAbstractTypeTo(unwrap(ConcreteTy)); -} - /*===-- Operations on values ----------------------------------------------===*/ @@ -612,9 +574,10 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed) { - return wrap(ConstantStruct::get(*unwrap(C), - unwrap<Constant>(ConstantVals, Count), - Count, Packed != 0)); + Constant **Elements = unwrap<Constant>(ConstantVals, Count); + return wrap(ConstantStruct::getAnon(*unwrap(C), + ArrayRef<Constant*>(Elements, Count), + Packed != 0)); } LLVMValueRef LLVMConstString(const char *Str, unsigned Length, @@ -624,15 +587,24 @@ LLVMValueRef LLVMConstString(const char *Str, unsigned Length, } LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, LLVMValueRef *ConstantVals, unsigned Length) { - return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), - unwrap<Constant>(ConstantVals, Length), - Length)); + ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length); + return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V)); } LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed) { return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count, Packed); } + +LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy, + LLVMValueRef *ConstantVals, + unsigned Count) { + Constant **Elements = unwrap<Constant>(ConstantVals, Count); + const StructType *Ty = cast<StructType>(unwrap(StructTy)); + + return wrap(ConstantStruct::get(Ty, ArrayRef<Constant*>(Elements, Count))); +} + LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) { return wrap(ConstantVector::get(ArrayRef<Constant*>( unwrap<Constant>(ScalarConstantVals, Size), Size))); @@ -962,7 +934,8 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, unsigned NumIdx) { return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant), - IdxList, NumIdx)); + ArrayRef<unsigned>(IdxList, + NumIdx))); } LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, @@ -970,7 +943,8 @@ 
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, unsigned *IdxList, unsigned NumIdx) { return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant), unwrap<Constant>(ElementValueConstant), - IdxList, NumIdx)); + ArrayRef<unsigned>(IdxList, + NumIdx))); } LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, @@ -1706,7 +1680,7 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name) { return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch), - unwrap(Args), unwrap(Args) + NumArgs, + ArrayRef<Value *>(unwrap(Args), NumArgs), Name)); } @@ -2089,8 +2063,9 @@ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, const char *Name) { - return wrap(unwrap(B)->CreateCall(unwrap(Fn), unwrap(Args), - unwrap(Args) + NumArgs, Name)); + return wrap(unwrap(B)->CreateCall(unwrap(Fn), + ArrayRef<Value *>(unwrap(Args), NumArgs), + Name)); } LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If, diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp index 520333cbbcf2..4ff6b2cd80e8 100644 --- a/lib/VMCore/DebugLoc.cpp +++ b/lib/VMCore/DebugLoc.cpp @@ -128,6 +128,38 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) { return get(LineNo, ColNo, Scope, dyn_cast_or_null<MDNode>(N->getOperand(3))); } +/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc. +DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) { + if (N == 0 || N->getNumOperands() < 3) return DebugLoc(); + + MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(1)); + if (Scope == 0) return DebugLoc(); + + unsigned LineNo = 0, ColNo = 0; + if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(2))) + LineNo = Line->getZExtValue(); + if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(3))) + ColNo = Col->getZExtValue(); + + return get(LineNo, ColNo, Scope, NULL); +} + +void DebugLoc::dump(const LLVMContext &Ctx) const { +#ifndef NDEBUG + if (!isUnknown()) { + dbgs() << getLine(); + if (getCol() != 0) + dbgs() << ',' << getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + dbgs() << " @ "; + InlinedAtDL.dump(Ctx); + } else + dbgs() << "\n"; + } +#endif +} + //===----------------------------------------------------------------------===// // DenseMap specialization //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 0ae0bdb8056a..6536bcd0e2ed 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -134,7 +134,7 @@ LLVMContext &Function::getContext() const { return getType()->getContext(); } -const FunctionType *Function::getFunctionType() const { +FunctionType *Function::getFunctionType() const { return cast<FunctionType>(getType()->getElementType()); } @@ -142,7 +142,7 @@ bool Function::isVarArg() const { return getFunctionType()->isVarArg(); } -const Type *Function::getReturnType() const { +Type *Function::getReturnType() const { return getFunctionType()->getReturnType(); } @@ -163,7 +163,7 @@ Function::Function(const FunctionType *Ty, LinkageTypes Linkage, : GlobalValue(PointerType::getUnqual(Ty), Value::FunctionVal, 0, 0, Linkage, name) { assert(FunctionType::isValidReturnType(getReturnType()) && - !getReturnType()->isOpaqueTy() && "invalid return type"); + 
"invalid return type"); SymTab = new ValueSymbolTable(); // If the function has arguments, mark them as lazily built. @@ -333,7 +333,7 @@ unsigned Function::getIntrinsicID() const { return 0; } -std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) { +std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) { assert(id < num_intrinsics && "Invalid intrinsic ID!"); static const char * const Table[] = { "not_intrinsic", @@ -341,10 +341,10 @@ std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) { #include "llvm/Intrinsics.gen" #undef GET_INTRINSIC_NAME_TABLE }; - if (numTys == 0) + if (Tys.empty()) return Table[id]; std::string Result(Table[id]); - for (unsigned i = 0; i < numTys; ++i) { + for (unsigned i = 0; i < Tys.size(); ++i) { if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) { Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) + EVT::getEVT(PTyp->getElementType()).getEVTString(); @@ -356,10 +356,9 @@ std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) { } const FunctionType *Intrinsic::getType(LLVMContext &Context, - ID id, const Type **Tys, - unsigned numTys) { + ID id, ArrayRef<Type*> Tys) { const Type *ResultTy = NULL; - std::vector<const Type*> ArgTys; + std::vector<Type*> ArgTys; bool IsVarArg = false; #define GET_INTRINSIC_GENERATOR @@ -384,14 +383,12 @@ bool Intrinsic::isOverloaded(ID id) { #include "llvm/Intrinsics.gen" #undef GET_INTRINSIC_ATTRIBUTES -Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys, - unsigned numTys) { +Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) { // There can never be multiple globals with the same name of different types, // because intrinsics must be a specific type. return - cast<Function>(M->getOrInsertFunction(getName(id, Tys, numTys), - getType(M->getContext(), - id, Tys, numTys))); + cast<Function>(M->getOrInsertFunction(getName(id, Tys), + getType(M->getContext(), id, Tys))); } // This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method. @@ -417,7 +414,7 @@ bool Function::hasAddressTaken(const User* *PutOffender) const { /// setjmp or other function that gcc recognizes as "returning twice". /// /// FIXME: Remove after <rdar://problem/8031714> is fixed. -/// FIXME: Is the obove FIXME valid? +/// FIXME: Is the above FIXME valid? bool Function::callsFunctionThatReturnsTwice() const { const Module *M = this->getParent(); static const char *ReturnsTwiceFns[] = { diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 60000ad1b50e..db008e09d1c8 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -51,6 +51,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { setAlignment(Src->getAlignment()); setSection(Src->getSection()); setVisibility(Src->getVisibility()); + setUnnamedAddr(Src->hasUnnamedAddr()); } void GlobalValue::setAlignment(unsigned Align) { @@ -60,6 +61,20 @@ void GlobalValue::setAlignment(unsigned Align) { Alignment = Log2_32(Align) + 1; assert(getAlignment() == Align && "Alignment representation error!"); } + +bool GlobalValue::isDeclaration() const { + // Globals are definitions if they have an initializer. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this)) + return GV->getNumOperands() == 0; + + // Functions are definitions if they have a body. + if (const Function *F = dyn_cast<Function>(this)) + return F->empty(); + + // Aliases are always definitions. 
+ assert(isa<GlobalAlias>(this)); + return false; +} //===----------------------------------------------------------------------===// // GlobalVariable Implementation @@ -201,39 +216,26 @@ void GlobalAlias::eraseFromParent() { getParent()->getAliasList().erase(this); } -bool GlobalAlias::isDeclaration() const { - const GlobalValue* AV = getAliasedGlobal(); - if (AV) - return AV->isDeclaration(); - else - return false; -} - -void GlobalAlias::setAliasee(Constant *Aliasee) -{ - if (Aliasee) - assert(Aliasee->getType() == getType() && - "Alias and aliasee types should match!"); +void GlobalAlias::setAliasee(Constant *Aliasee) { + assert((!Aliasee || Aliasee->getType() == getType()) && + "Alias and aliasee types should match!"); setOperand(0, Aliasee); } const GlobalValue *GlobalAlias::getAliasedGlobal() const { const Constant *C = getAliasee(); - if (C) { - if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return GV; - else { - const ConstantExpr *CE = 0; - if ((CE = dyn_cast<ConstantExpr>(C)) && - (CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::GetElementPtr)) - return dyn_cast<GlobalValue>(CE->getOperand(0)); - else - llvm_unreachable("Unsupported aliasee"); - } - } - return 0; + if (C == 0) return 0; + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return GV; + + const ConstantExpr *CE = cast<ConstantExpr>(C); + assert((CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) && + "Unsupported aliasee"); + + return dyn_cast<GlobalValue>(CE->getOperand(0)); } const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const { @@ -254,7 +256,7 @@ const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const { GV = GA->getAliasedGlobal(); if (!Visited.insert(GV)) - return NULL; + return 0; } return GV; diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index f2d469a2d84f..ffe961fee7c2 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -34,7 +34,7 @@ Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { return GV; } -const Type *IRBuilderBase::getCurrentFunctionReturnType() const { +Type *IRBuilderBase::getCurrentFunctionReturnType() const { assert(BB && BB->getParent() && "No current function!"); return BB->getParent()->getReturnType(); } @@ -52,9 +52,9 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) { return BCI; } -static CallInst *createCallHelper(Value *Callee, Value *const* Ops, - unsigned NumOps, IRBuilderBase *Builder) { - CallInst *CI = CallInst::Create(Callee, Ops, Ops + NumOps, ""); +static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops, + IRBuilderBase *Builder) { + CallInst *CI = CallInst::Create(Callee, Ops, ""); Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI); Builder->SetInstDebugLocation(CI); return CI; @@ -65,11 +65,11 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag) { Ptr = getCastedInt8PtrValue(Ptr); Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) }; - const Type *Tys[] = { Ptr->getType(), Size->getType() }; + Type *Tys[] = { Ptr->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); - Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); + Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); - CallInst *CI = createCallHelper(TheFn, Ops, 5, this); + CallInst *CI = createCallHelper(TheFn, Ops, this); // Set the TBAA info if 
present. if (TBAATag) @@ -85,11 +85,11 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, Src = getCastedInt8PtrValue(Src); Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) }; - const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; + Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); - Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 3); + Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); - CallInst *CI = createCallHelper(TheFn, Ops, 5, this); + CallInst *CI = createCallHelper(TheFn, Ops, this); // Set the TBAA info if present. if (TBAATag) @@ -105,11 +105,11 @@ CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, Src = getCastedInt8PtrValue(Src); Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) }; - const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; + Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); - Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys, 3); + Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys); - CallInst *CI = createCallHelper(TheFn, Ops, 5, this); + CallInst *CI = createCallHelper(TheFn, Ops, this); // Set the TBAA info if present. if (TBAATag) @@ -130,7 +130,7 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) { Value *Ops[] = { Size, Ptr }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start); - return createCallHelper(TheFn, Ops, 2, this); + return createCallHelper(TheFn, Ops, this); } CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) { @@ -145,5 +145,5 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) { Value *Ops[] = { Size, Ptr }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end); - return createCallHelper(TheFn, Ops, 2, this); + return createCallHelper(TheFn, Ops, this); } diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index bd3667db7614..4a03b395e98e 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -47,11 +47,11 @@ InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString, } void InlineAsm::destroyConstant() { - getRawType()->getContext().pImpl->InlineAsms.remove(this); + getType()->getContext().pImpl->InlineAsms.remove(this); delete this; } -const FunctionType *InlineAsm::getFunctionType() const { +FunctionType *InlineAsm::getFunctionType() const { return cast<FunctionType>(getType()->getElementType()); } diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 2c8b8b23b18e..02c075743959 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -204,22 +204,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { if (const InvokeInst *CI = dyn_cast<InvokeInst>(this)) return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() && CI->getAttributes() == cast<InvokeInst>(I)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) { - if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices()) - return false; - for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) - if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i]) - return false; - return true; - } - if (const ExtractValueInst *EVI = 
dyn_cast<ExtractValueInst>(this)) { - if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices()) - return false; - for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) - if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i]) - return false; - return true; - } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) + return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) + return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices(); return true; } @@ -256,22 +244,10 @@ bool Instruction::isSameOperationAs(const Instruction *I) const { return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() && CI->getAttributes() == cast<InvokeInst>(I)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) { - if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices()) - return false; - for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) - if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i]) - return false; - return true; - } - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) { - if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices()) - return false; - for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) - if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i]) - return false; - return true; - } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) + return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) + return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices(); return true; } @@ -429,8 +405,10 @@ Instruction *Instruction::clone() const { // Otherwise, enumerate and copy over metadata from the old instruction to the // new one. SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs; - getAllMetadata(TheMDs); + getAllMetadataOtherThanDebugLoc(TheMDs); for (unsigned i = 0, e = TheMDs.size(); i != e; ++i) New->setMetadata(TheMDs[i].first, TheMDs[i].second); + + New->setDebugLoc(getDebugLoc()); return New; } diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 8f4eabeb8aee..9baad09cb272 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -87,11 +87,8 @@ PHINode::PHINode(const PHINode &PN) : Instruction(PN.getType(), Instruction::PHI, allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()), ReservedSpace(PN.getNumOperands()) { - Use *OL = OperandList; - for (unsigned i = 0, e = PN.getNumOperands(); i != e; i+=2) { - OL[i] = PN.getOperand(i); - OL[i+1] = PN.getOperand(i+1); - } + std::copy(PN.op_begin(), PN.op_end(), op_begin()); + std::copy(PN.block_begin(), PN.block_end(), block_begin()); SubclassOptionalData = PN.SubclassOptionalData; } @@ -99,31 +96,37 @@ PHINode::~PHINode() { dropHungoffUses(); } +Use *PHINode::allocHungoffUses(unsigned N) const { + // Allocate the array of Uses of the incoming values, followed by a pointer + // (with bottom bit set) to the User, followed by the array of pointers to + // the incoming basic blocks. + size_t size = N * sizeof(Use) + sizeof(Use::UserRef) + + N * sizeof(BasicBlock*); + Use *Begin = static_cast<Use*>(::operator new(size)); + Use *End = Begin + N; + (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1); + return Use::initTags(Begin, End); +} + // removeIncomingValue - Remove an incoming value. 
This is useful if a // predecessor basic block is deleted. Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) { - unsigned NumOps = getNumOperands(); - Use *OL = OperandList; - assert(Idx*2 < NumOps && "BB not in PHI node!"); - Value *Removed = OL[Idx*2]; + Value *Removed = getIncomingValue(Idx); // Move everything after this operand down. // // FIXME: we could just swap with the end of the list, then erase. However, - // client might not expect this to happen. The code as it is thrashes the + // clients might not expect this to happen. The code as it is thrashes the // use/def lists, which is kinda lame. - for (unsigned i = (Idx+1)*2; i != NumOps; i += 2) { - OL[i-2] = OL[i]; - OL[i-2+1] = OL[i+1]; - } + std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx); + std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx); // Nuke the last value. - OL[NumOps-2].set(0); - OL[NumOps-2+1].set(0); - NumOperands = NumOps-2; + Op<-1>().set(0); + --NumOperands; // If the PHI node is dead, because it has zero entries, nuke it now. - if (NumOps == 2 && DeletePHIIfEmpty) { + if (getNumOperands() == 0 && DeletePHIIfEmpty) { // If anyone is using this PHI, make them use a dummy value instead... replaceAllUsesWith(UndefValue::get(getType())); eraseFromParent(); @@ -137,15 +140,18 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) { /// void PHINode::growOperands() { unsigned e = getNumOperands(); - // Multiply by 1.5 and round down so the result is still even. - unsigned NumOps = e + e / 4 * 2; - if (NumOps < 4) NumOps = 4; // 4 op PHI nodes are VERY common. + unsigned NumOps = e + e / 2; + if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common. + + Use *OldOps = op_begin(); + BasicBlock **OldBlocks = block_begin(); ReservedSpace = NumOps; - Use *OldOps = OperandList; - Use *NewOps = allocHungoffUses(NumOps); - std::copy(OldOps, OldOps + e, NewOps); - OperandList = NewOps; + OperandList = allocHungoffUses(ReservedSpace); + + std::copy(OldOps, OldOps + e, op_begin()); + std::copy(OldBlocks, OldBlocks + e, block_begin()); + Use::zap(OldOps, OldOps + e, true); } @@ -168,95 +174,42 @@ Value *PHINode::hasConstantValue() const { CallInst::~CallInst() { } -void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) { - assert(NumOperands == NumParams+1 && "NumOperands not set up?"); +void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) { + assert(NumOperands == Args.size() + 1 && "NumOperands not set up?"); Op<-1>() = Func; +#ifndef NDEBUG const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); - (void)FTy; // silence warning. 
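The PHINode changes above split the hung-off allocation: incoming values live in the Use array and incoming blocks in a trailing BasicBlock* array, so the operand count now covers values only and removal shifts the two arrays independently. Client code keeps the same shape; a sketch continuing the hypothetical module (signatures as of this tree):

    FunctionType *FTy = FunctionType::get(I32, /*isVarArg=*/false);
    Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, "f", &M);
    BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
    PHINode *PN = PHINode::Create(I32, /*NumReservedValues=*/2, "p", Entry);
    PN->addIncoming(Zero, Entry);  // value and block stored in separate arrays
    PN->removeIncomingValue(0u, /*DeletePHIIfEmpty=*/false);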
- assert((NumParams == FTy->getNumParams() || - (FTy->isVarArg() && NumParams > FTy->getNumParams())) && + assert((Args.size() == FTy->getNumParams() || + (FTy->isVarArg() && Args.size() > FTy->getNumParams())) && "Calling a function with bad signature!"); - for (unsigned i = 0; i != NumParams; ++i) { + + for (unsigned i = 0; i != Args.size(); ++i) assert((i >= FTy->getNumParams() || - FTy->getParamType(i) == Params[i]->getType()) && + FTy->getParamType(i) == Args[i]->getType()) && "Calling a function with a bad signature!"); - OperandList[i] = Params[i]; - } -} - -void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) { - assert(NumOperands == 3 && "NumOperands not set up?"); - Op<-1>() = Func; - Op<0>() = Actual1; - Op<1>() = Actual2; - - const FunctionType *FTy = - cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); - (void)FTy; // silence warning. - - assert((FTy->getNumParams() == 2 || - (FTy->isVarArg() && FTy->getNumParams() < 2)) && - "Calling a function with bad signature"); - assert((0 >= FTy->getNumParams() || - FTy->getParamType(0) == Actual1->getType()) && - "Calling a function with a bad signature!"); - assert((1 >= FTy->getNumParams() || - FTy->getParamType(1) == Actual2->getType()) && - "Calling a function with a bad signature!"); -} - -void CallInst::init(Value *Func, Value *Actual) { - assert(NumOperands == 2 && "NumOperands not set up?"); - Op<-1>() = Func; - Op<0>() = Actual; - - const FunctionType *FTy = - cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); - (void)FTy; // silence warning. +#endif - assert((FTy->getNumParams() == 1 || - (FTy->isVarArg() && FTy->getNumParams() == 0)) && - "Calling a function with bad signature"); - assert((0 == FTy->getNumParams() || - FTy->getParamType(0) == Actual->getType()) && - "Calling a function with a bad signature!"); + std::copy(Args.begin(), Args.end(), op_begin()); + setName(NameStr); } -void CallInst::init(Value *Func) { +void CallInst::init(Value *Func, const Twine &NameStr) { assert(NumOperands == 1 && "NumOperands not set up?"); Op<-1>() = Func; +#ifndef NDEBUG const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); - (void)FTy; // silence warning. 
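All the special-cased one- and two-argument forms of CallInst::init collapse into the ArrayRef path above, and call sites shrink accordingly. A sketch continuing the same hypothetical module:

    Type *Params[] = { I32 };
    FunctionType *CalleeTy = FunctionType::get(I32, Params, /*isVarArg=*/false);
    Function *Callee =
        Function::Create(CalleeTy, GlobalValue::ExternalLinkage, "callee", &M);
    Value *Args[] = { Zero };
    // The C array converts to ArrayRef<Value*>; init checks the signature.
    CallInst *Call = CallInst::Create(Callee, Args, "call", Entry);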
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature"); -} +#endif -CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name, - Instruction *InsertBefore) - : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType()) - ->getElementType())->getReturnType(), - Instruction::Call, - OperandTraits<CallInst>::op_end(this) - 2, - 2, InsertBefore) { - init(Func, Actual); - setName(Name); + setName(NameStr); } -CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name, - BasicBlock *InsertAtEnd) - : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType()) - ->getElementType())->getReturnType(), - Instruction::Call, - OperandTraits<CallInst>::op_end(this) - 2, - 2, InsertAtEnd) { - init(Func, Actual); - setName(Name); -} CallInst::CallInst(Value *Func, const Twine &Name, Instruction *InsertBefore) : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType()) @@ -264,8 +217,7 @@ CallInst::CallInst(Value *Func, const Twine &Name, Instruction::Call, OperandTraits<CallInst>::op_end(this) - 1, 1, InsertBefore) { - init(Func); - setName(Name); + init(Func, Name); } CallInst::CallInst(Value *Func, const Twine &Name, @@ -275,8 +227,7 @@ CallInst::CallInst(Value *Func, const Twine &Name, Instruction::Call, OperandTraits<CallInst>::op_end(this) - 1, 1, InsertAtEnd) { - init(Func); - setName(Name); + init(Func, Name); } CallInst::CallInst(const CallInst &CI) @@ -287,10 +238,7 @@ CallInst::CallInst(const CallInst &CI) setTailCall(CI.isTailCall()); setCallingConv(CI.getCallingConv()); - Use *OL = OperandList; - Use *InOL = CI.OperandList; - for (unsigned i = 0, e = CI.getNumOperands(); i != e; ++i) - OL[i] = InOL[i]; + std::copy(CI.op_begin(), CI.op_end(), op_begin()); SubclassOptionalData = CI.SubclassOptionalData; } @@ -366,7 +314,7 @@ static Instruction *createMalloc(Instruction *InsertBefore, // Create the call to Malloc. BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd; Module* M = BB->getParent()->getParent(); - const Type *BPTy = Type::getInt8PtrTy(BB->getContext()); + Type *BPTy = Type::getInt8PtrTy(BB->getContext()); Value *MallocFunc = MallocF; if (!MallocFunc) // prototype malloc as "void *malloc(size_t)" @@ -481,27 +429,28 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) { //===----------------------------------------------------------------------===// void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException, - Value* const *Args, unsigned NumArgs) { - assert(NumOperands == 3+NumArgs && "NumOperands not set up?"); + ArrayRef<Value *> Args, const Twine &NameStr) { + assert(NumOperands == 3 + Args.size() && "NumOperands not set up?"); Op<-3>() = Fn; Op<-2>() = IfNormal; Op<-1>() = IfException; + +#ifndef NDEBUG const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()); - (void)FTy; // silence warning. 
- assert(((NumArgs == FTy->getNumParams()) || - (FTy->isVarArg() && NumArgs > FTy->getNumParams())) && + assert(((Args.size() == FTy->getNumParams()) || + (FTy->isVarArg() && Args.size() > FTy->getNumParams())) && "Invoking a function with bad signature"); - Use *OL = OperandList; - for (unsigned i = 0, e = NumArgs; i != e; i++) { + for (unsigned i = 0, e = Args.size(); i != e; i++) assert((i >= FTy->getNumParams() || FTy->getParamType(i) == Args[i]->getType()) && "Invoking a function with a bad signature!"); - - OL[i] = Args[i]; - } +#endif + + std::copy(Args.begin(), Args.end(), op_begin()); + setName(NameStr); } InvokeInst::InvokeInst(const InvokeInst &II) @@ -511,9 +460,7 @@ InvokeInst::InvokeInst(const InvokeInst &II) II.getNumOperands()) { setAttributes(II.getAttributes()); setCallingConv(II.getCallingConv()); - Use *OL = OperandList, *InOL = II.OperandList; - for (unsigned i = 0, e = II.getNumOperands(); i != e; ++i) - OL[i] = InOL[i]; + std::copy(II.op_begin(), II.op_end(), op_begin()); SubclassOptionalData = II.SubclassOptionalData; } @@ -817,7 +764,7 @@ bool AllocaInst::isArrayAllocation() const { return true; } -const Type *AllocaInst::getAllocatedType() const { +Type *AllocaInst::getAllocatedType() const { return getType()->getElementType(); } @@ -1092,7 +1039,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx, const Twine &Name, Instruction *InBe) : Instruction(PointerType::get( - checkType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)), + checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)), GetElementPtr, OperandTraits<GetElementPtrInst>::op_end(this) - 2, 2, InBe) { @@ -1102,7 +1049,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx, GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx, const Twine &Name, BasicBlock *IAE) : Instruction(PointerType::get( - checkType(getIndexedType(Ptr->getType(),Idx)), + checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)), GetElementPtr, OperandTraits<GetElementPtrInst>::op_end(this) - 2, @@ -1120,60 +1067,50 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx, /// pointer type. /// template <typename IndexTy> -static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs, - unsigned NumIdx) { +static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs, + unsigned NumIdx) { const PointerType *PTy = dyn_cast<PointerType>(Ptr); if (!PTy) return 0; // Type isn't a pointer type! - const Type *Agg = PTy->getElementType(); + Type *Agg = PTy->getElementType(); // Handle the special case of the empty set index set, which is always valid. if (NumIdx == 0) return Agg; // If there is at least one index, the top level type must be sized, otherwise - // it cannot be 'stepped over'. We explicitly allow abstract types (those - // that contain opaque types) under the assumption that it will be resolved to - // a sane type later. - if (!Agg->isSized() && !Agg->isAbstract()) + // it cannot be 'stepped over'. 
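With abstract types gone, getIndexedTypeInternal below simply requires the outer type to be sized rather than excusing types still under refinement. The helper is also handy for validating a GEP up front; continuing the sketch:

    Value *GEPIdxs[] = { Zero, Zero };
    // Yields i32 for [4 x i32]* with indices {0,0}; returns null when the
    // index list does not match the pointee's structure.
    Type *Indexed = GetElementPtrInst::getIndexedType(GV->getType(), GEPIdxs, 2);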
+ if (!Agg->isSized()) return 0; unsigned CurIdx = 1; for (; CurIdx != NumIdx; ++CurIdx) { - const CompositeType *CT = dyn_cast<CompositeType>(Agg); + CompositeType *CT = dyn_cast<CompositeType>(Agg); if (!CT || CT->isPointerTy()) return 0; IndexTy Index = Idxs[CurIdx]; if (!CT->indexValid(Index)) return 0; Agg = CT->getTypeAtIndex(Index); - - // If the new type forwards to another type, then it is in the middle - // of being refined to another type (and hence, may have dropped all - // references to what it was using before). So, use the new forwarded - // type. - if (const Type *Ty = Agg->getForwardedType()) - Agg = Ty; } return CurIdx == NumIdx ? Agg : 0; } -const Type* GetElementPtrInst::getIndexedType(const Type *Ptr, - Value* const *Idxs, - unsigned NumIdx) { +Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value* const *Idxs, + unsigned NumIdx) { return getIndexedTypeInternal(Ptr, Idxs, NumIdx); } -const Type* GetElementPtrInst::getIndexedType(const Type *Ptr, - Constant* const *Idxs, - unsigned NumIdx) { +Type *GetElementPtrInst::getIndexedType(const Type *Ptr, + Constant* const *Idxs, + unsigned NumIdx) { return getIndexedTypeInternal(Ptr, Idxs, NumIdx); } -const Type* GetElementPtrInst::getIndexedType(const Type *Ptr, - uint64_t const *Idxs, - unsigned NumIdx) { +Type *GetElementPtrInst::getIndexedType(const Type *Ptr, + uint64_t const *Idxs, + unsigned NumIdx) { return getIndexedTypeInternal(Ptr, Idxs, NumIdx); } -const Type* GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) { +Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) { const PointerType *PTy = dyn_cast<PointerType>(Ptr); if (!PTy) return 0; // Type isn't a pointer type! @@ -1390,27 +1327,22 @@ int ShuffleVectorInst::getMaskValue(unsigned i) const { // InsertValueInst Class //===----------------------------------------------------------------------===// -void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx, - unsigned NumIdx, const Twine &Name) { +void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, + const Twine &Name) { assert(NumOperands == 2 && "NumOperands not initialized?"); - assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx, Idx + NumIdx) == - Val->getType() && "Inserted value must match indexed type!"); - Op<0>() = Agg; - Op<1>() = Val; - Indices.append(Idx, Idx + NumIdx); - setName(Name); -} + // There's no fundamental reason why we require at least one index + // (other than weirdness with &*IdxBegin being invalid; see + // getelementptr's init routine for example). But there's no + // present need to support it. 
+ assert(Idxs.size() > 0 && "InsertValueInst must have at least one index"); -void InsertValueInst::init(Value *Agg, Value *Val, unsigned Idx, - const Twine &Name) { - assert(NumOperands == 2 && "NumOperands not initialized?"); - assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx) == Val->getType() - && "Inserted value must match indexed type!"); + assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) == + Val->getType() && "Inserted value must match indexed type!"); Op<0>() = Agg; Op<1>() = Val; - Indices.push_back(Idx); + Indices.append(Idxs.begin(), Idxs.end()); setName(Name); } @@ -1423,44 +1355,18 @@ InsertValueInst::InsertValueInst(const InsertValueInst &IVI) SubclassOptionalData = IVI.SubclassOptionalData; } -InsertValueInst::InsertValueInst(Value *Agg, - Value *Val, - unsigned Idx, - const Twine &Name, - Instruction *InsertBefore) - : Instruction(Agg->getType(), InsertValue, - OperandTraits<InsertValueInst>::op_begin(this), - 2, InsertBefore) { - init(Agg, Val, Idx, Name); -} - -InsertValueInst::InsertValueInst(Value *Agg, - Value *Val, - unsigned Idx, - const Twine &Name, - BasicBlock *InsertAtEnd) - : Instruction(Agg->getType(), InsertValue, - OperandTraits<InsertValueInst>::op_begin(this), - 2, InsertAtEnd) { - init(Agg, Val, Idx, Name); -} - //===----------------------------------------------------------------------===// // ExtractValueInst Class //===----------------------------------------------------------------------===// -void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx, - const Twine &Name) { +void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) { assert(NumOperands == 1 && "NumOperands not initialized?"); - Indices.append(Idx, Idx + NumIdx); - setName(Name); -} - -void ExtractValueInst::init(unsigned Idx, const Twine &Name) { - assert(NumOperands == 1 && "NumOperands not initialized?"); + // There's no fundamental reason why we require at least one index. + // But there's no present need to support it. + assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index"); - Indices.push_back(Idx); + Indices.append(Idxs.begin(), Idxs.end()); setName(Name); } @@ -1476,10 +1382,9 @@ ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI) // A null type is returned if the indices are invalid for the specified // pointer type. // -const Type* ExtractValueInst::getIndexedType(const Type *Agg, - const unsigned *Idxs, - unsigned NumIdx) { - for (unsigned CurIdx = 0; CurIdx != NumIdx; ++CurIdx) { +Type *ExtractValueInst::getIndexedType(const Type *Agg, + ArrayRef<unsigned> Idxs) { + for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) { unsigned Index = Idxs[CurIdx]; // We can't use CompositeType::indexValid(Index) here. // indexValid() always returns true for arrays because getelementptr allows @@ -1499,20 +1404,8 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, } Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index); - - // If the new type forwards to another type, then it is in the middle - // of being refined to another type (and hence, may have dropped all - // references to what it was using before). So, use the new forwarded - // type. 
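Index lists for insertvalue/extractvalue now travel as ArrayRef<unsigned> end to end, and both constant folds are asserted to succeed. A constant-side sketch, reusing Ctx and the other hypothetical names:

    Constant *Fields[] = { Zero, ConstantInt::get(I32, 7) };
    Constant *Agg = ConstantStruct::getAnon(Ctx, Fields);
    unsigned Path[] = { 1 };
    // Folds to i32 7; the indices are checked against the aggregate's type.
    Constant *Field = ConstantExpr::getExtractValue(Agg, Path);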
- if (const Type *Ty = Agg->getForwardedType()) - Agg = Ty; } - return Agg; -} - -const Type* ExtractValueInst::getIndexedType(const Type *Agg, - unsigned Idx) { - return getIndexedType(Agg, &Idx, 1); + return const_cast<Type*>(Agg); } //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index 1bd497d05d4e..ebd1e0aa1b0f 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -39,6 +39,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { // Create the 'tbaa' metadata kind. unsigned TBAAID = getMDKindID("tbaa"); assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID; + + // Create the 'prof' metadata kind. + unsigned ProfID = getMDKindID("prof"); + assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID; } LLVMContext::~LLVMContext() { delete pImpl; } diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index ccb8dc500fcd..504b37267f70 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -13,6 +13,7 @@ #include "LLVMContextImpl.h" #include "llvm/Module.h" +#include "llvm/ADT/STLExtras.h" #include <algorithm> using namespace llvm; @@ -31,14 +32,10 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), - Int64Ty(C, 64), - AlwaysOpaqueTy(new OpaqueType(C)) { + Int64Ty(C, 64) { InlineAsmDiagHandler = 0; InlineAsmDiagContext = 0; - - // Make sure the AlwaysOpaqueTy stays alive as long as the Context. - AlwaysOpaqueTy->addRef(); - OpaqueTypes.insert(AlwaysOpaqueTy); + NamedStructTypesUniqueID = 0; } namespace { @@ -58,9 +55,7 @@ LLVMContextImpl::~LLVMContextImpl() { // will try to remove itself from OwnedModules set. This would cause // iterator invalidation if we iterated on the set directly. std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end()); - for (std::vector<Module*>::iterator I = Modules.begin(), E = Modules.end(); - I != E; ++I) - delete *I; + DeleteContainerPointers(Modules); std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(), DropReferences()); @@ -78,38 +73,22 @@ LLVMContextImpl::~LLVMContextImpl() { NullPtrConstants.freeConstants(); UndefValueConstants.freeConstants(); InlineAsms.freeConstants(); - for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end(); - I != E; ++I) { - delete I->second; - } - for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end(); - I != E; ++I) { - delete I->second; - } - AlwaysOpaqueTy->dropRef(); - for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); - I != E; ++I) { - (*I)->AbstractTypeUsers.clear(); - delete *I; - } + DeleteContainerSeconds(IntConstants); + DeleteContainerSeconds(FPConstants); + // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet // and the NonUniquedMDNodes sets, so copy the values out first. SmallVector<MDNode*, 8> MDNodes; MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size()); for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end(); - I != E; ++I) { + I != E; ++I) MDNodes.push_back(&*I); - } MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end()); for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(), - E = MDNodes.end(); I != E; ++I) { + E = MDNodes.end(); I != E; ++I) (*I)->destroy(); - } assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() && "Destroying all MDNodes didn't empty the Context's sets."); // Destroy MDStrings. 
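The LLVMContext.cpp hunk above pre-registers a fixed kind ID for 'prof' metadata at context construction, mirroring the existing 'dbg' and 'tbaa' kinds. A hedged usage sketch (Ctx, BI, and WeightsNode are assumed in-scope values, not part of this change):

  // Registration at construction pins the ID, so the string lookup and the
  // LLVMContext::MD_prof enumerator always agree.
  unsigned KindID = Ctx.getMDKindID("prof");
  assert(KindID == LLVMContext::MD_prof);
  BI->setMetadata(LLVMContext::MD_prof, WeightsNode); // e.g. branch weights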
- for (StringMap<MDString*>::iterator I = MDStringCache.begin(), - E = MDStringCache.end(); I != E; ++I) { - delete I->second; - } + DeleteContainerSeconds(MDStringCache); } diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 6ea4b48e79b7..06a6f2a25a38 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -15,17 +15,16 @@ #ifndef LLVM_LLVMCONTEXT_IMPL_H #define LLVM_LLVMCONTEXT_IMPL_H +#include "llvm/LLVMContext.h" #include "ConstantsContext.h" #include "LeaksContext.h" -#include "TypesContext.h" -#include "llvm/LLVMContext.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Metadata.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Support/ValueHandle.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" @@ -139,27 +138,30 @@ public: // on Context destruction. SmallPtrSet<MDNode*, 1> NonUniquedMDNodes; - ConstantUniqueMap<char, Type, ConstantAggregateZero> AggZeroConstants; + ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants; - typedef ConstantUniqueMap<std::vector<Constant*>, ArrayType, - ConstantArray, true /*largekey*/> ArrayConstantsTy; + typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>, + ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy; ArrayConstantsTy ArrayConstants; - typedef ConstantUniqueMap<std::vector<Constant*>, StructType, - ConstantStruct, true /*largekey*/> StructConstantsTy; + typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>, + StructType, ConstantStruct, true /*largekey*/> StructConstantsTy; StructConstantsTy StructConstants; - typedef ConstantUniqueMap<std::vector<Constant*>, VectorType, - ConstantVector> VectorConstantsTy; + typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>, + VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; - ConstantUniqueMap<char, PointerType, ConstantPointerNull> NullPtrConstants; - ConstantUniqueMap<char, Type, UndefValue> UndefValueConstants; + ConstantUniqueMap<char, char, PointerType, ConstantPointerNull> + NullPtrConstants; + ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants; DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses; - ConstantUniqueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants; + ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr> + ExprConstants; - ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm> InlineAsms; + ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType, + InlineAsm> InlineAsms; ConstantInt *TheTrueVal; ConstantInt *TheFalseVal; @@ -167,41 +169,27 @@ public: LeakDetectorImpl<Value> LLVMObjects; // Basic type instances. - const Type VoidTy; - const Type LabelTy; - const Type FloatTy; - const Type DoubleTy; - const Type MetadataTy; - const Type X86_FP80Ty; - const Type FP128Ty; - const Type PPC_FP128Ty; - const Type X86_MMXTy; - const IntegerType Int1Ty; - const IntegerType Int8Ty; - const IntegerType Int16Ty; - const IntegerType Int32Ty; - const IntegerType Int64Ty; - - // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions - // for types as they are needed. Because resolution of types must invalidate - // all of the abstract type descriptions, we keep them in a separate map to - // make this easy. 
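The destructor hunks above swap hand-rolled deletion loops for the STLExtras helpers. Their shape, paraphrased from llvm/ADT/STLExtras.h of this era (an illustrative sketch, not a verbatim copy), is roughly:

  // Delete every element (or every mapped value), then clear the container.
  template <typename Container> void DeleteContainerPointers(Container &C) {
    for (typename Container::iterator I = C.begin(), E = C.end(); I != E; ++I)
      delete *I;
    C.clear();
  }
  template <typename Map> void DeleteContainerSeconds(Map &M) {
    for (typename Map::iterator I = M.begin(), E = M.end(); I != E; ++I)
      delete I->second;
    M.clear();
  }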
- TypePrinting ConcreteTypeDescriptions; - TypePrinting AbstractTypeDescriptions; - - TypeMap<ArrayValType, ArrayType> ArrayTypes; - TypeMap<VectorValType, VectorType> VectorTypes; - TypeMap<PointerValType, PointerType> PointerTypes; - TypeMap<FunctionValType, FunctionType> FunctionTypes; - TypeMap<StructValType, StructType> StructTypes; - TypeMap<IntegerValType, IntegerType> IntegerTypes; - - // Opaque types are not structurally uniqued, so don't use TypeMap. - typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy; - OpaqueTypesTy OpaqueTypes; - - /// Used as an abstract type that will never be resolved. - OpaqueType *const AlwaysOpaqueTy; + Type VoidTy, LabelTy, FloatTy, DoubleTy, MetadataTy; + Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy; + IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty; + + + /// TypeAllocator - All dynamically allocated types are allocated from this. + /// They live forever until the context is torn down. + BumpPtrAllocator TypeAllocator; + + DenseMap<unsigned, IntegerType*> IntegerTypes; + + // TODO: Optimize FunctionTypes/AnonStructTypes! + std::map<std::vector<Type*>, FunctionType*> FunctionTypes; + std::map<std::vector<Type*>, StructType*> AnonStructTypes; + StringMap<StructType*> NamedStructTypes; + unsigned NamedStructTypesUniqueID; + + DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes; + DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes; + DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0 + DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes; /// ValueHandles - This map keeps track of all of the value handles that are diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index eb719e54b289..ace4dc2de271 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/ValueHandle.h" diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 341e527acb5b..be2fcb8ac6c0 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -17,12 +17,12 @@ #include "llvm/DerivedTypes.h" #include "llvm/GVMaterializer.h" #include "llvm/LLVMContext.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/LeakDetector.h" #include "SymbolTableListTraitsImpl.h" -#include "llvm/TypeSymbolTable.h" #include <algorithm> #include <cstdarg> #include <cstdlib> @@ -60,7 +60,6 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>; Module::Module(StringRef MID, LLVMContext& C) : Context(C), Materializer(NULL), ModuleID(MID) { ValSymTab = new ValueSymbolTable(); - TypeSymTab = new TypeSymbolTable(); NamedMDSymTab = new StringMap<NamedMDNode *>(); Context.addModule(this); } @@ -74,11 +73,10 @@ Module::~Module() { LibraryList.clear(); NamedMDList.clear(); delete ValSymTab; - delete TypeSymTab; delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab); } -/// Target endian information... +/// Target endian information. Module::Endianness Module::getEndianness() const { StringRef temp = DataLayout; Module::Endianness ret = AnyEndianness; @@ -218,8 +216,8 @@ Constant *Module::getOrInsertFunction(StringRef Name, va_start(Args, RetTy); // Build the list of argument types... 
- std::vector<const Type*> ArgTys; - while (const Type *ArgTy = va_arg(Args, const Type*)) + std::vector<Type*> ArgTys; + while (Type *ArgTy = va_arg(Args, Type*)) ArgTys.push_back(ArgTy); va_end(Args); @@ -236,8 +234,8 @@ Constant *Module::getOrInsertFunction(StringRef Name, va_start(Args, RetTy); // Build the list of argument types... - std::vector<const Type*> ArgTys; - while (const Type *ArgTy = va_arg(Args, const Type*)) + std::vector<Type*> ArgTys; + while (Type *ArgTy = va_arg(Args, Type*)) ArgTys.push_back(ArgTy); va_end(Args); @@ -340,51 +338,6 @@ void Module::eraseNamedMetadata(NamedMDNode *NMD) { NamedMDList.erase(NMD); } -//===----------------------------------------------------------------------===// -// Methods for easy access to the types in the module. -// - - -// addTypeName - Insert an entry in the symbol table mapping Str to Type. If -// there is already an entry for this name, true is returned and the symbol -// table is not modified. -// -bool Module::addTypeName(StringRef Name, const Type *Ty) { - TypeSymbolTable &ST = getTypeSymbolTable(); - - if (ST.lookup(Name)) return true; // Already in symtab... - - // Not in symbol table? Set the name with the Symtab as an argument so the - // type knows what to update... - ST.insert(Name, Ty); - - return false; -} - -/// getTypeByName - Return the type with the specified name in this module, or -/// null if there is none by that name. -const Type *Module::getTypeByName(StringRef Name) const { - const TypeSymbolTable &ST = getTypeSymbolTable(); - return cast_or_null<Type>(ST.lookup(Name)); -} - -// getTypeName - If there is at least one entry in the symbol table for the -// specified type, return it. -// -std::string Module::getTypeName(const Type *Ty) const { - const TypeSymbolTable &ST = getTypeSymbolTable(); - - TypeSymbolTable::const_iterator TI = ST.begin(); - TypeSymbolTable::const_iterator TE = ST.end(); - if ( TI == TE ) return ""; // No names for types - - while (TI != TE && TI->second != Ty) - ++TI; - - if (TI != TE) // Must have found an entry! - return TI->first; - return ""; // Must not have found anything... -} //===----------------------------------------------------------------------===// // Methods to control the materialization of GlobalValues in the Module. @@ -471,3 +424,130 @@ void Module::removeLibrary(StringRef Lib) { return; } } + +//===----------------------------------------------------------------------===// +// Type finding functionality. +//===----------------------------------------------------------------------===// + +namespace { + /// TypeFinder - Walk over a module, identifying all of the types that are + /// used by the module. + class TypeFinder { + // To avoid walking constant expressions multiple times and other IR + // objects, we keep several helper maps. + DenseSet<const Value*> VisitedConstants; + DenseSet<const Type*> VisitedTypes; + + std::vector<StructType*> &StructTypes; + public: + TypeFinder(std::vector<StructType*> &structTypes) + : StructTypes(structTypes) {} + + void run(const Module &M) { + // Get types from global variables. + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + incorporateType(I->getType()); + if (I->hasInitializer()) + incorporateValue(I->getInitializer()); + } + + // Get types from aliases. 
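The getOrInsertFunction hunks above drop const from the vararg Type* list as part of the tree-wide de-constification of Type; call sites keep the same null-terminated shape. A hedged sketch with a hypothetical function name (M and Ctx assumed in scope):

  // The terminating null Type* still ends the parameter list, as before.
  Constant *Fn = M.getOrInsertFunction("my_hypothetical_fn",
                                       Type::getInt32Ty(Ctx),   // return type
                                       Type::getInt8PtrTy(Ctx), // one param
                                       (Type *)0);              // terminator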
+ for (Module::const_alias_iterator I = M.alias_begin(), + E = M.alias_end(); I != E; ++I) { + incorporateType(I->getType()); + if (const Value *Aliasee = I->getAliasee()) + incorporateValue(Aliasee); + } + + SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst; + + // Get types from functions. + for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { + incorporateType(FI->getType()); + + for (Function::const_iterator BB = FI->begin(), E = FI->end(); + BB != E;++BB) + for (BasicBlock::const_iterator II = BB->begin(), + E = BB->end(); II != E; ++II) { + const Instruction &I = *II; + // Incorporate the type of the instruction and all its operands. + incorporateType(I.getType()); + for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); + OI != OE; ++OI) + incorporateValue(*OI); + + // Incorporate types hiding in metadata. + I.getAllMetadataOtherThanDebugLoc(MDForInst); + for (unsigned i = 0, e = MDForInst.size(); i != e; ++i) + incorporateMDNode(MDForInst[i].second); + MDForInst.clear(); + } + } + + for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + const NamedMDNode *NMD = I; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + incorporateMDNode(NMD->getOperand(i)); + } + } + + private: + void incorporateType(Type *Ty) { + // Check to see if we're already visited this type. + if (!VisitedTypes.insert(Ty).second) + return; + + // If this is a structure or opaque type, add a name for the type. + if (StructType *STy = dyn_cast<StructType>(Ty)) + StructTypes.push_back(STy); + + // Recursively walk all contained types. + for (Type::subtype_iterator I = Ty->subtype_begin(), + E = Ty->subtype_end(); I != E; ++I) + incorporateType(*I); + } + + /// incorporateValue - This method is used to walk operand lists finding + /// types hiding in constant expressions and other operands that won't be + /// walked in other ways. GlobalValues, basic blocks, instructions, and + /// inst operands are all explicitly enumerated. + void incorporateValue(const Value *V) { + if (const MDNode *M = dyn_cast<MDNode>(V)) + return incorporateMDNode(M); + if (!isa<Constant>(V) || isa<GlobalValue>(V)) return; + + // Already visited? + if (!VisitedConstants.insert(V).second) + return; + + // Check this type. + incorporateType(V->getType()); + + // Look in operands for types. + const User *U = cast<User>(V); + for (Constant::const_op_iterator I = U->op_begin(), + E = U->op_end(); I != E;++I) + incorporateValue(*I); + } + + void incorporateMDNode(const MDNode *V) { + + // Already visited? + if (!VisitedConstants.insert(V).second) + return; + + // Look in operands for types. 
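Once this walk finishes, callers reach it through Module::findUsedStructTypes, defined just below. A usage sketch, assuming llvm/Support/raw_ostream.h for errs() (not part of this hunk):

  // Collect every struct type reachable from module M, then report the
  // named (non-anonymous) ones; anonymous structs never carry a name.
  std::vector<StructType*> Used;
  M.findUsedStructTypes(Used);
  for (unsigned i = 0, e = Used.size(); i != e; ++i)
    if (!Used[i]->isAnonymous())
      errs() << Used[i]->getName() << "\n";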
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i) + if (Value *Op = V->getOperand(i)) + incorporateValue(Op); + } + }; +} // end anonymous namespace + +void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes) const { + TypeFinder(StructTypes).run(*this); +} + + diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index e4496db4317f..f874d1b28302 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -12,95 +12,17 @@ //===----------------------------------------------------------------------===// #include "LLVMContextImpl.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Constants.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Threading.h" +#include "llvm/Module.h" #include <algorithm> #include <cstdarg> +#include "llvm/ADT/SmallString.h" using namespace llvm; -// DEBUG_MERGE_TYPES - Enable this #define to see how and when derived types are -// created and later destroyed, all in an effort to make sure that there is only -// a single canonical version of a type. -// -// #define DEBUG_MERGE_TYPES 1 - -AbstractTypeUser::~AbstractTypeUser() {} - -void AbstractTypeUser::setType(Value *V, const Type *NewTy) { - V->VTy = NewTy; -} - //===----------------------------------------------------------------------===// // Type Class Implementation //===----------------------------------------------------------------------===// -/// Because of the way Type subclasses are allocated, this function is necessary -/// to use the correct kind of "delete" operator to deallocate the Type object. -/// Some type objects (FunctionTy, StructTy) allocate additional space -/// after the space for their derived type to hold the contained types array of -/// PATypeHandles. Using this allocation scheme means all the PATypeHandles are -/// allocated with the type object, decreasing allocations and eliminating the -/// need for a std::vector to be used in the Type class itself. -/// @brief Type destruction function -void Type::destroy() const { - // Nothing calls getForwardedType from here on. - if (ForwardType && ForwardType->isAbstract()) { - ForwardType->dropRef(); - ForwardType = NULL; - } - - // Structures and Functions allocate their contained types past the end of - // the type object itself. These need to be destroyed differently than the - // other types. - if (this->isFunctionTy() || this->isStructTy()) { - // First, make sure we destruct any PATypeHandles allocated by these - // subclasses. They must be manually destructed. - for (unsigned i = 0; i < NumContainedTys; ++i) - ContainedTys[i].PATypeHandle::~PATypeHandle(); - - // Now call the destructor for the subclass directly because we're going - // to delete this as an array of char. - if (this->isFunctionTy()) - static_cast<const FunctionType*>(this)->FunctionType::~FunctionType(); - else { - assert(isStructTy()); - static_cast<const StructType*>(this)->StructType::~StructType(); - } - - // Finally, remove the memory as an array deallocation of the chars it was - // constructed from. 
- operator delete(const_cast<Type *>(this)); - - return; - } else if (const OpaqueType *opaque_this = dyn_cast<OpaqueType>(this)) { - LLVMContextImpl *pImpl = this->getContext().pImpl; - pImpl->OpaqueTypes.erase(opaque_this); - } - - // For all the other type subclasses, there is either no contained types or - // just one (all Sequentials). For Sequentials, the PATypeHandle is not - // allocated past the type object, its included directly in the SequentialType - // class. This means we can safely just do "normal" delete of this object and - // all the destructors that need to run will be run. - delete this; -} - -const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { +Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { switch (IDNumber) { case VoidTyID : return getVoidTy(C); case FloatTyID : return getFloatTy(C); @@ -116,15 +38,6 @@ const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { } } -const Type *Type::getVAArgsPromotedType(LLVMContext &C) const { - if (ID == IntegerTyID && getSubclassData() < 32) - return Type::getInt32Ty(C); - else if (ID == FloatTyID) - return Type::getDoubleTy(C); - else - return this; -} - /// getScalarType - If this is a vector type, return the element type, /// otherwise return this. const Type *Type::getScalarType() const { @@ -262,13 +175,17 @@ bool Type::isSizedDerivedType() const { if (const ArrayType *ATy = dyn_cast<ArrayType>(this)) return ATy->getElementType()->isSized(); - if (const VectorType *PTy = dyn_cast<VectorType>(this)) - return PTy->getElementType()->isSized(); + if (const VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy->getElementType()->isSized(); if (!this->isStructTy()) return false; - // Okay, our struct is sized if all of the elements are... + // Opaque structs have no size. + if (cast<StructType>(this)->isOpaque()) + return false; + + // Okay, our struct is sized if all of the elements are. for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I) if (!(*I)->isSized()) return false; @@ -276,696 +193,335 @@ bool Type::isSizedDerivedType() const { return true; } -/// getForwardedTypeInternal - This method is used to implement the union-find -/// algorithm for when a type is being forwarded to another type. -const Type *Type::getForwardedTypeInternal() const { - assert(ForwardType && "This type is not being forwarded to another type!"); - - // Check to see if the forwarded type has been forwarded on. If so, collapse - // the forwarding links. - const Type *RealForwardedType = ForwardType->getForwardedType(); - if (!RealForwardedType) - return ForwardType; // No it's not forwarded again - - // Yes, it is forwarded again. First thing, add the reference to the new - // forward type. - if (RealForwardedType->isAbstract()) - RealForwardedType->addRef(); - - // Now drop the old reference. This could cause ForwardType to get deleted. - // ForwardType must be abstract because only abstract types can have their own - // ForwardTypes. - ForwardType->dropRef(); - - // Return the updated type. - ForwardType = RealForwardedType; - return ForwardType; -} - -void Type::refineAbstractType(const DerivedType *OldTy, const Type *NewTy) { - llvm_unreachable("Attempting to refine a derived type!"); -} -void Type::typeBecameConcrete(const DerivedType *AbsTy) { - llvm_unreachable("DerivedType is already a concrete type!"); -} - - -std::string Type::getDescription() const { - LLVMContextImpl *pImpl = getContext().pImpl; - TypePrinting &Map = - isAbstract() ? 
- pImpl->AbstractTypeDescriptions : - pImpl->ConcreteTypeDescriptions; - - std::string DescStr; - raw_string_ostream DescOS(DescStr); - Map.print(this, DescOS); - return DescOS.str(); -} - - -bool StructType::indexValid(const Value *V) const { - // Structure indexes require 32-bit integer constants. - if (V->getType()->isIntegerTy(32)) - if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) - return indexValid(CU->getZExtValue()); - return false; -} - -bool StructType::indexValid(unsigned V) const { - return V < NumContainedTys; -} - -// getTypeAtIndex - Given an index value into the type, return the type of the -// element. For a structure type, this must be a constant value... -// -const Type *StructType::getTypeAtIndex(const Value *V) const { - unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); - return getTypeAtIndex(Idx); -} - -const Type *StructType::getTypeAtIndex(unsigned Idx) const { - assert(indexValid(Idx) && "Invalid structure index!"); - return ContainedTys[Idx]; -} - - //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// -const Type *Type::getVoidTy(LLVMContext &C) { - return &C.pImpl->VoidTy; -} - -const Type *Type::getLabelTy(LLVMContext &C) { - return &C.pImpl->LabelTy; -} - -const Type *Type::getFloatTy(LLVMContext &C) { - return &C.pImpl->FloatTy; -} - -const Type *Type::getDoubleTy(LLVMContext &C) { - return &C.pImpl->DoubleTy; -} - -const Type *Type::getMetadataTy(LLVMContext &C) { - return &C.pImpl->MetadataTy; -} - -const Type *Type::getX86_FP80Ty(LLVMContext &C) { - return &C.pImpl->X86_FP80Ty; -} - -const Type *Type::getFP128Ty(LLVMContext &C) { - return &C.pImpl->FP128Ty; -} - -const Type *Type::getPPC_FP128Ty(LLVMContext &C) { - return &C.pImpl->PPC_FP128Ty; -} - -const Type *Type::getX86_MMXTy(LLVMContext &C) { - return &C.pImpl->X86_MMXTy; -} - -const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) { +Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; } +Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; } +Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; } +Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; } +Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; } +Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; } +Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; } +Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; } +Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; } + +IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; } +IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; } +IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; } +IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; } +IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; } + +IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) { return IntegerType::get(C, N); } -const IntegerType *Type::getInt1Ty(LLVMContext &C) { - return &C.pImpl->Int1Ty; -} - -const IntegerType *Type::getInt8Ty(LLVMContext &C) { - return &C.pImpl->Int8Ty; -} - -const IntegerType *Type::getInt16Ty(LLVMContext &C) { - return &C.pImpl->Int16Ty; -} - -const IntegerType *Type::getInt32Ty(LLVMContext &C) { - return &C.pImpl->Int32Ty; -} - -const IntegerType *Type::getInt64Ty(LLVMContext &C) { - 
return &C.pImpl->Int64Ty; -} - -const PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) { return getFloatTy(C)->getPointerTo(AS); } -const PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) { return getDoubleTy(C)->getPointerTo(AS); } -const PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) { return getX86_FP80Ty(C)->getPointerTo(AS); } -const PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) { return getFP128Ty(C)->getPointerTo(AS); } -const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) { return getPPC_FP128Ty(C)->getPointerTo(AS); } -const PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) { return getX86_MMXTy(C)->getPointerTo(AS); } -const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) { +PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) { return getIntNTy(C, N)->getPointerTo(AS); } -const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) { return getInt1Ty(C)->getPointerTo(AS); } -const PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) { return getInt8Ty(C)->getPointerTo(AS); } -const PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) { return getInt16Ty(C)->getPointerTo(AS); } -const PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) { return getInt32Ty(C)->getPointerTo(AS); } -const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) { +PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) { return getInt64Ty(C)->getPointerTo(AS); } + //===----------------------------------------------------------------------===// -// Derived Type Constructors +// IntegerType Implementation //===----------------------------------------------------------------------===// -/// isValidReturnType - Return true if the specified type is valid as a return -/// type. -bool FunctionType::isValidReturnType(const Type *RetTy) { - return !RetTy->isFunctionTy() && !RetTy->isLabelTy() && - !RetTy->isMetadataTy(); +IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) { + assert(NumBits >= MIN_INT_BITS && "bitwidth too small"); + assert(NumBits <= MAX_INT_BITS && "bitwidth too large"); + + // Check for the built-in integer types + switch (NumBits) { + case 1: return cast<IntegerType>(Type::getInt1Ty(C)); + case 8: return cast<IntegerType>(Type::getInt8Ty(C)); + case 16: return cast<IntegerType>(Type::getInt16Ty(C)); + case 32: return cast<IntegerType>(Type::getInt32Ty(C)); + case 64: return cast<IntegerType>(Type::getInt64Ty(C)); + default: + break; + } + + IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits]; + + if (Entry == 0) + Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits); + + return Entry; } -/// isValidArgumentType - Return true if the specified type is valid as an -/// argument type. 
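With the rewritten IntegerType::get above, arbitrary-width integer types are uniqued per context in a DenseMap and bump-allocated on first use, so repeated queries are pointer-equal. Illustrative only (Ctx assumed in scope):

  // Built-in widths come straight from the context members; odd widths are
  // created once, cached in LLVMContextImpl::IntegerTypes, and live until
  // the context is torn down.
  IntegerType *A = IntegerType::get(Ctx, 37);
  assert(A == IntegerType::get(Ctx, 37));
  assert(IntegerType::get(Ctx, 32) == Type::getInt32Ty(Ctx));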
-bool FunctionType::isValidArgumentType(const Type *ArgTy) { - return ArgTy->isFirstClassType() || ArgTy->isOpaqueTy(); +bool IntegerType::isPowerOf2ByteWidth() const { + unsigned BitWidth = getBitWidth(); + return (BitWidth > 7) && isPowerOf2_32(BitWidth); } -FunctionType::FunctionType(const Type *Result, - ArrayRef<const Type*> Params, +APInt IntegerType::getMask() const { + return APInt::getAllOnesValue(getBitWidth()); +} + +//===----------------------------------------------------------------------===// +// FunctionType Implementation +//===----------------------------------------------------------------------===// + +FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params, bool IsVarArgs) - : DerivedType(Result->getContext(), FunctionTyID), isVarArgs(IsVarArgs) { - ContainedTys = reinterpret_cast<PATypeHandle*>(this+1); - NumContainedTys = Params.size() + 1; // + 1 for result type + : Type(Result->getContext(), FunctionTyID) { + Type **SubTys = reinterpret_cast<Type**>(this+1); assert(isValidReturnType(Result) && "invalid return type for function"); + setSubclassData(IsVarArgs); + SubTys[0] = const_cast<Type*>(Result); - bool isAbstract = Result->isAbstract(); - new (&ContainedTys[0]) PATypeHandle(Result, this); - - for (unsigned i = 0; i != Params.size(); ++i) { + for (unsigned i = 0, e = Params.size(); i != e; ++i) { assert(isValidArgumentType(Params[i]) && "Not a valid type for function argument!"); - new (&ContainedTys[i+1]) PATypeHandle(Params[i], this); - isAbstract |= Params[i]->isAbstract(); + SubTys[i+1] = Params[i]; } - // Calculate whether or not this type is abstract - setAbstract(isAbstract); + ContainedTys = SubTys; + NumContainedTys = Params.size() + 1; // + 1 for result type } -StructType::StructType(LLVMContext &C, - ArrayRef<const Type*> Types, bool isPacked) - : CompositeType(C, StructTyID) { - ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1); - NumContainedTys = Types.size(); - setSubclassData(isPacked); - bool isAbstract = false; - for (unsigned i = 0; i < Types.size(); ++i) { - assert(Types[i] && "<null> type for structure field!"); - assert(isValidElementType(Types[i]) && - "Invalid type for structure element!"); - new (&ContainedTys[i]) PATypeHandle(Types[i], this); - isAbstract |= Types[i]->isAbstract(); +// FunctionType::get - The factory function for the FunctionType class. +FunctionType *FunctionType::get(const Type *ReturnType, + ArrayRef<Type*> Params, bool isVarArg) { + // TODO: This is brutally slow. + std::vector<Type*> Key; + Key.reserve(Params.size()+2); + Key.push_back(const_cast<Type*>(ReturnType)); + for (unsigned i = 0, e = Params.size(); i != e; ++i) + Key.push_back(const_cast<Type*>(Params[i])); + if (isVarArg) + Key.push_back(0); + + LLVMContextImpl *pImpl = ReturnType->getContext().pImpl; + FunctionType *&FT = pImpl->FunctionTypes[Key]; + + if (FT == 0) { + FT = (FunctionType*) pImpl->TypeAllocator. 
+ Allocate(sizeof(FunctionType) + sizeof(Type*)*(Params.size()+1), + AlignOf<FunctionType>::Alignment); + new (FT) FunctionType(ReturnType, Params, isVarArg); } - // Calculate whether or not this type is abstract - setAbstract(isAbstract); -} - -ArrayType::ArrayType(const Type *ElType, uint64_t NumEl) - : SequentialType(ArrayTyID, ElType) { - NumElements = NumEl; - - // Calculate whether or not this type is abstract - setAbstract(ElType->isAbstract()); -} - -VectorType::VectorType(const Type *ElType, unsigned NumEl) - : SequentialType(VectorTyID, ElType) { - NumElements = NumEl; - setAbstract(ElType->isAbstract()); - assert(NumEl > 0 && "NumEl of a VectorType must be greater than 0"); - assert(isValidElementType(ElType) && - "Elements of a VectorType must be a primitive type"); - -} - - -PointerType::PointerType(const Type *E, unsigned AddrSpace) - : SequentialType(PointerTyID, E) { - AddressSpace = AddrSpace; - // Calculate whether or not this type is abstract - setAbstract(E->isAbstract()); -} - -OpaqueType::OpaqueType(LLVMContext &C) : DerivedType(C, OpaqueTyID) { - setAbstract(true); -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << *this << "\n"); -#endif + return FT; } -void PATypeHolder::destroy() { - Ty = 0; -} -// dropAllTypeUses - When this (abstract) type is resolved to be equal to -// another (more concrete) type, we must eliminate all references to other -// types, to avoid some circular reference problems. -void DerivedType::dropAllTypeUses() { - if (NumContainedTys != 0) { - // The type must stay abstract. To do this, we insert a pointer to a type - // that will never get resolved, thus will always be abstract. - ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy; - - // Change the rest of the types to be Int32Ty's. It doesn't matter what we - // pick so long as it doesn't point back to this type. We choose something - // concrete to avoid overhead for adding to AbstractTypeUser lists and - // stuff. - const Type *ConcreteTy = Type::getInt32Ty(getContext()); - for (unsigned i = 1, e = NumContainedTys; i != e; ++i) - ContainedTys[i] = ConcreteTy; - } +FunctionType *FunctionType::get(const Type *Result, bool isVarArg) { + return get(Result, ArrayRef<Type *>(), isVarArg); } -namespace { - -/// TypePromotionGraph and graph traits - this is designed to allow us to do -/// efficient SCC processing of type graphs. This is the exact same as -/// GraphTraits<Type*>, except that we pretend that concrete types have no -/// children to avoid processing them. -struct TypePromotionGraph { - Type *Ty; - TypePromotionGraph(Type *T) : Ty(T) {} -}; - -} - -namespace llvm { - template <> struct GraphTraits<TypePromotionGraph> { - typedef Type NodeType; - typedef Type::subtype_iterator ChildIteratorType; - - static inline NodeType *getEntryNode(TypePromotionGraph G) { return G.Ty; } - static inline ChildIteratorType child_begin(NodeType *N) { - if (N->isAbstract()) - return N->subtype_begin(); - // No need to process children of concrete types. - return N->subtype_end(); - } - static inline ChildIteratorType child_end(NodeType *N) { - return N->subtype_end(); - } - }; +/// isValidReturnType - Return true if the specified type is valid as a return +/// type. +bool FunctionType::isValidReturnType(const Type *RetTy) { + return !RetTy->isFunctionTy() && !RetTy->isLabelTy() && + !RetTy->isMetadataTy(); } - -// PromoteAbstractToConcrete - This is a recursive function that walks a type -// graph calculating whether or not a type is abstract. 
-// -void Type::PromoteAbstractToConcrete() { - if (!isAbstract()) return; - - scc_iterator<TypePromotionGraph> SI = scc_begin(TypePromotionGraph(this)); - scc_iterator<TypePromotionGraph> SE = scc_end (TypePromotionGraph(this)); - - for (; SI != SE; ++SI) { - std::vector<Type*> &SCC = *SI; - - // Concrete types are leaves in the tree. Since an SCC will either be all - // abstract or all concrete, we only need to check one type. - if (!SCC[0]->isAbstract()) continue; - - if (SCC[0]->isOpaqueTy()) - return; // Not going to be concrete, sorry. - - // If all of the children of all of the types in this SCC are concrete, - // then this SCC is now concrete as well. If not, neither this SCC, nor - // any parent SCCs will be concrete, so we might as well just exit. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - for (Type::subtype_iterator CI = SCC[i]->subtype_begin(), - E = SCC[i]->subtype_end(); CI != E; ++CI) - if ((*CI)->isAbstract()) - // If the child type is in our SCC, it doesn't make the entire SCC - // abstract unless there is a non-SCC abstract type. - if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end()) - return; // Not going to be concrete, sorry. - - // Okay, we just discovered this whole SCC is now concrete, mark it as - // such! - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - assert(SCC[i]->isAbstract() && "Why are we processing concrete types?"); - - SCC[i]->setAbstract(false); - } - - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - assert(!SCC[i]->isAbstract() && "Concrete type became abstract?"); - // The type just became concrete, notify all users! - cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete(); - } - } +/// isValidArgumentType - Return true if the specified type is valid as an +/// argument type. +bool FunctionType::isValidArgumentType(const Type *ArgTy) { + return ArgTy->isFirstClassType(); } - //===----------------------------------------------------------------------===// -// Type Structural Equality Testing +// StructType Implementation //===----------------------------------------------------------------------===// -// TypesEqual - Two types are considered structurally equal if they have the -// same "shape": Every level and element of the types have identical primitive -// ID's, and the graphs have the same edges/nodes in them. Nodes do not have to -// be pointer equals to be equivalent though. This uses an optimistic algorithm -// that assumes that two graphs are the same until proven otherwise. -// -static bool TypesEqual(const Type *Ty, const Type *Ty2, - std::map<const Type *, const Type *> &EqTypes) { - if (Ty == Ty2) return true; - if (Ty->getTypeID() != Ty2->getTypeID()) return false; - if (Ty->isOpaqueTy()) - return false; // Two unequal opaque types are never equal - - std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty); - if (It != EqTypes.end()) - return It->second == Ty2; // Looping back on a type, check for equality - - // Otherwise, add the mapping to the table to make sure we don't get - // recursion on the types... - EqTypes.insert(It, std::make_pair(Ty, Ty2)); - - // Two really annoying special cases that breaks an otherwise nice simple - // algorithm is the fact that arraytypes have sizes that differentiates types, - // and that function types can be varargs or not. Consider this now. 
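Anonymous structs created through the StructType::get above are still uniqued by element list and packing, while named structs (below) have identity. A brief sketch of the distinction (Ctx assumed in scope):

  Type *Fields[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
  StructType *A = StructType::get(Ctx, ArrayRef<Type*>(Fields, 2));
  StructType *B = StructType::get(Ctx, ArrayRef<Type*>(Fields, 2));
  assert(A == B);                   // anonymous: uniqued in AnonStructTypes
  StructType *N = StructType::createNamed(Ctx, "pair");
  N->setBody(ArrayRef<Type*>(Fields, 2), /*isPacked=*/false);
  assert(N != A);                   // named: a distinct type with identity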
- // - if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { - const IntegerType *ITy2 = cast<IntegerType>(Ty2); - return ITy->getBitWidth() == ITy2->getBitWidth(); - } - - if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { - const PointerType *PTy2 = cast<PointerType>(Ty2); - return PTy->getAddressSpace() == PTy2->getAddressSpace() && - TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes); +// Primitive Constructors. + +StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes, + bool isPacked) { + // FIXME: std::vector is horribly inefficient for this probe. + std::vector<Type*> Key; + for (unsigned i = 0, e = ETypes.size(); i != e; ++i) { + assert(isValidElementType(ETypes[i]) && + "Invalid type for structure element!"); + Key.push_back(ETypes[i]); } + if (isPacked) + Key.push_back(0); - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - const StructType *STy2 = cast<StructType>(Ty2); - if (STy->getNumElements() != STy2->getNumElements()) return false; - if (STy->isPacked() != STy2->isPacked()) return false; - for (unsigned i = 0, e = STy2->getNumElements(); i != e; ++i) - if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes)) - return false; - return true; - } + StructType *&ST = Context.pImpl->AnonStructTypes[Key]; + if (ST) return ST; - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const ArrayType *ATy2 = cast<ArrayType>(Ty2); - return ATy->getNumElements() == ATy2->getNumElements() && - TypesEqual(ATy->getElementType(), ATy2->getElementType(), EqTypes); - } + // Value not found. Create a new type! + ST = new (Context.pImpl->TypeAllocator) StructType(Context); + ST->setSubclassData(SCDB_IsAnonymous); // Anonymous struct. + ST->setBody(ETypes, isPacked); + return ST; +} + +void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) { + assert(isOpaque() && "Struct body already set!"); - if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) { - const VectorType *PTy2 = cast<VectorType>(Ty2); - return PTy->getNumElements() == PTy2->getNumElements() && - TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes); - } + setSubclassData(getSubclassData() | SCDB_HasBody); + if (isPacked) + setSubclassData(getSubclassData() | SCDB_Packed); - if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) { - const FunctionType *FTy2 = cast<FunctionType>(Ty2); - if (FTy->isVarArg() != FTy2->isVarArg() || - FTy->getNumParams() != FTy2->getNumParams() || - !TypesEqual(FTy->getReturnType(), FTy2->getReturnType(), EqTypes)) - return false; - for (unsigned i = 0, e = FTy2->getNumParams(); i != e; ++i) { - if (!TypesEqual(FTy->getParamType(i), FTy2->getParamType(i), EqTypes)) - return false; - } - return true; - } + Type **Elts = getContext().pImpl-> + TypeAllocator.Allocate<Type*>(Elements.size()); + memcpy(Elts, Elements.data(), sizeof(Elements[0])*Elements.size()); - llvm_unreachable("Unknown derived type!"); - return false; -} - -namespace llvm { // in namespace llvm so findable by ADL -static bool TypesEqual(const Type *Ty, const Type *Ty2) { - std::map<const Type *, const Type *> EqTypes; - return ::TypesEqual(Ty, Ty2, EqTypes); -} -} - -// AbstractTypeHasCycleThrough - Return true there is a path from CurTy to -// TargetTy in the type graph. We know that Ty is an abstract type, so if we -// ever reach a non-abstract type, we know that we don't need to search the -// subgraph. 
-static bool AbstractTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy, - SmallPtrSet<const Type*, 128> &VisitedTypes) { - if (TargetTy == CurTy) return true; - if (!CurTy->isAbstract()) return false; - - if (!VisitedTypes.insert(CurTy)) - return false; // Already been here. - - for (Type::subtype_iterator I = CurTy->subtype_begin(), - E = CurTy->subtype_end(); I != E; ++I) - if (AbstractTypeHasCycleThrough(TargetTy, *I, VisitedTypes)) - return true; - return false; + ContainedTys = Elts; + NumContainedTys = Elements.size(); } -static bool ConcreteTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy, - SmallPtrSet<const Type*, 128> &VisitedTypes) { - if (TargetTy == CurTy) return true; - - if (!VisitedTypes.insert(CurTy)) - return false; // Already been here. - - for (Type::subtype_iterator I = CurTy->subtype_begin(), - E = CurTy->subtype_end(); I != E; ++I) - if (ConcreteTypeHasCycleThrough(TargetTy, *I, VisitedTypes)) - return true; - return false; +StructType *StructType::createNamed(LLVMContext &Context, StringRef Name) { + StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context); + if (!Name.empty()) + ST->setName(Name); + return ST; } -/// TypeHasCycleThroughItself - Return true if the specified type has -/// a cycle back to itself. +void StructType::setName(StringRef Name) { + if (Name == getName()) return; -namespace llvm { // in namespace llvm so it's findable by ADL -static bool TypeHasCycleThroughItself(const Type *Ty) { - SmallPtrSet<const Type*, 128> VisitedTypes; - - if (Ty->isAbstract()) { // Optimized case for abstract types. - for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); - I != E; ++I) - if (AbstractTypeHasCycleThrough(Ty, *I, VisitedTypes)) - return true; - } else { - for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); - I != E; ++I) - if (ConcreteTypeHasCycleThrough(Ty, *I, VisitedTypes)) - return true; + // If this struct already had a name, remove its symbol table entry. + if (SymbolTableEntry) { + getContext().pImpl->NamedStructTypes.erase(getName()); + SymbolTableEntry = 0; } - return false; -} -} - -//===----------------------------------------------------------------------===// -// Function Type Factory and Value Class... -// -const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) { - assert(NumBits >= MIN_INT_BITS && "bitwidth too small"); - assert(NumBits <= MAX_INT_BITS && "bitwidth too large"); - - // Check for the built-in integer types - switch (NumBits) { - case 1: return cast<IntegerType>(Type::getInt1Ty(C)); - case 8: return cast<IntegerType>(Type::getInt8Ty(C)); - case 16: return cast<IntegerType>(Type::getInt16Ty(C)); - case 32: return cast<IntegerType>(Type::getInt32Ty(C)); - case 64: return cast<IntegerType>(Type::getInt64Ty(C)); - default: - break; - } - - LLVMContextImpl *pImpl = C.pImpl; - IntegerValType IVT(NumBits); - IntegerType *ITy = 0; + // If this is just removing the name, we're done. + if (Name.empty()) + return; - // First, see if the type is already in the table, for which - // a reader lock suffices. - ITy = pImpl->IntegerTypes.get(IVT); - - if (!ITy) { - // Value not found. Derive a new type! - ITy = new IntegerType(C, NumBits); - pImpl->IntegerTypes.add(IVT, ITy); + // Look up the entry for the name. + StringMapEntry<StructType*> *Entry = + &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name); + + // While we have a name collision, try a random rename. 
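createNamed plus setBody is the replacement for the old OpaqueType/refineAbstractTypeTo dance: a named struct starts opaque, may appear in its own body, and is never swapped out for a different Type object. A sketch of building a recursive list node this way (Ctx assumed in scope):

  // struct node { i32 v; node *next; }, with no refinement step required.
  StructType *Node = StructType::createNamed(Ctx, "node"); // opaque so far
  Type *Elts[] = { Type::getInt32Ty(Ctx), PointerType::getUnqual(Node) };
  Node->setBody(ArrayRef<Type*>(Elts, 2), /*isPacked=*/false);
  assert(!Node->isOpaque() && Node->getName() == "node");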
+ if (Entry->getValue()) { + SmallString<64> TempStr(Name); + TempStr.push_back('.'); + raw_svector_ostream TmpStream(TempStr); + + do { + TempStr.resize(Name.size()+1); + TmpStream.resync(); + TmpStream << getContext().pImpl->NamedStructTypesUniqueID++; + + Entry = &getContext().pImpl-> + NamedStructTypes.GetOrCreateValue(TmpStream.str()); + } while (Entry->getValue()); } -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << *ITy << "\n"); -#endif - return ITy; -} -bool IntegerType::isPowerOf2ByteWidth() const { - unsigned BitWidth = getBitWidth(); - return (BitWidth > 7) && isPowerOf2_32(BitWidth); + // Okay, we found an entry that isn't used. It's us! + Entry->setValue(this); + + SymbolTableEntry = Entry; } -APInt IntegerType::getMask() const { - return APInt::getAllOnesValue(getBitWidth()); -} +//===----------------------------------------------------------------------===// +// StructType Helper functions. -FunctionValType FunctionValType::get(const FunctionType *FT) { - // Build up a FunctionValType - std::vector<const Type *> ParamTypes; - ParamTypes.reserve(FT->getNumParams()); - for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) - ParamTypes.push_back(FT->getParamType(i)); - return FunctionValType(FT->getReturnType(), ParamTypes, FT->isVarArg()); +StructType *StructType::get(LLVMContext &Context, bool isPacked) { + return get(Context, llvm::ArrayRef<Type*>(), isPacked); } - -// FunctionType::get - The factory function for the FunctionType class... -FunctionType *FunctionType::get(const Type *ReturnType, - ArrayRef<const Type*> Params, - bool isVarArg) { - FunctionValType VT(ReturnType, Params, isVarArg); - FunctionType *FT = 0; - - LLVMContextImpl *pImpl = ReturnType->getContext().pImpl; - - FT = pImpl->FunctionTypes.get(VT); - - if (!FT) { - FT = (FunctionType*) operator new(sizeof(FunctionType) + - sizeof(PATypeHandle)*(Params.size()+1)); - new (FT) FunctionType(ReturnType, Params, isVarArg); - pImpl->FunctionTypes.add(VT, FT); +StructType *StructType::get(Type *type, ...) { + assert(type != 0 && "Cannot create a struct type with no elements with this"); + LLVMContext &Ctx = type->getContext(); + va_list ap; + SmallVector<llvm::Type*, 8> StructFields; + va_start(ap, type); + while (type) { + StructFields.push_back(type); + type = va_arg(ap, llvm::Type*); } - -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << FT << "\n"); -#endif - return FT; + return llvm::StructType::get(Ctx, StructFields); } -ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) { - assert(ElementType && "Can't get array of <null> types!"); - assert(isValidElementType(ElementType) && "Invalid type for array element!"); - - ArrayValType AVT(ElementType, NumElements); - ArrayType *AT = 0; - - LLVMContextImpl *pImpl = ElementType->getContext().pImpl; - - AT = pImpl->ArrayTypes.get(AVT); - - if (!AT) { - // Value not found. Derive a new type! 
- pImpl->ArrayTypes.add(AVT, AT = new ArrayType(ElementType, NumElements)); - } -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << *AT << "\n"); -#endif - return AT; +StructType *StructType::createNamed(LLVMContext &Context, StringRef Name, + ArrayRef<Type*> Elements, bool isPacked) { + StructType *ST = createNamed(Context, Name); + ST->setBody(Elements, isPacked); + return ST; } -bool ArrayType::isValidElementType(const Type *ElemTy) { - return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); +StructType *StructType::createNamed(StringRef Name, ArrayRef<Type*> Elements, + bool isPacked) { + assert(!Elements.empty() && + "This method may not be invoked with an empty list"); + return createNamed(Elements[0]->getContext(), Name, Elements, isPacked); } -VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { - assert(ElementType && "Can't get vector of <null> types!"); - - VectorValType PVT(ElementType, NumElements); - VectorType *PT = 0; - - LLVMContextImpl *pImpl = ElementType->getContext().pImpl; - - PT = pImpl->VectorTypes.get(PVT); - - if (!PT) { - pImpl->VectorTypes.add(PVT, PT = new VectorType(ElementType, NumElements)); +StructType *StructType::createNamed(StringRef Name, Type *type, ...) { + assert(type != 0 && "Cannot create a struct type with no elements with this"); + LLVMContext &Ctx = type->getContext(); + va_list ap; + SmallVector<llvm::Type*, 8> StructFields; + va_start(ap, type); + while (type) { + StructFields.push_back(type); + type = va_arg(ap, llvm::Type*); } -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << *PT << "\n"); -#endif - return PT; -} - -bool VectorType::isValidElementType(const Type *ElemTy) { - return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() || - ElemTy->isOpaqueTy(); + return llvm::StructType::createNamed(Ctx, Name, StructFields); } -//===----------------------------------------------------------------------===// -// Struct Type Factory... -// - -StructType *StructType::get(LLVMContext &Context, - ArrayRef<const Type*> ETypes, - bool isPacked) { - StructValType STV(ETypes, isPacked); - StructType *ST = 0; - - LLVMContextImpl *pImpl = Context.pImpl; +StringRef StructType::getName() const { + assert(!isAnonymous() && "Anonymous structs never have names"); + if (SymbolTableEntry == 0) return StringRef(); - ST = pImpl->StructTypes.get(STV); - - if (!ST) { - // Value not found. Derive a new type! - ST = (StructType*) operator new(sizeof(StructType) + - sizeof(PATypeHandle) * ETypes.size()); - new (ST) StructType(Context, ETypes, isPacked); - pImpl->StructTypes.add(STV, ST); - } -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << *ST << "\n"); -#endif - return ST; + return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey(); } -StructType *StructType::get(LLVMContext &Context, const Type *type, ...) { +void StructType::setBody(Type *type, ...) 
{ + assert(type != 0 && "Cannot create a struct type with no elements with this"); va_list ap; - std::vector<const llvm::Type*> StructFields; + SmallVector<llvm::Type*, 8> StructFields; va_start(ap, type); while (type) { StructFields.push_back(type); type = va_arg(ap, llvm::Type*); } - return llvm::StructType::get(Context, StructFields); + setBody(StructFields); } bool StructType::isValidElementType(const Type *ElemTy) { @@ -973,278 +529,159 @@ bool StructType::isValidElementType(const Type *ElemTy) { !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); } - -//===----------------------------------------------------------------------===// -// Pointer Type Factory... -// - -PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) { - assert(ValueType && "Can't get a pointer to <null> type!"); - assert(ValueType->getTypeID() != VoidTyID && - "Pointer to void is not valid, use i8* instead!"); - assert(isValidElementType(ValueType) && "Invalid type for pointer element!"); - PointerValType PVT(ValueType, AddressSpace); - - PointerType *PT = 0; - - LLVMContextImpl *pImpl = ValueType->getContext().pImpl; +/// isLayoutIdentical - Return true if this is layout identical to the +/// specified struct. +bool StructType::isLayoutIdentical(const StructType *Other) const { + if (this == Other) return true; - PT = pImpl->PointerTypes.get(PVT); + if (isPacked() != Other->isPacked() || + getNumElements() != Other->getNumElements()) + return false; - if (!PT) { - // Value not found. Derive a new type! - pImpl->PointerTypes.add(PVT, PT = new PointerType(ValueType, AddressSpace)); - } -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << *PT << "\n"); -#endif - return PT; + return std::equal(element_begin(), element_end(), Other->element_begin()); } -const PointerType *Type::getPointerTo(unsigned addrs) const { - return PointerType::get(this, addrs); -} -bool PointerType::isValidElementType(const Type *ElemTy) { - return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy(); +/// getTypeByName - Return the type with the specified name, or null if there +/// is none by that name. +StructType *Module::getTypeByName(StringRef Name) const { + StringMap<StructType*>::iterator I = + getContext().pImpl->NamedStructTypes.find(Name); + if (I != getContext().pImpl->NamedStructTypes.end()) + return I->second; + return 0; } //===----------------------------------------------------------------------===// -// Opaque Type Factory... -// +// CompositeType Implementation +//===----------------------------------------------------------------------===// -OpaqueType *OpaqueType::get(LLVMContext &C) { - OpaqueType *OT = new OpaqueType(C); // All opaque types are distinct. 
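Module::getTypeByName above now consults the context's NamedStructTypes map directly, and isLayoutIdentical supplies the structural check a type-merging pass would want. A brief sketch (M assumed in scope; "node.0" is a hypothetical collision-renamed duplicate; errs() assumes raw_ostream.h):

  // Layout-identical: same packing, same element count, and element-wise
  // pointer-equal field types.
  StructType *A = M.getTypeByName("node");  // null if no such named struct
  StructType *B = M.getTypeByName("node.0");
  if (A && B && A->isLayoutIdentical(B))
    errs() << "layout-identical named structs, candidates for merging\n";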
- LLVMContextImpl *pImpl = C.pImpl; - pImpl->OpaqueTypes.insert(OT); - return OT; +Type *CompositeType::getTypeAtIndex(const Value *V) const { + if (const StructType *STy = dyn_cast<StructType>(this)) { + unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); + assert(indexValid(Idx) && "Invalid structure index!"); + return STy->getElementType(Idx); + } + + return cast<SequentialType>(this)->getElementType(); +} +Type *CompositeType::getTypeAtIndex(unsigned Idx) const { + if (const StructType *STy = dyn_cast<StructType>(this)) { + assert(indexValid(Idx) && "Invalid structure index!"); + return STy->getElementType(Idx); + } + + return cast<SequentialType>(this)->getElementType(); +} +bool CompositeType::indexValid(const Value *V) const { + if (const StructType *STy = dyn_cast<StructType>(this)) { + // Structure indexes require 32-bit integer constants. + if (V->getType()->isIntegerTy(32)) + if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) + return CU->getZExtValue() < STy->getNumElements(); + return false; + } + + // Sequential types can be indexed by any integer. + return V->getType()->isIntegerTy(); } +bool CompositeType::indexValid(unsigned Idx) const { + if (const StructType *STy = dyn_cast<StructType>(this)) + return Idx < STy->getNumElements(); + // Sequential types can be indexed by any integer. + return true; +} //===----------------------------------------------------------------------===// -// Derived Type Refinement Functions +// ArrayType Implementation //===----------------------------------------------------------------------===// -// addAbstractTypeUser - Notify an abstract type that there is a new user of -// it. This function is called primarily by the PATypeHandle class. -void Type::addAbstractTypeUser(AbstractTypeUser *U) const { - assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!"); - AbstractTypeUsers.push_back(U); +ArrayType::ArrayType(Type *ElType, uint64_t NumEl) + : SequentialType(ArrayTyID, ElType) { + NumElements = NumEl; } -// removeAbstractTypeUser - Notify an abstract type that a user of the class -// no longer has a handle to the type. This function is called primarily by -// the PATypeHandle class. When there are no users of the abstract type, it -// is annihilated, because there is no way to get a reference to it ever again. -// -void Type::removeAbstractTypeUser(AbstractTypeUser *U) const { - - // Search from back to front because we will notify users from back to - // front. Also, it is likely that there will be a stack like behavior to - // users that register and unregister users. - // - unsigned i; - for (i = AbstractTypeUsers.size(); AbstractTypeUsers[i-1] != U; --i) - assert(i != 0 && "AbstractTypeUser not in user list!"); - - --i; // Convert to be in range 0 <= i < size() - assert(i < AbstractTypeUsers.size() && "Index out of range!"); // Wraparound? 
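The CompositeType hunk above centralizes index checking: struct indices must be in range (and, through the Value* form, 32-bit constants), while sequential types accept any integer index. An illustrative walk (Ctx assumed in scope):

  Type *F[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
  StructType *ST = StructType::get(Ctx, ArrayRef<Type*>(F, 2));
  assert(ST->indexValid(1u) && ST->getTypeAtIndex(1u)->isFloatTy());
  ArrayType *AT = ArrayType::get(Type::getInt16Ty(Ctx), 8);
  assert(AT->indexValid(123u));   // sequential: any integer index is valid
  assert(AT->getTypeAtIndex(123u) == Type::getInt16Ty(Ctx));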
- - AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i); - -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << " remAbstractTypeUser[" << (void*)this << ", " - << *this << "][" << i << "] User = " << U << "\n"); -#endif - - if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) { -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "DELETEing unused abstract type: <" << *this - << ">[" << (void*)this << "]" << "\n"); -#endif +ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) { + Type *ElementType = const_cast<Type*>(elementType); + assert(isValidElementType(ElementType) && "Invalid type for array element!"); + + LLVMContextImpl *pImpl = ElementType->getContext().pImpl; + ArrayType *&Entry = + pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)]; - this->destroy(); - } -} - -// refineAbstractTypeTo - This function is used when it is discovered -// that the 'this' abstract type is actually equivalent to the NewType -// specified. This causes all users of 'this' to switch to reference the more -// concrete type NewType and for 'this' to be deleted. Only used for internal -// callers. -// -void DerivedType::refineAbstractTypeTo(const Type *NewType) { - assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!"); - assert(this != NewType && "Can't refine to myself!"); - assert(ForwardType == 0 && "This type has already been refined!"); - - LLVMContextImpl *pImpl = getContext().pImpl; - - // The descriptions may be out of date. Conservatively clear them all! - pImpl->AbstractTypeDescriptions.clear(); - -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "REFINING abstract type [" << (void*)this << " " - << *this << "] to [" << (void*)NewType << " " - << *NewType << "]!\n"); -#endif - - // Make sure to put the type to be refined to into a holder so that if IT gets - // refined, that we will not continue using a dead reference... - // - PATypeHolder NewTy(NewType); - // Any PATypeHolders referring to this type will now automatically forward to - // the type we are resolved to. - ForwardType = NewType; - if (ForwardType->isAbstract()) - ForwardType->addRef(); - - // Add a self use of the current type so that we don't delete ourself until - // after the function exits. - // - PATypeHolder CurrentTy(this); - - // To make the situation simpler, we ask the subclass to remove this type from - // the type map, and to replace any type uses with uses of non-abstract types. - // This dramatically limits the amount of recursive type trouble we can find - // ourselves in. - dropAllTypeUses(); - - // Iterate over all of the uses of this type, invoking callback. Each user - // should remove itself from our use list automatically. We have to check to - // make sure that NewTy doesn't _become_ 'this'. If it does, resolving types - // will not cause users to drop off of the use list. If we resolve to ourself - // we succeed! 
- // - while (!AbstractTypeUsers.empty() && NewTy != this) { - AbstractTypeUser *User = AbstractTypeUsers.back(); - - unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize; -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << " REFINING user " << OldSize-1 << "[" << (void*)User - << "] of abstract type [" << (void*)this << " " - << *this << "] to [" << (void*)NewTy.get() << " " - << *NewTy << "]!\n"); -#endif - User->refineAbstractType(this, NewTy); - - assert(AbstractTypeUsers.size() != OldSize && - "AbsTyUser did not remove self from user list!"); - } - - // If we were successful removing all users from the type, 'this' will be - // deleted when the last PATypeHolder is destroyed or updated from this type. - // This may occur on exit of this function, as the CurrentTy object is - // destroyed. -} - -// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that -// the current type has transitioned from being abstract to being concrete. -// -void DerivedType::notifyUsesThatTypeBecameConcrete() { -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n"); -#endif - - unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize; - while (!AbstractTypeUsers.empty()) { - AbstractTypeUser *ATU = AbstractTypeUsers.back(); - ATU->typeBecameConcrete(this); - - assert(AbstractTypeUsers.size() < OldSize-- && - "AbstractTypeUser did not remove itself from the use list!"); - } -} - -// refineAbstractType - Called when a contained type is found to be more -// concrete - this could potentially change us from an abstract type to a -// concrete type. -// -void FunctionType::refineAbstractType(const DerivedType *OldType, - const Type *NewType) { - LLVMContextImpl *pImpl = OldType->getContext().pImpl; - pImpl->FunctionTypes.RefineAbstractType(this, OldType, NewType); + if (Entry == 0) + Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements); + return Entry; } -void FunctionType::typeBecameConcrete(const DerivedType *AbsTy) { - LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; - pImpl->FunctionTypes.TypeBecameConcrete(this, AbsTy); +bool ArrayType::isValidElementType(const Type *ElemTy) { + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); } +//===----------------------------------------------------------------------===// +// VectorType Implementation +//===----------------------------------------------------------------------===// -// refineAbstractType - Called when a contained type is found to be more -// concrete - this could potentially change us from an abstract type to a -// concrete type. 
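// Illustrative sketch (not part of this commit's diff): the ArrayType::get
// above memoizes through a map owned by the LLVMContext, so structurally
// identical array types are pointer-identical. The function name is made up.
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void checkArrayUniquing(LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  ArrayType *A = ArrayType::get(I32, 16);
  ArrayType *B = ArrayType::get(I32, 16);
  // The same (element type, element count) key hits the same map entry, so
  // the object is allocated once from the context's TypeAllocator and reused.
  assert(A == B && "uniqued types compare equal by pointer");
  (void)A; (void)B;
}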
-// -void ArrayType::refineAbstractType(const DerivedType *OldType, - const Type *NewType) { - LLVMContextImpl *pImpl = OldType->getContext().pImpl; - pImpl->ArrayTypes.RefineAbstractType(this, OldType, NewType); +VectorType::VectorType(Type *ElType, unsigned NumEl) + : SequentialType(VectorTyID, ElType) { + NumElements = NumEl; } -void ArrayType::typeBecameConcrete(const DerivedType *AbsTy) { - LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; - pImpl->ArrayTypes.TypeBecameConcrete(this, AbsTy); +VectorType *VectorType::get(const Type *elementType, unsigned NumElements) { + Type *ElementType = const_cast<Type*>(elementType); + assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0"); + assert(isValidElementType(ElementType) && + "Elements of a VectorType must be a primitive type"); + + LLVMContextImpl *pImpl = ElementType->getContext().pImpl; + VectorType *&Entry = ElementType->getContext().pImpl + ->VectorTypes[std::make_pair(ElementType, NumElements)]; + + if (Entry == 0) + Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements); + return Entry; } -// refineAbstractType - Called when a contained type is found to be more -// concrete - this could potentially change us from an abstract type to a -// concrete type. -// -void VectorType::refineAbstractType(const DerivedType *OldType, - const Type *NewType) { - LLVMContextImpl *pImpl = OldType->getContext().pImpl; - pImpl->VectorTypes.RefineAbstractType(this, OldType, NewType); +bool VectorType::isValidElementType(const Type *ElemTy) { + return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy(); } -void VectorType::typeBecameConcrete(const DerivedType *AbsTy) { - LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; - pImpl->VectorTypes.TypeBecameConcrete(this, AbsTy); -} +//===----------------------------------------------------------------------===// +// PointerType Implementation +//===----------------------------------------------------------------------===// -// refineAbstractType - Called when a contained type is found to be more -// concrete - this could potentially change us from an abstract type to a -// concrete type. -// -void StructType::refineAbstractType(const DerivedType *OldType, - const Type *NewType) { - LLVMContextImpl *pImpl = OldType->getContext().pImpl; - pImpl->StructTypes.RefineAbstractType(this, OldType, NewType); -} +PointerType *PointerType::get(const Type *eltTy, unsigned AddressSpace) { + Type *EltTy = const_cast<Type*>(eltTy); + assert(EltTy && "Can't get a pointer to <null> type!"); + assert(isValidElementType(EltTy) && "Invalid type for pointer element!"); + + LLVMContextImpl *CImpl = EltTy->getContext().pImpl; + + // Since AddressSpace #0 is the common case, we special case it. + PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy] + : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)]; -void StructType::typeBecameConcrete(const DerivedType *AbsTy) { - LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; - pImpl->StructTypes.TypeBecameConcrete(this, AbsTy); + if (Entry == 0) + Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace); + return Entry; } -// refineAbstractType - Called when a contained type is found to be more -// concrete - this could potentially change us from an abstract type to a -// concrete type. 
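// Illustrative sketch (not part of this commit's diff): the PointerType::get
// above keeps addrspace(0) pointers in a map keyed by element type alone and
// all other address spaces in a (type, addrspace)-keyed map; both unique.
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void checkPointerUniquing(LLVMContext &Ctx) {
  Type *I8 = Type::getInt8Ty(Ctx);
  PointerType *P0 = PointerType::get(I8, 0); // i8*, fast-path map
  PointerType *P1 = PointerType::get(I8, 1); // i8 addrspace(1)*
  assert(P0 != P1 && "address space participates in the type identity");
  assert(P0 == PointerType::get(I8, 0) && "repeated queries hit the cache");
  (void)P1;
}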
-//
-void PointerType::refineAbstractType(const DerivedType *OldType,
-                                     const Type *NewType) {
-  LLVMContextImpl *pImpl = OldType->getContext().pImpl;
-  pImpl->PointerTypes.RefineAbstractType(this, OldType, NewType);
-}
-
-void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
-  LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
-  pImpl->PointerTypes.TypeBecameConcrete(this, AbsTy);
+PointerType::PointerType(Type *E, unsigned AddrSpace)
+  : SequentialType(PointerTyID, E) {
+  setSubclassData(AddrSpace);
 }

-bool SequentialType::indexValid(const Value *V) const {
-  if (V->getType()->isIntegerTy())
-    return true;
-  return false;
+PointerType *Type::getPointerTo(unsigned addrs) const {
+  return PointerType::get(this, addrs);
 }

-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
-  T.print(OS);
-  return OS;
-}
+bool PointerType::isValidElementType(const Type *ElemTy) {
+  return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+         !ElemTy->isMetadataTy();
 }
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
deleted file mode 100644
index d68a44bd6711..000000000000
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-//===-- TypeSymbolTable.cpp - Implement the TypeSymbolTable class ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License.  See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TypeSymbolTable class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-using namespace llvm;
-
-#define DEBUG_SYMBOL_TABLE 0
-#define DEBUG_ABSTYPE 0
-
-TypeSymbolTable::~TypeSymbolTable() {
-  // Drop all abstract type references in the type plane...
-  for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
-    if (TI->second->isAbstract())   // If abstract, drop the reference...
-      cast<DerivedType>(TI->second)->removeAbstractTypeUser(this);
-  }
-}
-
-std::string TypeSymbolTable::getUniqueName(StringRef BaseName) const {
-  std::string TryName = BaseName;
-
-  const_iterator End = tmap.end();
-
-  // See if the name exists
-  while (tmap.find(TryName) != End)            // Loop until we find a free
-    TryName = BaseName.str() + utostr(++LastUnique); // name in the symbol table
-  return TryName;
-}
-
-// lookup a type by name - returns null on failure
-Type* TypeSymbolTable::lookup(StringRef Name) const {
-  const_iterator TI = tmap.find(Name);
-  Type* result = 0;
-  if (TI != tmap.end())
-    result = const_cast<Type*>(TI->second);
-  return result;
-}
-
-// remove - Remove a type from the symbol table...
-Type* TypeSymbolTable::remove(iterator Entry) {
-  assert(Entry != tmap.end() && "Invalid entry to remove!");
-  const Type* Result = Entry->second;
-
-#if DEBUG_SYMBOL_TABLE
-  dump();
-  dbgs() << " Removing Value: " << Result->getDescription() << "\n";
-#endif
-
-  tmap.erase(Entry);
-
-  // If we are removing an abstract type, remove the symbol table from its use
-  // list...
-  if (Result->isAbstract()) {
-#if DEBUG_ABSTYPE
-    dbgs() << "Removing abstract type from symtab"
-           << Result->getDescription()
-           << "\n";
-#endif
-    cast<DerivedType>(Result)->removeAbstractTypeUser(this);
-  }
-
-  return const_cast<Type*>(Result);
-}
-
-
-// insert - Insert a type into the symbol table with the specified name...
-void TypeSymbolTable::insert(StringRef Name, const Type* T) {
-  assert(T && "Can't insert null type into symbol table!");
-
-  if (tmap.insert(std::make_pair(Name, T)).second) {
-    // Type inserted fine with no conflict.
-
-#if DEBUG_SYMBOL_TABLE
-    dump();
-    dbgs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
-#endif
-  } else {
-    // If there is a name conflict...
-
-    // Check to see if there is a naming conflict.  If so, rename this type!
-    std::string UniqueName = Name;
-    if (lookup(Name))
-      UniqueName = getUniqueName(Name);
-
-#if DEBUG_SYMBOL_TABLE
-    dump();
-    dbgs() << " Inserting type: " << UniqueName << ": "
-           << T->getDescription() << "\n";
-#endif
-
-    // Insert the tmap entry
-    tmap.insert(make_pair(UniqueName, T));
-  }
-
-  // If we are adding an abstract type, add the symbol table to its use list.
-  if (T->isAbstract()) {
-    cast<DerivedType>(T)->addAbstractTypeUser(this);
-#if DEBUG_ABSTYPE
-    dbgs() << "Added abstract type to ST: " << T->getDescription() << "\n";
-#endif
-  }
-}
-
-// This function is called when one of the types in the type plane is refined
-void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
-                                         const Type *NewType) {
-  // Loop over all of the types in the symbol table, replacing any references
-  // to OldType with references to NewType.  Note that there may be multiple
-  // occurrences, and although we only need to remove one at a time, it's
-  // faster to remove them all in one pass.
-  //
-  for (iterator I = begin(), E = end(); I != E; ++I) {
-    // FIXME when Types aren't const.
-    if (I->second == const_cast<DerivedType *>(OldType)) {
-#if DEBUG_ABSTYPE
-      dbgs() << "Removing type " << OldType->getDescription() << "\n";
-#endif
-      OldType->removeAbstractTypeUser(this);
-
-      // TODO FIXME when types aren't const
-      I->second = const_cast<Type *>(NewType);
-      if (NewType->isAbstract()) {
-#if DEBUG_ABSTYPE
-        dbgs() << "Added type " << NewType->getDescription() << "\n";
-#endif
-        cast<DerivedType>(NewType)->addAbstractTypeUser(this);
-      }
-    }
-  }
-}
-
-
-// Handle situation where type becomes Concrete from Abstract
-void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
-  // Loop over all of the types in the symbol table, dropping any abstract
-  // type user entries for AbsTy which occur because there are names for the
-  // type.
-  for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
-    if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
-      AbsTy->removeAbstractTypeUser(this);
-}
-
-static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
-  dbgs() << "  '" << T.first << "' = ";
-  T.second->dump();
-  dbgs() << "\n";
-}
-
-void TypeSymbolTable::dump() const {
-  dbgs() << "TypeSymbolPlane: ";
-  for_each(tmap.begin(), tmap.end(), DumpTypes);
-}
-
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
deleted file mode 100644
index ad09478bbcfd..000000000000
--- a/lib/VMCore/TypesContext.h
+++ /dev/null
@@ -1,426 +0,0 @@
-//===-- TypesContext.h - Types-related Context Internals ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines various helper methods and classes used by -// LLVMContextImpl for creating and managing types. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TYPESCONTEXT_H -#define LLVM_TYPESCONTEXT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include <map> - - -//===----------------------------------------------------------------------===// -// Derived Type Factory Functions -//===----------------------------------------------------------------------===// -namespace llvm { - -/// getSubElementHash - Generate a hash value for all of the SubType's of this -/// type. The hash value is guaranteed to be zero if any of the subtypes are -/// an opaque type. Otherwise we try to mix them in as well as possible, but do -/// not look at the subtype's subtype's. -static unsigned getSubElementHash(const Type *Ty) { - unsigned HashVal = 0; - for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); - I != E; ++I) { - HashVal *= 32; - const Type *SubTy = I->get(); - HashVal += SubTy->getTypeID(); - switch (SubTy->getTypeID()) { - default: break; - case Type::OpaqueTyID: return 0; // Opaque -> hash = 0 no matter what. - case Type::IntegerTyID: - HashVal ^= (cast<IntegerType>(SubTy)->getBitWidth() << 3); - break; - case Type::FunctionTyID: - HashVal ^= cast<FunctionType>(SubTy)->getNumParams()*2 + - cast<FunctionType>(SubTy)->isVarArg(); - break; - case Type::ArrayTyID: - HashVal ^= cast<ArrayType>(SubTy)->getNumElements(); - break; - case Type::VectorTyID: - HashVal ^= cast<VectorType>(SubTy)->getNumElements(); - break; - case Type::StructTyID: - HashVal ^= cast<StructType>(SubTy)->getNumElements(); - break; - case Type::PointerTyID: - HashVal ^= cast<PointerType>(SubTy)->getAddressSpace(); - break; - } - } - return HashVal ? HashVal : 1; // Do not return zero unless opaque subty. -} - -//===----------------------------------------------------------------------===// -// Integer Type Factory... -// -class IntegerValType { - uint32_t bits; -public: - IntegerValType(uint32_t numbits) : bits(numbits) {} - - static IntegerValType get(const IntegerType *Ty) { - return IntegerValType(Ty->getBitWidth()); - } - - static unsigned hashTypeStructure(const IntegerType *Ty) { - return (unsigned)Ty->getBitWidth(); - } - - inline bool operator<(const IntegerValType &IVT) const { - return bits < IVT.bits; - } -}; - -// PointerValType - Define a class to hold the key that goes into the TypeMap -// -class PointerValType { - const Type *ValTy; - unsigned AddressSpace; -public: - PointerValType(const Type *val, unsigned as) : ValTy(val), AddressSpace(as) {} - - static PointerValType get(const PointerType *PT) { - return PointerValType(PT->getElementType(), PT->getAddressSpace()); - } - - static unsigned hashTypeStructure(const PointerType *PT) { - return getSubElementHash(PT); - } - - bool operator<(const PointerValType &MTV) const { - if (AddressSpace < MTV.AddressSpace) return true; - return AddressSpace == MTV.AddressSpace && ValTy < MTV.ValTy; - } -}; - -//===----------------------------------------------------------------------===// -// Array Type Factory... 
-// -class ArrayValType { - const Type *ValTy; - uint64_t Size; -public: - ArrayValType(const Type *val, uint64_t sz) : ValTy(val), Size(sz) {} - - static ArrayValType get(const ArrayType *AT) { - return ArrayValType(AT->getElementType(), AT->getNumElements()); - } - - static unsigned hashTypeStructure(const ArrayType *AT) { - return (unsigned)AT->getNumElements(); - } - - inline bool operator<(const ArrayValType &MTV) const { - if (Size < MTV.Size) return true; - return Size == MTV.Size && ValTy < MTV.ValTy; - } -}; - -//===----------------------------------------------------------------------===// -// Vector Type Factory... -// -class VectorValType { - const Type *ValTy; - unsigned Size; -public: - VectorValType(const Type *val, int sz) : ValTy(val), Size(sz) {} - - static VectorValType get(const VectorType *PT) { - return VectorValType(PT->getElementType(), PT->getNumElements()); - } - - static unsigned hashTypeStructure(const VectorType *PT) { - return PT->getNumElements(); - } - - inline bool operator<(const VectorValType &MTV) const { - if (Size < MTV.Size) return true; - return Size == MTV.Size && ValTy < MTV.ValTy; - } -}; - -// StructValType - Define a class to hold the key that goes into the TypeMap -// -class StructValType { - std::vector<const Type*> ElTypes; - bool packed; -public: - StructValType(ArrayRef<const Type*> args, bool isPacked) - : ElTypes(args.vec()), packed(isPacked) {} - - static StructValType get(const StructType *ST) { - std::vector<const Type *> ElTypes; - ElTypes.reserve(ST->getNumElements()); - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) - ElTypes.push_back(ST->getElementType(i)); - - return StructValType(ElTypes, ST->isPacked()); - } - - static unsigned hashTypeStructure(const StructType *ST) { - return ST->getNumElements(); - } - - inline bool operator<(const StructValType &STV) const { - if (ElTypes < STV.ElTypes) return true; - else if (ElTypes > STV.ElTypes) return false; - else return (int)packed < (int)STV.packed; - } -}; - -// FunctionValType - Define a class to hold the key that goes into the TypeMap -// -class FunctionValType { - const Type *RetTy; - std::vector<const Type*> ArgTypes; - bool isVarArg; -public: - FunctionValType(const Type *ret, ArrayRef<const Type*> args, bool isVA) - : RetTy(ret), ArgTypes(args.vec()), isVarArg(isVA) {} - - static FunctionValType get(const FunctionType *FT); - - static unsigned hashTypeStructure(const FunctionType *FT) { - unsigned Result = FT->getNumParams()*2 + FT->isVarArg(); - return Result; - } - - inline bool operator<(const FunctionValType &MTV) const { - if (RetTy < MTV.RetTy) return true; - if (RetTy > MTV.RetTy) return false; - if (isVarArg < MTV.isVarArg) return true; - if (isVarArg > MTV.isVarArg) return false; - if (ArgTypes < MTV.ArgTypes) return true; - if (ArgTypes > MTV.ArgTypes) return false; - return false; - } -}; - -class TypeMapBase { -protected: - /// TypesByHash - Keep track of types by their structure hash value. Note - /// that we only keep track of types that have cycles through themselves in - /// this map. - /// - std::multimap<unsigned, PATypeHolder> TypesByHash; - - ~TypeMapBase() { - // PATypeHolder won't destroy non-abstract types. - // We can't destroy them by simply iterating, because - // they may contain references to each-other. 
-    for (std::multimap<unsigned, PATypeHolder>::iterator I
-         = TypesByHash.begin(), E = TypesByHash.end(); I != E; ++I) {
-      Type *Ty = const_cast<Type*>(I->second.Ty);
-      I->second.destroy();
-      // We can't invoke destroy or delete, because the type may
-      // contain references to already freed types.
-      // So we have to destruct the object the ugly way.
-      if (Ty) {
-        Ty->AbstractTypeUsers.clear();
-        static_cast<const Type*>(Ty)->Type::~Type();
-        operator delete(Ty);
-      }
-    }
-  }
-
-public:
-  void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) {
-    std::multimap<unsigned, PATypeHolder>::iterator I =
-      TypesByHash.lower_bound(Hash);
-    for (; I != TypesByHash.end() && I->first == Hash; ++I) {
-      if (I->second == Ty) {
-        TypesByHash.erase(I);
-        return;
-      }
-    }
-
-    // This must be due to an opaque type that was resolved.  Switch down to hash
-    // code of zero.
-    assert(Hash && "Didn't find type entry!");
-    RemoveFromTypesByHash(0, Ty);
-  }
-
-  /// TypeBecameConcrete - When Ty gets a notification that TheType just became
-  /// concrete, drop uses and make Ty non-abstract if we should.
-  void TypeBecameConcrete(DerivedType *Ty, const DerivedType *TheType) {
-    // If the element just became concrete, remove 'ty' from the abstract
-    // type user list for the type.  Do this for as many times as Ty uses
-    // OldType.
-    for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
-         I != E; ++I)
-      if (I->get() == TheType)
-        TheType->removeAbstractTypeUser(Ty);
-
-    // If the type is currently thought to be abstract, rescan all of our
-    // subtypes to see if the type has just become concrete!  Note that this
-    // may send out notifications to AbstractTypeUsers that types become
-    // concrete.
-    if (Ty->isAbstract())
-      Ty->PromoteAbstractToConcrete();
-  }
-};
-
-// TypeMap - Make sure that only one instance of a particular type may be
-// created on any given run of the compiler... note that this involves updating
-// our map if an abstract type gets refined somehow.
-//
-template<class ValType, class TypeClass>
-class TypeMap : public TypeMapBase {
-  std::map<ValType, PATypeHolder> Map;
-public:
-  typedef typename std::map<ValType, PATypeHolder>::iterator iterator;
-
-  inline TypeClass *get(const ValType &V) {
-    iterator I = Map.find(V);
-    return I != Map.end() ? cast<TypeClass>((Type*)I->second.get()) : 0;
-  }
-
-  inline void add(const ValType &V, TypeClass *Ty) {
-    Map.insert(std::make_pair(V, Ty));
-
-    // If this type has a cycle, remember it.
-    TypesByHash.insert(std::make_pair(ValType::hashTypeStructure(Ty), Ty));
-    print("add");
-  }
-
-  /// RefineAbstractType - This method is called after we have merged a type
-  /// with another one.  We must now either merge the type away with
-  /// some other type or reinstall it in the map with its new configuration.
-  void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType,
-                          const Type *NewType) {
-#ifdef DEBUG_MERGE_TYPES
-    DEBUG(dbgs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
-                 << "], " << (void*)NewType << " [" << *NewType << "])\n");
-#endif
-
-    // Otherwise, we are changing one subelement type into another.  Clearly the
-    // OldType must have been abstract, making us abstract.
-    assert(Ty->isAbstract() && "Refining a non-abstract type!");
-    assert(OldType != NewType);
-
-    // Make a temporary type holder for the type so that it doesn't disappear on
-    // us when we erase the entry from the map.
-    PATypeHolder TyHolder = Ty;
-
-    // The old record is now out-of-date, because one of the children has been
-    // updated.  Remove the obsolete entry from the map.
-    unsigned NumErased = Map.erase(ValType::get(Ty));
-    assert(NumErased && "Element not found!"); (void)NumErased;
-
-    // Remember the structural hash for the type before we start hacking on it,
-    // in case we need it later.
-    unsigned OldTypeHash = ValType::hashTypeStructure(Ty);
-
-    // Find the type element we are refining... and change it now!
-    for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i)
-      if (Ty->ContainedTys[i] == OldType)
-        Ty->ContainedTys[i] = NewType;
-    unsigned NewTypeHash = ValType::hashTypeStructure(Ty);
-
-    // If there are no cycles going through this node, we can do a simple,
-    // efficient lookup in the map, instead of an inefficient nasty linear
-    // lookup.
-    if (!TypeHasCycleThroughItself(Ty)) {
-      typename std::map<ValType, PATypeHolder>::iterator I;
-      bool Inserted;
-
-      tie(I, Inserted) = Map.insert(std::make_pair(ValType::get(Ty), Ty));
-      if (!Inserted) {
-        // Refined to a different type altogether?
-        RemoveFromTypesByHash(OldTypeHash, Ty);
-
-        // We already have this type in the table.  Get rid of the newly refined
-        // type.
-        TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-        Ty->refineAbstractTypeTo(NewTy);
-        return;
-      }
-    } else {
-      // Now we check to see if there is an existing entry in the table which is
-      // structurally identical to the newly refined type.  If so, this type
-      // gets refined to the pre-existing type.
-      //
-      std::multimap<unsigned, PATypeHolder>::iterator I, E, Entry;
-      tie(I, E) = TypesByHash.equal_range(NewTypeHash);
-      Entry = E;
-      for (; I != E; ++I) {
-        if (I->second == Ty) {
-          // Remember the position of the old type if we see it in our scan.
-          Entry = I;
-          continue;
-        }
-
-        if (!TypesEqual(Ty, I->second))
-          continue;
-
-        TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-
-        // Remove the old entry from TypesByHash.  If the hash values differ
-        // now, remove it from the old place.  Otherwise, continue scanning
-        // within this hashcode to reduce work.
-        if (NewTypeHash != OldTypeHash) {
-          RemoveFromTypesByHash(OldTypeHash, Ty);
-        } else {
-          if (Entry == E) {
-            // Find the location of Ty in the TypesByHash structure if we
-            // haven't seen it already.
-            while (I->second != Ty) {
-              ++I;
-              assert(I != E && "Structure doesn't contain type??");
-            }
-            Entry = I;
-          }
-          TypesByHash.erase(Entry);
-        }
-        Ty->refineAbstractTypeTo(NewTy);
-        return;
-      }
-
-      // If there is no existing type of the same structure, we reinsert an
-      // updated record into the map.
-      Map.insert(std::make_pair(ValType::get(Ty), Ty));
-    }
-
-    // If the hash codes differ, update TypesByHash
-    if (NewTypeHash != OldTypeHash) {
-      RemoveFromTypesByHash(OldTypeHash, Ty);
-      TypesByHash.insert(std::make_pair(NewTypeHash, Ty));
-    }
-
-    // If the type is currently thought to be abstract, rescan all of our
-    // subtypes to see if the type has just become concrete!  Note that this
-    // may send out notifications to AbstractTypeUsers that types become
-    // concrete.
-    if (Ty->isAbstract())
-      Ty->PromoteAbstractToConcrete();
-  }
-
-  void print(const char *Arg) const {
-#ifdef DEBUG_MERGE_TYPES
-    DEBUG(dbgs() << "TypeMap<>::" << Arg << " table contents:\n");
-    unsigned i = 0;
-    for (typename std::map<ValType, PATypeHolder>::const_iterator I
-         = Map.begin(), E = Map.end(); I != E; ++I)
-      DEBUG(dbgs() << "  " << (++i) << ". 
" << (void*)I->second.get() << " " - << *I->second.get() << "\n"); -#endif - } - - void dump() const { print("dump output"); } -}; -} - -#endif diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp index 2258b8d985ae..359a1517ab79 100644 --- a/lib/VMCore/Use.cpp +++ b/lib/VMCore/Use.cpp @@ -135,11 +135,9 @@ void Use::zap(Use *Start, const Use *Stop, bool del) { User *Use::getUser() const { const Use *End = getImpliedUser(); - const PointerIntPair<User*, 1, unsigned>& - ref(static_cast<const AugmentedUse*>(End - 1)->ref); - User *She = ref.getPointer(); - return ref.getInt() - ? She + const UserRef *ref = reinterpret_cast<const UserRef*>(End); + return ref->getInt() + ? ref->getPointer() : (User*)End; } diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp index 2f4587debb66..f01fa349adfd 100644 --- a/lib/VMCore/User.cpp +++ b/lib/VMCore/User.cpp @@ -40,14 +40,12 @@ void User::replaceUsesOfWith(Value *From, Value *To) { //===----------------------------------------------------------------------===// Use *User::allocHungoffUses(unsigned N) const { - Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N - + sizeof(AugmentedUse) - - sizeof(Use))); + // Allocate the array of Uses, followed by a pointer (with bottom bit set) to + // the User. + size_t size = N * sizeof(Use) + sizeof(Use::UserRef); + Use *Begin = static_cast<Use*>(::operator new(size)); Use *End = Begin + N; - PointerIntPair<User*, 1, unsigned>& - ref(static_cast<AugmentedUse&>(End[-1]).ref); - ref.setPointer(const_cast<User*>(this)); - ref.setInt(1); + (void) new(End) Use::UserRef(const_cast<User*>(this), 1); return Use::initTags(Begin, End); } diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index 29f6a8094f0b..f1815e377edc 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -35,22 +35,21 @@ using namespace llvm; // Value Class //===----------------------------------------------------------------------===// -static inline const Type *checkType(const Type *Ty) { +static inline Type *checkType(const Type *Ty) { assert(Ty && "Value defined with a null type: Error!"); - return Ty; + return const_cast<Type*>(Ty); } Value::Value(const Type *ty, unsigned scid) : SubclassID(scid), HasValueHandle(0), - SubclassOptionalData(0), SubclassData(0), VTy(checkType(ty)), + SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)), UseList(0), Name(0) { + // FIXME: Why isn't this in the subclass gunk?? if (isa<CallInst>(this) || isa<InvokeInst>(this)) - assert((VTy->isFirstClassType() || VTy->isVoidTy() || - ty->isOpaqueTy() || VTy->isStructTy()) && - "invalid CallInst type!"); + assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) && + "invalid CallInst type!"); else if (!isa<Constant>(this) && !isa<BasicBlock>(this)) - assert((VTy->isFirstClassType() || VTy->isVoidTy() || - ty->isOpaqueTy()) && + assert((VTy->isFirstClassType() || VTy->isVoidTy()) && "Cannot create non-first-class values except for constants!"); } @@ -281,17 +280,16 @@ void Value::takeName(Value *V) { } -// uncheckedReplaceAllUsesWith - This is exactly the same as replaceAllUsesWith, -// except that it doesn't have all of the asserts. The asserts fail because we -// are half-way done resolving types, which causes some types to exist as two -// different Type*'s at the same time. This is a sledgehammer to work around -// this problem. 
-// -void Value::uncheckedReplaceAllUsesWith(Value *New) { +void Value::replaceAllUsesWith(Value *New) { + assert(New && "Value::replaceAllUsesWith(<null>) is invalid!"); + assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!"); + assert(New->getType() == getType() && + "replaceAllUses of value with new value of different type!"); + // Notify all ValueHandles (if present) that this value is going away. if (HasValueHandle) ValueHandleBase::ValueIsRAUWd(this, New); - + while (!use_empty()) { Use &U = *UseList; // Must handle Constants specially, we cannot call replaceUsesOfWith on a @@ -302,18 +300,12 @@ void Value::uncheckedReplaceAllUsesWith(Value *New) { continue; } } - + U.set(New); } -} - -void Value::replaceAllUsesWith(Value *New) { - assert(New && "Value::replaceAllUsesWith(<null>) is invalid!"); - assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!"); - assert(New->getType() == getType() && - "replaceAllUses of value with new value of different type!"); - - uncheckedReplaceAllUsesWith(New); + + if (BasicBlock *BB = dyn_cast<BasicBlock>(this)) + BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New)); } Value *Value::stripPointerCasts() { diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp index 254bf06439d9..f1c970361a50 100644 --- a/lib/VMCore/ValueSymbolTable.cpp +++ b/lib/VMCore/ValueSymbolTable.cpp @@ -25,7 +25,7 @@ ValueSymbolTable::~ValueSymbolTable() { #ifndef NDEBUG // Only do this in -g mode... for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI) dbgs() << "Value still in symbol table! Type = '" - << VI->getValue()->getType()->getDescription() << "' Name = '" + << *VI->getValue()->getType() << "' Name = '" << VI->getKeyData() << "'\n"; assert(vmap.empty() && "Values remain in symbol table!"); #endif diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index c054ae46f23b..21a1f034446a 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -133,6 +133,7 @@ std::string EVT::getEVTString() const { case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::Metadata:return "Metadata"; + case MVT::untyped: return "untyped"; } } diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 139e03523019..b146b896cbfb 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -49,7 +49,6 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/ValueTypes.h" @@ -109,54 +108,6 @@ INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification", static char &PreVerifyID = PreVerifier::ID; namespace { - class TypeSet : public AbstractTypeUser { - public: - TypeSet() {} - - /// Insert a type into the set of types. - bool insert(const Type *Ty) { - if (!Types.insert(Ty)) - return false; - if (Ty->isAbstract()) - Ty->addAbstractTypeUser(this); - return true; - } - - // Remove ourselves as abstract type listeners for any types that remain - // abstract when the TypeSet is destroyed. - ~TypeSet() { - for (SmallSetVector<const Type *, 16>::iterator I = Types.begin(), - E = Types.end(); I != E; ++I) { - const Type *Ty = *I; - if (Ty->isAbstract()) - Ty->removeAbstractTypeUser(this); - } - } - - // Abstract type user interface. - - /// Remove types from the set when refined. Do not insert the type it was - /// refined to because that type hasn't been verified yet. 
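// Illustrative notes (not part of this commit's diff). First, the hung-off
// use hunks above lay an allocation out as N Use slots followed by one tagged
// UserRef word (a PointerIntPair<User*, 1>), which is how Use::getUser finds
// the owning User from the end of the array:
//
//   Begin                                End
//     | Use[0] | Use[1] | ... | Use[N-1] | UserRef -> owning User |
//   total bytes = N * sizeof(Use) + sizeof(Use::UserRef)
//
// Second, replaceAllUsesWith now folds in the old unchecked variant and, when
// the replaced value is a basic block, also rewrites PHI predecessor entries
// in the successors. A minimal calling sketch (names made up):
#include "llvm/BasicBlock.h"
using namespace llvm;

static void redirectBlock(BasicBlock *Dead, BasicBlock *Replacement) {
  // Updates branches to Dead and, via replaceSuccessorsPhiUsesWith, the
  // incoming-block slots of PHI nodes in Dead's successors. The usual RAUW
  // preconditions still apply: Replacement must differ from Dead and have
  // the same type.
  Dead->replaceAllUsesWith(Replacement);
}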
- void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) { - Types.remove(OldTy); - OldTy->removeAbstractTypeUser(this); - } - - /// Stop listening for changes to a type which is no longer abstract. - void typeBecameConcrete(const DerivedType *AbsTy) { - AbsTy->removeAbstractTypeUser(this); - } - - void dump() const {} - - private: - SmallSetVector<const Type *, 16> Types; - - // Disallow copying. - TypeSet(const TypeSet &); - TypeSet &operator=(const TypeSet &); - }; - struct Verifier : public FunctionPass, public InstVisitor<Verifier> { static char ID; // Pass ID, replacement for typeid bool Broken; // Is this module found to be broken? @@ -176,9 +127,6 @@ namespace { /// an instruction in the same block. SmallPtrSet<Instruction*, 16> InstsInThisBlock; - /// Types - keep track of the types that have been checked already. - TypeSet Types; - /// MDNodes - keep track of the metadata nodes that have been checked /// already. SmallPtrSet<MDNode *, 32> MDNodes; @@ -199,7 +147,6 @@ namespace { bool doInitialization(Module &M) { Mod = &M; Context = &M.getContext(); - verifyTypeSymbolTable(M.getTypeSymbolTable()); // If this is a real pass, in a pass manager, we must abort before // returning back to the pass manager, or else the pass manager may try to @@ -285,7 +232,6 @@ namespace { // Verification methods... - void verifyTypeSymbolTable(TypeSymbolTable &ST); void visitGlobalValue(GlobalValue &GV); void visitGlobalVariable(GlobalVariable &GV); void visitGlobalAlias(GlobalAlias &GA); @@ -345,7 +291,6 @@ namespace { bool isReturnValue, const Value *V); void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs, const Value *V); - void VerifyType(const Type *Ty); void WriteValue(const Value *V) { if (!V) return; @@ -359,8 +304,7 @@ namespace { void WriteType(const Type *T) { if (!T) return; - MessagesStr << ' '; - WriteTypeSymbolic(MessagesStr, T, Mod); + MessagesStr << ' ' << *T; } @@ -568,11 +512,6 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) { } } -void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) { - for (TypeSymbolTable::iterator I = ST.begin(), E = ST.end(); I != E; ++I) - VerifyType(I->second); -} - // VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty, @@ -1139,9 +1078,6 @@ void Verifier::visitPHINode(PHINode &PN) { for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { Assert1(PN.getType() == PN.getIncomingValue(i)->getType(), "PHI node operands are not the same type as the result!", &PN); - Assert1(isa<BasicBlock>(PN.getOperand( - PHINode::getOperandNumForIncomingBlock(i))), - "PHI node incoming block is not a BasicBlock!", &PN); } // All other PHI node constraints are checked in the visitBasicBlock method. @@ -1195,11 +1131,11 @@ void Verifier::VerifyCallSite(CallSite CS) { } // Verify that there's no metadata unless it's a direct call to an intrinsic. 
- if (!CS.getCalledFunction() || + if (CS.getCalledFunction() == 0 || !CS.getCalledFunction()->getName().startswith("llvm.")) { for (FunctionType::param_iterator PI = FTy->param_begin(), PE = FTy->param_end(); PI != PE; ++PI) - Assert1(!PI->get()->isMetadataTy(), + Assert1(!(*PI)->isMetadataTy(), "Function has metadata parameter but isn't an intrinsic", I); } @@ -1382,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { void Verifier::visitExtractValueInst(ExtractValueInst &EVI) { Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(), - EVI.idx_begin(), EVI.idx_end()) == + EVI.getIndices()) == EVI.getType(), "Invalid ExtractValueInst operands!", &EVI); @@ -1391,7 +1327,7 @@ void Verifier::visitExtractValueInst(ExtractValueInst &EVI) { void Verifier::visitInsertValueInst(InsertValueInst &IVI) { Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(), - IVI.idx_begin(), IVI.idx_end()) == + IVI.getIndices()) == IVI.getOperand(1)->getType(), "Invalid InsertValueInst operands!", &IVI); @@ -1482,8 +1418,10 @@ void Verifier::visitInstruction(Instruction &I) { // PHI nodes differ from other nodes because they actually "use" the // value in the predecessor basic blocks they correspond to. BasicBlock *UseBlock = BB; - if (isa<PHINode>(I)) - UseBlock = dyn_cast<BasicBlock>(I.getOperand(i+1)); + if (PHINode *PN = dyn_cast<PHINode>(&I)) { + unsigned j = PHINode::getIncomingValueNumForOperand(i); + UseBlock = PN->getIncomingBlock(j); + } Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB", Op, &I); @@ -1515,10 +1453,11 @@ void Verifier::visitInstruction(Instruction &I) { return; } } - } else if (isa<PHINode>(I)) { + } else if (PHINode *PN = dyn_cast<PHINode>(&I)) { // PHI nodes are more difficult than other nodes because they actually // "use" the value in the predecessor basic blocks they correspond to. - BasicBlock *PredBB = dyn_cast<BasicBlock>(I.getOperand(i+1)); + unsigned j = PHINode::getIncomingValueNumForOperand(i); + BasicBlock *PredBB = PN->getIncomingBlock(j); Assert2(PredBB && (DT->dominates(OpBlock, PredBB) || !DT->isReachableFromEntry(PredBB)), "Instruction does not dominate all uses!", Op, &I); @@ -1542,69 +1481,6 @@ void Verifier::visitInstruction(Instruction &I) { } } InstsInThisBlock.insert(&I); - - VerifyType(I.getType()); -} - -/// VerifyType - Verify that a type is well formed. 
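// Illustrative sketch (not part of this commit's diff): the verifier hunks
// above move from index iterators to the ArrayRef-based getIndices()
// interface. The same check, written outside the verifier (name made up):
#include "llvm/Instructions.h"
using namespace llvm;

static bool extractValueTypeIsConsistent(ExtractValueInst *EVI) {
  // getIndexedType resolves the type reached by the instruction's index
  // list; a well-formed ExtractValueInst produces exactly that type.
  const Type *T = ExtractValueInst::getIndexedType(
      EVI->getAggregateOperand()->getType(), EVI->getIndices());
  return T == EVI->getType();
}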
-/// -void Verifier::VerifyType(const Type *Ty) { - if (!Types.insert(Ty)) return; - - Assert1(Context == &Ty->getContext(), - "Type context does not match Module context!", Ty); - - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - const FunctionType *FTy = cast<FunctionType>(Ty); - - const Type *RetTy = FTy->getReturnType(); - Assert2(FunctionType::isValidReturnType(RetTy), - "Function type with invalid return type", RetTy, FTy); - VerifyType(RetTy); - - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { - const Type *ElTy = FTy->getParamType(i); - Assert2(FunctionType::isValidArgumentType(ElTy), - "Function type with invalid parameter type", ElTy, FTy); - VerifyType(ElTy); - } - break; - } - case Type::StructTyID: { - const StructType *STy = cast<StructType>(Ty); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - const Type *ElTy = STy->getElementType(i); - Assert2(StructType::isValidElementType(ElTy), - "Structure type with invalid element type", ElTy, STy); - VerifyType(ElTy); - } - break; - } - case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); - Assert1(ArrayType::isValidElementType(ATy->getElementType()), - "Array type with invalid element type", ATy); - VerifyType(ATy->getElementType()); - break; - } - case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(Ty); - Assert1(PointerType::isValidElementType(PTy->getElementType()), - "Pointer type with invalid element type", PTy); - VerifyType(PTy->getElementType()); - break; - } - case Type::VectorTyID: { - const VectorType *VTy = cast<VectorType>(Ty); - Assert1(VectorType::isValidElementType(VTy->getElementType()), - "Vector type with invalid element type", VTy); - VerifyType(VTy->getElementType()); - break; - } - default: - break; - } } // Flags used by TableGen to mark intrinsic parameters with the |