diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2012-12-02 13:10:19 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2012-12-02 13:10:19 +0000 |
commit | 522600a229b950314b5f4af84eba4f3e8a0ffea1 (patch) | |
tree | 32b4679ab4b8f28e5228daafc65e9dc436935353 /lib/Transforms/Utils | |
parent | 902a7b529820e6a0aa85f98f21afaeb1805a22f8 (diff) |
Notes
Diffstat (limited to 'lib/Transforms/Utils')
21 files changed, 3399 insertions, 439 deletions
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index d83145289ce2c..6815e411b4211 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -16,7 +16,7 @@ #include "llvm/GlobalValue.h" #include "llvm/Instruction.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const { OS << ']'; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. @@ -219,7 +221,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned VariableScale = 0; int64_t ConstantOffset = 0; - const TargetData *TD = TLI.getTargetData(); + const DataLayout *TD = TLI.getDataLayout(); gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { if (StructType *STy = dyn_cast<StructType>(*GTI)) { diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2679b933f6b0b..9fea11391a1d4 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Support/ErrorHandling.h" @@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { /// is dead. Also recursively delete any operands that become dead as /// a result. This includes tracing the def-use list from the PHI to see if /// it is ultimately unused or if it reaches an unused cycle. -bool llvm::DeleteDeadPHIs(BasicBlock *BB) { +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. SmallVector<WeakVH, 8> PHIs; @@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) { bool Changed = false; for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) - Changed |= RecursivelyDeleteDeadPHINode(PN); + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); return Changed; } @@ -687,3 +687,42 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, return cast<ReturnInst>(NewRet); } +/// SplitBlockAndInsertIfThen - Split the containing block at the +/// specified instruction - everything before and including Cmp stays +/// in the old basic block, and everything after Cmp is moved to a +/// new block. The two blocks are connected by a conditional branch +/// (with value of Cmp being the condition). +/// Before: +/// Head +/// Cmp +/// Tail +/// After: +/// Head +/// Cmp +/// if (Cmp) +/// ThenBlock +/// Tail +/// +/// If Unreachable is true, then ThenBlock ends with +/// UnreachableInst, otherwise it branches to Tail. +/// Returns the NewBasicBlock's terminator. + +TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, + bool Unreachable, MDNode *BranchWeights) { + Instruction *SplitBefore = Cmp->getNextNode(); + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else + CheckTerm = BranchInst::Create(Tail, ThenBlock); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + return CheckTerm; +} diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index e13fd716fa856..74b2ee10e01de 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -22,7 +22,7 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; @@ -34,19 +34,22 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. -Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), + Constant *StrLen = M->getOrInsertFunction("strlen", + AttrListPtr::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); @@ -61,18 +64,21 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, /// specified pointer. Ptr is required to be some pointer type, MaxLen must /// be of size_t type, and the return value has 'intptr_t' type. Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + Constant *StrNLen = M->getOrInsertFunction("strnlen", + AttrListPtr::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -88,17 +94,21 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); + Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; AttributeWithIndex AWI = - AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(AWI), + Constant *StrChr = M->getOrInsertFunction("strchr", + AttrListPtr::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C), "strchr"); @@ -109,20 +119,23 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, /// EmitStrNCmp - Emit a call to the strncmp function to the builder. Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), + Value *StrNCmp = M->getOrInsertFunction("strncmp", + AttrListPtr::get(M->getContext(), + AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -139,17 +152,19 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI, + const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrCpy = M->getOrInsertFunction(Name, + AttrListPtr::get(M->getContext(), AWI), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -161,17 +176,20 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrNCpy = M->getOrInsertFunction(Name, + AttrListPtr::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -185,17 +203,18 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttrListPtr::get(AWI), + AttrListPtr::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -212,16 +231,19 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD, + Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; + AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), + Value *MemChr = M->getOrInsertFunction("memchr", + AttrListPtr::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -237,20 +259,22 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, /// EmitMemCmp - Emit a call to the memcmp function. Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const TargetData *TD, + Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), + Value *MemCmp = M->getOrInsertFunction("memcmp", + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -294,7 +318,7 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) return 0; @@ -316,17 +340,19 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. -Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI), + Value *PutS = M->getOrInsertFunction("puts", + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), NULL); @@ -339,17 +365,19 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI), + F = M->getOrInsertFunction("fputc", + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -370,19 +398,21 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FPutsName, + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -400,21 +430,23 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, + Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FWriteName, + AttrListPtr::get(M->getContext(), AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -436,9 +468,9 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } -bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD, +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI) { - // We really need TargetData for later. + // We really need DataLayout for later. if (!TD) return false; this->CI = CI; diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 0000000000000..bee2f7bcb6ead --- /dev/null +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,262 @@ +//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bypass-slow-division" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" + +using namespace llvm; + +namespace { + struct DivOpInfo { + bool SignedOp; + Value *Dividend; + Value *Divisor; + + DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) + : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} + }; + + struct DivPhiNodes { + PHINode *Quotient; + PHINode *Remainder; + + DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<DivOpInfo> { + static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { + return Val1.SignedOp == Val2.SignedOp && + Val1.Dividend == Val2.Dividend && + Val1.Divisor == Val2.Divisor; + } + + static DivOpInfo getEmptyKey() { + return DivOpInfo(false, 0, 0); + } + + static DivOpInfo getTombstoneKey() { + return DivOpInfo(true, 0, 0); + } + + static unsigned getHashValue(const DivOpInfo &Val) { + return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ + reinterpret_cast<uintptr_t>(Val.Divisor)) ^ + (unsigned)Val.SignedOp; + } + }; + + typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. +static bool insertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + Value *Dividend = Instr->getOperand(0); + Value *Divisor = Instr->getOperand(1); + + if (isa<ConstantInt>(Divisor) || + (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { + // Operations with immediate values should have + // been solved and replaced during compile time. + return false; + } + + // Basic Block is split before divide + BasicBlock *MainBB = I; + BasicBlock *SuccessorBB = I->splitBasicBlock(J); + ++I; //advance iterator I to successorBB + + // Add new basic block for slow divide operation + BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + SlowBB->moveBefore(SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + Value *SlowQuotientV; + Value *SlowRemainderV; + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); + + // Add new basic block for fast divide operation + BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + FastBB->moveBefore(SlowBB); + IRBuilder<> FastBuilder(FastBB, FastBB->begin()); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, + BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, + BypassType); + + // udiv/urem because optimization only handles positive numbers + Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, + ShortDivisorV); + Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, + ShortDivisorV); + Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, + ShortQuotientV, + Dividend->getType()); + Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, + ShortRemainderV, + Dividend->getType()); + FastBuilder.CreateBr(SuccessorBB); + + // Phi nodes for result of div and rem + IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(FastQuotientV, FastBB); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(FastRemainderV, FastBB); + + // Replace Instr with appropriate phi node + if (UseDivOp) + Instr->replaceAllUsesWith(QuoPhi); + else + Instr->replaceAllUsesWith(RemPhi); + Instr->eraseFromParent(); + + // Combine operands into a single value with OR for value testing below + MainBB->getInstList().back().eraseFromParent(); + IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values and branch + Value *ZeroV = MainBuilder.getInt32(0); + Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + + // point iterator J at first instruction of successorBB + J = I->begin(); + + // Cache phi nodes to be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(UseSignedOp, Dividend, Divisor); + DivPhiNodes Value(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); + return true; +} + +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if +// operands and operation are identical. Otherwise call insertFastDiv to perform +// the optimization and cache the resulting dividend and remainder. +static bool reuseOrInsertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); + + if (CacheI == PerBBDivCache.end()) { + // If previous instance does not exist, insert fast div + return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, + PerBBDivCache); + } + + // Replace operation value with previously generated phi node + DivPhiNodes &Value = CacheI->second; + if (UseDivOp) { + // Replace all uses of div instruction with quotient phi node + J->replaceAllUsesWith(Value.Quotient); + } else { + // Replace all uses of rem instruction with remainder phi node + J->replaceAllUsesWith(Value.Remainder); + } + + // Advance to next operation + ++J; + + // Remove redundant operation + Instr->eraseFromParent(); + return true; +} + +// bypassSlowDivision - This optimization identifies DIV instructions that can +// be profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(Function &F, + Function::iterator &I, + const DenseMap<unsigned int, unsigned int> &BypassWidths) { + DivCacheTy DivCache; + + bool MadeChange = false; + for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + + // Get instruction details + unsigned Opcode = J->getOpcode(); + bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; + bool UseSignedOp = Opcode == Instruction::SDiv || + Opcode == Instruction::SRem; + + // Only optimize div or rem ops + if (!UseDivOp && !UseRemOp) + continue; + + // Skip division on vector types, only optimize integer instructions + if (!J->getType()->isIntegerTy()) + continue; + + // Get bitwidth of div/rem instruction + IntegerType *T = cast<IntegerType>(J->getType()); + int bitwidth = T->getBitWidth(); + + // Continue if bitwidth is not bypassed + DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); + if (BI == BypassWidths.end()) + continue; + + // Get type for div/rem instruction with bypass bitwidth + IntegerType *BT = IntegerType::get(J->getContext(), BI->second); + + MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, + UseSignedOp, DivCache); + } + + return MadeChange; +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 4ff31cae62cdd..620209bccbc84 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp + BypassSlowDivision.cpp CloneFunction.cpp CloneModule.cpp CmpInstAnalysis.cpp @@ -10,6 +11,7 @@ add_llvm_library(LLVMTransformUtils DemoteRegToStack.cpp InlineFunction.cpp InstructionNamer.cpp + IntegerDivision.cpp LCSSA.cpp Local.cpp LoopSimplify.cpp @@ -19,12 +21,14 @@ add_llvm_library(LLVMTransformUtils LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp + MetaRenamer.cpp ModuleUtils.cpp PromoteMemoryToRegister.cpp SSAUpdater.cpp SimplifyCFG.cpp SimplifyIndVar.cpp SimplifyInstructions.cpp + SimplifyLibCalls.cpp UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 99237b8390ec0..7ba9f6d9d25df 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -98,10 +98,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Anew->addAttr( OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(0, OldFunc->getAttributes() + .addAttr(NewFunc->getContext(), + AttrListPtr::ReturnIndex, + OldFunc->getAttributes() .getRetAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(~0, OldFunc->getAttributes() + .addAttr(NewFunc->getContext(), + AttrListPtr::FunctionIndex, + OldFunc->getAttributes() .getFnAttributes())); } @@ -202,14 +206,14 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - const TargetData *TD; + const DataLayout *TD; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, const char *nameSuffix, ClonedCodeInfo *codeInfo, - const TargetData *td) + const DataLayout *td) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { @@ -365,7 +369,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const TargetData *TD, + const DataLayout *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index c545cd68c9870..281714f4c1001 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -346,7 +346,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, header->getName(), M); // If the old function is no-throw, so is the new one. if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(true); + newFunction->setDoesNotThrow(); newFunction->getBasicBlockList().push_back(newRootNode); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 89e89e7acf3d7..009847f87bce9 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -27,7 +27,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/CallSite.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -357,7 +357,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Type *VoidPtrTy = Type::getInt8PtrTy(Context); - // Create the alloca. If we have TargetData, use nice alignment. + // Create the alloca. If we have DataLayout, use nice alignment. unsigned Align = 1; if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy); diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp new file mode 100644 index 0000000000000..55227e2714e67 --- /dev/null +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -0,0 +1,420 @@ +//===-- IntegerDivision.cpp - Expand integer division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of 32bit scalar integer division for +// targets that don't have native support. It's largely derived from +// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the +// amount of control flow +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "integer-division" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IRBuilder.h" +#include "llvm/Transforms/Utils/IntegerDivision.h" + +using namespace llvm; + +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + +/// Generate code to divide two signed integers. Returns the quotient, rounded +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. +static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Implementation taken from compiler-rt's __divsi3 + + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %tmp = ashr i32 %dividend, 31 + // ; %tmp1 = ashr i32 %divisor, 31 + // ; %tmp2 = xor i32 %tmp, %dividend + // ; %u_dvnd = sub nsw i32 %tmp2, %tmp + // ; %tmp3 = xor i32 %tmp1, %divisor + // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1 + // ; %q_sgn = xor i32 %tmp1, %tmp + // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr + // ; %tmp4 = xor i32 %q_mag, %q_sgn + // ; %q = sub i32 %tmp4, %q_sgn + Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne); + Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne); + Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); + Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); + Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); + Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1); + Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp); + Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr); + Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn); + Value *Q = Builder.CreateSub(Tmp4, Q_Sgn); + + if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag)) + Builder.SetInsertPoint(UDiv); + + return Q; +} + +/// Generates code to divide two unsigned scalar 32-bit integers. Returns the +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. +static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // The basic algorithm can be found in the compiler-rt project's + // implementation of __udivsi3.c. Here, we do a lower-level IR based approach + // that's been hand-tuned to lessen the amount of control flow involved. + + // Some helper values + IntegerType *I32Ty = Builder.getInt32Ty(); + + ConstantInt *Zero = Builder.getInt32(0); + ConstantInt *One = Builder.getInt32(1); + ConstantInt *ThirtyOne = Builder.getInt32(31); + ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1); + ConstantInt *True = Builder.getTrue(); + + BasicBlock *IBB = Builder.GetInsertBlock(); + Function *F = IBB->getParent(); + Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + I32Ty); + + // Our CFG is going to look like: + // +---------------------+ + // | special-cases | + // | ... | + // +---------------------+ + // | | + // | +----------+ + // | | bb1 | + // | | ... | + // | +----------+ + // | | | + // | | +------------+ + // | | | preheader | + // | | | ... | + // | | +------------+ + // | | | + // | | | +---+ + // | | | | | + // | | +------------+ | + // | | | do-while | | + // | | | ... | | + // | | +------------+ | + // | | | | | + // | +-----------+ +---+ + // | | loop-exit | + // | | ... | + // | +-----------+ + // | | + // +-------+ + // | ... | + // | end | + // +-------+ + BasicBlock *SpecialCases = Builder.GetInsertBlock(); + SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases")); + BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(), + "udiv-end"); + BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(), + "udiv-loop-exit", F, End); + BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(), + "udiv-do-while", F, End); + BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(), + "udiv-preheader", F, End); + BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(), + "udiv-bb1", F, End); + + // We'll be overwriting the terminator to insert our extra blocks + SpecialCases->getTerminator()->eraseFromParent(); + + // First off, check for special cases: dividend or divisor is zero, divisor + // is greater than dividend, and divisor is 1. + // ; special-cases: + // ; %ret0_1 = icmp eq i32 %divisor, 0 + // ; %ret0_2 = icmp eq i32 %dividend, 0 + // ; %ret0_3 = or i1 %ret0_1, %ret0_2 + // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) + // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) + // ; %sr = sub nsw i32 %tmp0, %tmp1 + // ; %ret0_4 = icmp ugt i32 %sr, 31 + // ; %ret0 = or i1 %ret0_3, %ret0_4 + // ; %retDividend = icmp eq i32 %sr, 31 + // ; %retVal = select i1 %ret0, i32 0, i32 %dividend + // ; %earlyRet = or i1 %ret0, %retDividend + // ; br i1 %earlyRet, label %end, label %bb1 + Builder.SetInsertPoint(SpecialCases); + Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); + Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); + Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); + Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True); + Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True); + Value *SR = Builder.CreateSub(Tmp0, Tmp1); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne); + Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); + Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne); + Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); + Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); + Builder.CreateCondBr(EarlyRet, End, BB1); + + // ; bb1: ; preds = %special-cases + // ; %sr_1 = add i32 %sr, 1 + // ; %tmp2 = sub i32 31, %sr + // ; %q = shl i32 %dividend, %tmp2 + // ; %skipLoop = icmp eq i32 %sr_1, 0 + // ; br i1 %skipLoop, label %loop-exit, label %preheader + Builder.SetInsertPoint(BB1); + Value *SR_1 = Builder.CreateAdd(SR, One); + Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR); + Value *Q = Builder.CreateShl(Dividend, Tmp2); + Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); + Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); + + // ; preheader: ; preds = %bb1 + // ; %tmp3 = lshr i32 %dividend, %sr_1 + // ; %tmp4 = add i32 %divisor, -1 + // ; br label %do-while + Builder.SetInsertPoint(Preheader); + Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1); + Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne); + Builder.CreateBr(DoWhile); + + // ; do-while: ; preds = %do-while, %preheader + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + // ; %tmp5 = shl i32 %r_1, 1 + // ; %tmp6 = lshr i32 %q_2, 31 + // ; %tmp7 = or i32 %tmp5, %tmp6 + // ; %tmp8 = shl i32 %q_2, 1 + // ; %q_1 = or i32 %carry_1, %tmp8 + // ; %tmp9 = sub i32 %tmp4, %tmp7 + // ; %tmp10 = ashr i32 %tmp9, 31 + // ; %carry = and i32 %tmp10, 1 + // ; %tmp11 = and i32 %tmp10, %divisor + // ; %r = sub i32 %tmp7, %tmp11 + // ; %sr_2 = add i32 %sr_3, -1 + // ; %tmp12 = icmp eq i32 %sr_2, 0 + // ; br i1 %tmp12, label %loop-exit, label %do-while + Builder.SetInsertPoint(DoWhile); + PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2); + PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2); + PHINode *R_1 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2); + Value *Tmp5 = Builder.CreateShl(R_1, One); + Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne); + Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); + Value *Tmp8 = Builder.CreateShl(Q_2, One); + Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); + Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); + Value *Tmp10 = Builder.CreateAShr(Tmp9, 31); + Value *Carry = Builder.CreateAnd(Tmp10, One); + Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); + Value *R = Builder.CreateSub(Tmp7, Tmp11); + Value *SR_2 = Builder.CreateAdd(SR_3, NegOne); + Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero); + Builder.CreateCondBr(Tmp12, LoopExit, DoWhile); + + // ; loop-exit: ; preds = %do-while, %bb1 + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + // ; %tmp13 = shl i32 %q_3, 1 + // ; %q_4 = or i32 %carry_2, %tmp13 + // ; br label %end + Builder.SetInsertPoint(LoopExit); + PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2); + Value *Tmp13 = Builder.CreateShl(Q_3, One); + Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); + Builder.CreateBr(End); + + // ; end: ; preds = %loop-exit, %special-cases + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + // ; ret i32 %q_5 + Builder.SetInsertPoint(End, End->begin()); + PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2); + + // Populate the Phis, since all values have now been created. Our Phis were: + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + Carry_1->addIncoming(Zero, Preheader); + Carry_1->addIncoming(Carry, DoWhile); + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + SR_3->addIncoming(SR_1, Preheader); + SR_3->addIncoming(SR_2, DoWhile); + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + R_1->addIncoming(Tmp3, Preheader); + R_1->addIncoming(R, DoWhile); + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + Q_2->addIncoming(Q, Preheader); + Q_2->addIncoming(Q_1, DoWhile); + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + Carry_2->addIncoming(Zero, BB1); + Carry_2->addIncoming(Carry, DoWhile); + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + Q_3->addIncoming(Q, BB1); + Q_3->addIncoming(Q_1, DoWhile); + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + Q_5->addIncoming(Q_4, LoopExit); + Q_5->addIncoming(RetVal, SpecialCases); + + return Q_5; +} + +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. +bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + +/// Generate code to divide two integers, replacing Div with the generated +/// code. This currently generates code similarly to compiler-rt's +/// implementations, but future work includes generating more specialized code +/// when more information about the operands are known. Currently only +/// implements 32bit scalar division, but future work is removing this +/// limitation. +/// +/// @brief Replace Div with generated code. +bool llvm::expandDivision(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + IRBuilder<> Builder(Div); + + if (Div->getType()->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + // First prepare the sign if it's a signed division + if (Div->getOpcode() == Instruction::SDiv) { + // Lower the code to unsigned division, and reset Div to point to the udiv. + Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::UDiv) + return true; + + Div = BO; + } + + // Insert the unsigned division code + Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), + Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return true; +} diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index b654111eba74e..5e05c83c35666 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -53,6 +53,8 @@ namespace { // Cached analysis information for the current function. DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; std::vector<BasicBlock*> LoopBlocks; PredIteratorCache PredCache; Loop *L; @@ -117,6 +119,8 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { L = TheLoop; DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); // Get the set of exiting blocks. SmallVector<BasicBlock*, 8> ExitBlocks; @@ -156,6 +160,12 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { MadeChange |= ProcessInstruction(I, ExitBlocks); } } + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && MadeChange) + SE->forgetLoop(L); assert(L->isLCSSAForm(*DT)); PredCache.clear(); @@ -245,7 +255,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); } - + // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { @@ -260,6 +270,9 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { + // Tell the VHs that the uses changed. This updates SCEV's caches. + if (UsesToRewrite[i]->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); UsesToRewrite[i]->set(UserBB->begin()); continue; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bed7d72fffc68..a954d82c05bfe 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -23,6 +23,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Intrinsics.h" +#include "llvm/MDBuilder.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" @@ -38,7 +39,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -52,7 +53,8 @@ using namespace llvm; /// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. -bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -96,7 +98,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = BI->getCondition(); BI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } return false; @@ -121,6 +123,27 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { // Check to see if this branch is going to the same place as the default // dest. If so, eliminate it as an explicit compare. if (i.getCaseSuccessor() == DefaultDest) { + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + // MD should have 2 + NumCases operands. + if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) { + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + // Merge weight of this case to the default weight. + unsigned idx = i.getCaseIndex(); + Weights[0] += Weights[idx+1]; + // Remove weight for this case. + std::swap(Weights[idx+1], Weights.back()); + Weights.pop_back(); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(Weights)); + } // Remove this entry. DefaultDest->removePredecessor(SI->getParent()); SI->removeCase(i); @@ -161,7 +184,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = SI->getCondition(); SI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } @@ -177,8 +200,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { "cond"); // Insert the new branch. - Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(), - SI->getDefaultDest()); + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + if (MD && MD->getNumOperands() == 3) { + ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); + ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); + assert(SICase && SIDef); + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(SICase->getValue().getZExtValue(), + SIDef->getValue().getZExtValue())); + } // Delete the old switch. SI->eraseFromParent(); @@ -205,7 +240,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Address); + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an @@ -230,7 +265,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { /// isInstructionTriviallyDead - Return true if the result produced by the /// instruction is not used, and the instruction has no side effects. /// -bool llvm::isInstructionTriviallyDead(Instruction *I) { +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; // We don't want the landingpad instruction removed by anything this general. @@ -265,9 +301,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa<UndefValue>(II->getArgOperand(1)); } - if (isAllocLikeFn(I)) return true; + if (isAllocLikeFn(I, TLI)) return true; - if (CallInst *CI = isFreeCall(I)) + if (CallInst *CI = isFreeCall(I, TLI)) if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) return C->isNullValue() || isa<UndefValue>(C); @@ -278,9 +314,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// trivially dead instruction, delete it. If that makes any of its operands /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. -bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<Instruction*, 16> DeadInsts; @@ -301,7 +339,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. if (Instruction *OpI = dyn_cast<Instruction>(OpV)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } @@ -334,19 +372,20 @@ static bool areAllUsesEqual(Instruction *I) { /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them /// too, recursively. Return true if a change was made. -bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { SmallPtrSet<Instruction*, 4> Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); I = cast<Instruction>(*I->use_begin())) { if (I->use_empty()) - return RecursivelyDeleteTriviallyDeadInstructions(I); + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. if (!Visited.insert(I)) { // Break the cycle and delete the instruction and its operands. I->replaceAllUsesWith(UndefValue::get(I->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(I); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); return true; } } @@ -358,7 +397,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, + const TargetLibraryInfo *TLI) { bool MadeChange = false; #ifndef NDEBUG @@ -381,7 +421,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { continue; } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (BIHandle != BI) BI = BB->begin(); } @@ -405,7 +445,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, - TargetData *TD) { + DataLayout *TD) { // This only adjusts blocks with PHI nodes. if (!isa<PHINode>(BB->begin())) return; @@ -720,7 +760,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// their preferred alignment from the beginning. /// static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign, const TargetData *TD) { + unsigned PrefAlign, const DataLayout *TD) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { @@ -763,7 +803,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// and it is more than the alignment of the ultimate object, see if we can /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, - const TargetData *TD) { + const DataLayout *TD) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 0bc185d8b7228..9d9e201665640 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -46,6 +46,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" @@ -89,6 +90,7 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DependenceAnalysis>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } @@ -194,6 +196,11 @@ ReprocessLoop: BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); + + // This may make the loop analyzable, force SCEV recomputation. + if (SE) + SE->forgetLoop(L); + Changed = true; } } diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp new file mode 100644 index 0000000000000..233bc12d3cfdd --- /dev/null +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -0,0 +1,132 @@ +//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass renames everything with metasyntatic names. The intent is to use +// this pass after bugpoint reduction to conceal the nature of the original +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/TypeFinder.h" + +using namespace llvm; + +namespace { + + // This PRNG is from the ISO C spec. It is intentionally simple and + // unsuitable for cryptographic use. We're just looking for enough + // variety to surprise and delight users. + struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { + next = seed; + } + + int rand(void) { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } + }; + + struct MetaRenamer : public ModulePass { + static char ID; // Pass identification, replacement for typeid + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) { + static const char *metaNames[] = { + // See http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" + }; + + // Seed our PRNG with simple additive sum of ModuleID. We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (std::string::const_iterator I = M.getModuleIdentifier().begin(), + E = M.getModuleIdentifier().end(); I != E; ++I) + randSeed += *I; + + PRNG prng; + prng.srand(randSeed); + + // Rename all aliases + for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); + AI != AE; ++AI) + AI->setName("alias"); + + // Rename all global variables + for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); + GI != GE; ++GI) + GI->setName("global"); + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isLiteral() || STy->getName().empty()) continue; + + SmallString<128> NameStorage; + STy->setName((Twine("struct.") + metaNames[prng.rand() % + array_lengthof(metaNames)]).toStringRef(NameStorage)); + } + + // Rename all functions + for (Module::iterator FI = M.begin(), FE = M.end(); + FI != FE; ++FI) { + FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); + runOnFunction(*FI); + } + return true; + } + + bool runOnFunction(Function &F) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); + AI != AE; ++AI) + if (!AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + BB->setName("bb"); + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->getType()->isVoidTy()) + I->setName("tmp"); + } + return true; + } + }; +} + +char MetaRenamer::ID = 0; +INITIALIZE_PASS(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) +//===----------------------------------------------------------------------===// +// +// MetaRenamer - Rename everything with metasyntactic names. +// +ModulePass *llvm::createMetaRenamerPass() { + return new MetaRenamer(); +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index dd5e20ed50a7b..558de9d12e6c5 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -212,9 +212,13 @@ namespace { /// DenseMap<AllocaInst*, unsigned> AllocaLookup; - /// NewPhiNodes - The PhiNodes we're adding. + /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify + /// some Phi nodes as we iterate over it, so it should have deterministic + /// iterators. We could use a MapVector, but since we already maintain a + /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is + /// more efficient (also supports removal). /// - DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; + DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes; /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas /// it corresponds to. @@ -588,7 +592,11 @@ void PromoteMem2Reg::run() { while (EliminatedAPHI) { EliminatedAPHI = false; - for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = + // Iterating over NewPhiNodes is deterministic, so it is safe to try to + // simplify and RAUW them as we go. If it was not, we could add uses to + // the values we replace with in a non deterministic order, thus creating + // non deterministic def->use chains. + for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; @@ -612,7 +620,7 @@ void PromoteMem2Reg::run() { // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // - for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = + for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. @@ -992,7 +1000,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, unsigned &Version) { // Look up the basic-block in question. - PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; + PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; // If the BB already has a phi node added for the i'th alloca then we're done! if (PN) return false; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index e568a616b6f0a..72d4199a2a69c 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 518df7cddab45..c767da624e19a 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/IRBuilder.h" @@ -22,6 +23,7 @@ #include "llvm/LLVMContext.h" #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -38,7 +40,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <set> @@ -53,6 +55,13 @@ static cl::opt<bool> DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); +static cl::opt<bool> +SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions down to the end block")); + +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { @@ -68,10 +77,13 @@ namespace { // Comparing pointers is ok as we only rely on the order for uniquing. return Value < RHS.Value; } + + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } }; class SimplifyCFGOpt { - const TargetData *const TD; + const DataLayout *const TD; + const TargetTransformInfo *const TTI; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, @@ -91,7 +103,8 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} + SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti) + : TD(td), TTI(tti) {} bool run(BasicBlock *BB); }; } @@ -101,14 +114,14 @@ public: /// static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { if (SI1 == SI2) return false; // Can't merge with self! - + // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) if (SI1Succs.count(*I)) for (BasicBlock::iterator BBI = (*I)->begin(); @@ -118,7 +131,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { PN->getIncomingValueForBlock(SI2BB)) return false; } - + return true; } @@ -135,7 +148,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, assert(SI1->isUnconditional() && SI2->isConditional()); // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: + // common successors: // 1> We have a constant incoming value for the conditional branch; // 2> We have "Cond" as the incoming value for the unconditional branch; // 3> SI2->getCondition() and Cond have same operands. @@ -170,7 +183,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do - + PHINode *PN; for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) @@ -222,7 +235,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; - + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -252,7 +265,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); if (BI == 0) return 0; - + assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; @@ -345,7 +358,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. if (AggressiveInsts == 0) return false; - + // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -374,7 +387,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) @@ -382,7 +395,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -408,10 +421,10 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { /// Values vector. static Value * GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, - const TargetData *TD, bool isEQ, unsigned &UsedICmps) { + const DataLayout *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { @@ -420,21 +433,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Vals.push_back(C); return I->getOperand(0); } - + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to // the set. ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); - + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); - + // If there are a ton of values, we don't want to make a ginormous switch. if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; - + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; @@ -442,11 +455,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, } return 0; } - + // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) return 0; - + unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, @@ -467,12 +480,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Extra = I->getOperand(1); return LHS; } - + Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; return 0; } - + // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. if (Extra == 0 || Extra == I->getOperand(0)) { @@ -484,7 +497,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - + return 0; } @@ -556,11 +569,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, /// in the list that match the specified block. static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - if (Cases[i].Dest == BB) { - Cases.erase(Cases.begin()+i); - --i; --e; - } + Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); } /// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as @@ -615,6 +624,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Find out information about when control will move from Pred to TI's block. std::vector<ValueEqualityComparisonCase> PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), @@ -634,7 +646,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; - + if (isa<BranchInst>(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. @@ -652,7 +664,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, EraseTerminatorInstAndDCECond(TI); return true; } - + SwitchInst *SI = cast<SwitchInst>(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet<Constant*, 16> DeadCases; @@ -662,18 +674,37 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); + if (HasWeight) + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; if (DeadCases.count(i.getCaseValue())) { + if (HasWeight) { + std::swap(Weights[i.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } i.getCaseSuccessor()->removePredecessor(TI->getParent()); SI->removeCase(i); } } + if (HasWeight && Weights.size() >= 2) + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(Weights)); DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - + // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = 0; @@ -729,8 +760,8 @@ namespace { } static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt**)P1; - const ConstantInt *RHS = *(const ConstantInt**)P2; + const ConstantInt *LHS = *(const ConstantInt*const*)P1; + const ConstantInt *RHS = *(const ConstantInt*const*)P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -738,6 +769,56 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { return -1; } +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Get Weights of a given TerminatorInst, the default weight is at the front +/// of the vector. If TI is a conditional eq, we need to swap the branch-weight +/// metadata. +static void GetBranchWeights(TerminatorInst *TI, + SmallVectorImpl<uint64_t> &Weights) { + MDNode* MD = TI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If TI is a conditional eq, the default case is the false case, + // and the corresponding branch-weight data is at index 2. We swap the + // default weight to be the first entry. + if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(Weights.front(), Weights.back()); + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef<uint64_t> Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -770,6 +851,31 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // build. SmallVector<BasicBlock*, 8> NewSuccessors; + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistant here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + + SmallVector<uint64_t, 8> SuccWeights; + if (SuccHasWeights) { + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistant here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); + if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. @@ -780,6 +886,14 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + PredCases.pop_back(); --i; --e; } @@ -790,21 +904,47 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredDefault = BBDefault; NewSuccessors.push_back(BBDefault); } + + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights || PredHasWeights) { + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i+1]); + ValidTotalSuccWeight += SuccWeights[i+1]; + } } + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } } else { // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. std::set<ConstantInt*, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt*, uint64_t> WeightsForHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == BB) { PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + std::swap(PredCases[i], PredCases.back()); PredCases.pop_back(); --i; --e; @@ -815,6 +955,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (PTIHandled.count(BBCases[i].Value)) { // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); PTIHandled.erase(BBCases[i].Value);// This constant is taken care of @@ -822,9 +964,11 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. - for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = + for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[*I]); PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); NewSuccessors.push_back(BBDefault); } @@ -839,7 +983,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); + assert(TD && "Cannot switch on pointer without DataLayout"); CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), "magicptr"); } @@ -851,6 +995,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(MDWeights)); + } + EraseTerminatorInstAndDCECond(PTI); // Okay, last check. If BB is still a successor of PSI, then we must @@ -984,11 +1139,11 @@ HoistTerminator: Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; - + // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (SI == 0) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1008,6 +1163,175 @@ HoistTerminator: return true; } +/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd, +/// check whether BBEnd has only two predecessors and the other predecessor +/// ends with an unconditional branch. If it is true, sink any common code +/// in the two predecessors to BBEnd. +static bool SinkThenElseCodeToEnd(BranchInst *BI1) { + assert(BI1->isUnconditional()); + BasicBlock *BB1 = BI1->getParent(); + BasicBlock *BBEnd = BI1->getSuccessor(0); + + // Check that BBEnd has two predecessors and the other predecessor ends with + // an unconditional branch. + pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd); + BasicBlock *Pred0 = *PI++; + if (PI == PE) // Only one predecessor. + return false; + BasicBlock *Pred1 = *PI++; + if (PI != PE) // More than two predecessors. + return false; + BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0; + BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator()); + if (!BI2 || !BI2->isUnconditional()) + return false; + + // Gather the PHI nodes in BBEnd. + std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; + Instruction *FirstNonPhiInBBEnd = 0; + for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I)) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN); + } else { + FirstNonPhiInBBEnd = &*I; + break; + } + } + if (!FirstNonPhiInBBEnd) + return false; + + + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. We scan backward for obviously identical + // instructions in an identical order. + BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), + RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(), + RE2 = BB2->getInstList().rend(); + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return false; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return false; + // Skip the unconditional branches. + ++RI1; + ++RI2; + + bool Changed = false; + while (RI1 != RE1 && RI2 != RE2) { + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return Changed; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return Changed; + + Instruction *I1 = &*RI1, *I2 = &*RI2; + // I1 and I2 should have a single use in the same PHI node, and they + // perform the same operation. + // Cannot move control-flow-involving, volatile loads, vaarg, etc. + if (isa<PHINode>(I1) || isa<PHINode>(I2) || + isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) || + isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) || + isa<AllocaInst>(I1) || isa<AllocaInst>(I2) || + I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || + I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || + !I1->hasOneUse() || !I2->hasOneUse() || + MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() || + MapValueFromBB1ToBB2[I1].first != I2) + return Changed; + + // Check whether we should swap the operands of ICmpInst. + ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2); + bool SwapOpnds = false; + if (ICmp1 && ICmp2 && + ICmp1->getOperand(0) != ICmp2->getOperand(0) && + ICmp1->getOperand(1) != ICmp2->getOperand(1) && + (ICmp1->getOperand(0) == ICmp2->getOperand(1) || + ICmp1->getOperand(1) == ICmp2->getOperand(0))) { + ICmp2->swapOperands(); + SwapOpnds = true; + } + if (!I1->isSameOperationAs(I2)) { + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + + // The operands should be either the same or they need to be generated + // with a PHI node after sinking. We only handle the case where there is + // a single pair of different operands. + Value *DifferentOp1 = 0, *DifferentOp2 = 0; + unsigned Op1Idx = 0; + for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { + if (I1->getOperand(I) == I2->getOperand(I)) + continue; + // Early exit if we have more-than one pair of different operands or + // the different operand is already in MapValueFromBB1ToBB2. + // Early exit if we need a PHI node to replace a constant. + if (DifferentOp1 || + MapValueFromBB1ToBB2.find(I1->getOperand(I)) != + MapValueFromBB1ToBB2.end() || + isa<Constant>(I1->getOperand(I)) || + isa<Constant>(I2->getOperand(I))) { + // If we can't sink the instructions, undo the swapping. + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + DifferentOp1 = I1->getOperand(I); + Op1Idx = I; + DifferentOp2 = I2->getOperand(I); + } + + // We insert the pair of different operands to MapValueFromBB1ToBB2 and + // remove (I1, I2) from MapValueFromBB1ToBB2. + if (DifferentOp1) { + PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2, + DifferentOp1->getName() + ".sink", + BBEnd->begin()); + MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN); + // I1 should use NewPN instead of DifferentOp1. + I1->setOperand(Op1Idx, NewPN); + NewPN->addIncoming(DifferentOp1, BB1); + NewPN->addIncoming(DifferentOp2, BB2); + DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); + } + PHINode *OldPN = MapValueFromBB1ToBB2[I1].second; + MapValueFromBB1ToBB2.erase(I1); + + DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";); + DEBUG(dbgs() << " " << *I2 << "\n";); + // We need to update RE1 and RE2 if we are going to sink the first + // instruction in the basic block down. + bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); + // Sink the instruction. + BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + if (!OldPN->use_empty()) + OldPN->replaceAllUsesWith(I1); + OldPN->eraseFromParent(); + + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); + I2->eraseFromParent(); + + if (UpdateRE1) + RE1 = BB1->getInstList().rend(); + if (UpdateRE2) + RE2 = BB2->getInstList().rend(); + FirstNonPhiInBBEnd = I1; + NumSinkCommons++; + Changed = true; + } + return Changed; +} + /// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 /// and an BB2 and the only successor of BB1 is BB2, hoist simple code /// (for now, restricted to a single instruction that's side effect free) from @@ -1056,7 +1380,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast<Instruction>(*i); if (OpI && OpI->getParent() == BIParent && @@ -1112,7 +1436,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // as well. if (PHIs.empty()) return false; - + // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); @@ -1162,13 +1486,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); unsigned Size = 0; - + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. ++Size; - + // We can only support instructions that do not define values that are // live outside of the current basic block. for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); @@ -1176,7 +1500,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB || isa<PHINode>(U)) return false; } - + // Looks ok, continue checking. } @@ -1187,38 +1511,38 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; - + // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); - return true; + return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; - + // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - + if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; - + // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -1227,7 +1551,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); - + // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -1244,7 +1568,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // Clone the instruction. Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName()+".c"); - + // Update operands due to translation. for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { @@ -1252,7 +1576,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { if (PI != TranslateMap.end()) *i = PI->second; } - + // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, TD)) { TranslateMap[BBI] = V; @@ -1283,7 +1607,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1297,7 +1621,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Don't bother if the branch will be constant folded trivially. isa<ConstantInt>(IfCond)) return false; - + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -1307,14 +1631,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) if (NumPhis > 2) return false; - + // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet<Instruction*, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; - + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, TD)) { @@ -1322,19 +1646,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { PN->eraseFromParent(); continue; } - + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, MaxCostVal0) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, MaxCostVal1)) return false; } - + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast<PHINode>(BB->begin()); if (PN == 0) return true; - + // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && @@ -1342,7 +1666,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { isa<BinaryOperator>(PN->getIncomingValue(1)) || isa<BinaryOperator>(IfCond))) return false; - + // If we all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not @@ -1362,7 +1686,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { IfBlock2 = 0; } else { @@ -1375,15 +1699,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); IRBuilder<true, NoFolder> Builder(InsertPt); - + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) @@ -1394,19 +1718,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { DomBlock->getInstList().splice(InsertPt, IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - SelectInst *NV = + + SelectInst *NV = cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. @@ -1420,14 +1744,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); - + // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. @@ -1447,12 +1771,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, EraseTerminatorInstAndDCECond(BI); return true; } - + // Otherwise, figure out what the true and false return values are // so we can insert a new select instruction. Value *TrueValue = TrueRet->getReturnValue(); Value *FalseValue = FalseRet->getReturnValue(); - + // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) @@ -1460,7 +1784,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - + // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is // normally the case, but we could have a potentially-trapping @@ -1472,12 +1796,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) if (FCV->canTrap()) return false; - + // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - + // Insert select instructions where needed. Value *BrCond = BI->getCondition(); if (TrueValue) { @@ -1491,15 +1815,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } } - Value *RI = !TrueValue ? + Value *RI = !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); (void) RI; - + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); - + EraseTerminatorInstAndDCECond(BI); return true; @@ -1510,7 +1834,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, /// parameters and return true, or returns false if no or invalid metadata was /// found. static bool ExtractBranchMetadata(BranchInst *BI, - APInt &ProbTrue, APInt &ProbFalse) { + uint64_t &ProbTrue, uint64_t &ProbFalse) { assert(BI->isConditional() && "Looking for probabilities on unconditional branch?"); MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); @@ -1518,35 +1842,11 @@ static bool ExtractBranchMetadata(BranchInst *BI, ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1)); ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2)); if (!CITrue || !CIFalse) return false; - ProbTrue = CITrue->getValue(); - ProbFalse = CIFalse->getValue(); - assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 && - "Branch probability metadata must be 32-bit integers"); + ProbTrue = CITrue->getValue().getZExtValue(); + ProbFalse = CIFalse->getValue().getZExtValue(); return true; } -/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In -/// the event of overflow, logically-shifts all four inputs right until the -/// multiply fits. -static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D, - unsigned &BitsLost) { - BitsLost = 0; - bool Overflow = false; - APInt Result = A.umul_ov(B, Overflow); - if (Overflow) { - APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A); - do { - B = B.lshr(1); - ++BitsLost; - } while (B.ugt(MaxB)); - A = A.lshr(BitsLost); - C = C.lshr(BitsLost); - D = D.lshr(BitsLost); - Result = A * B; - } - return Result; -} - /// checkCSEInPredecessor - Return true if the given instruction is available /// in its predecessor block. If yes, the instruction will be removed. /// @@ -1600,7 +1900,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (Cond == 0) return false; } - + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1623,7 +1923,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; - + // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; } @@ -1631,13 +1931,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; - + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ingore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; - + if (&*CondIt != BI) return false; @@ -1649,7 +1949,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) if (CE->canTrap()) return false; - + // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; @@ -1659,22 +1959,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); - + // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; if (PBI == 0 || PBI->isUnconditional() || - (BI->isConditional() && + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; - + // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc; + Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; bool InvertPredCond = false; - + if (BI->isConditional()) { if (PBI->getSuccessor(0) == TrueDest) Opc = Instruction::Or; @@ -1693,7 +1993,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values - // must already have been resolved, so we won't be inhibiting the + // must already have been resolved, so we won't be inhibiting the // out-of-order core by speculating them earlier. if (BonusInst) { // Collect the values used by the bonus inst @@ -1707,47 +2007,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { SmallVector<std::pair<Value*, unsigned>, 4> Worklist; Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); - + // Walk up to four levels back up the use-def chain of the predecessor's // terminator to see if all those values were used. The choice of four // levels is arbitrary, to provide a compile-time-cost bound. while (!Worklist.empty()) { std::pair<Value*, unsigned> Pair = Worklist.back(); Worklist.pop_back(); - + if (Pair.second >= 4) continue; UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); - } + } } - + if (!UsedValues.empty()) return false; } DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); + IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { Value *NewCond = PBI->getCondition(); - + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { CmpInst *CI = cast<CmpInst>(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, + NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName()+".not"); } - + PBI->setCondition(NewCond); PBI->swapSuccessors(); } - + // If we have a bonus inst, clone it into the predecessor block. Instruction *NewBonus = 0; if (BonusInst) { @@ -1756,7 +2056,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); } - + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); @@ -1764,21 +2064,60 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); - + if (BI->isConditional()) { - Instruction *NewCond = + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); PBI->setCondition(NewCond); + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + SmallVector<uint64_t, 8> NewWeights; + if (PBI->getSuccessor(0) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * (SuccFalseWeight + + SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); + } AddPredecessorToBlock(TrueDest, PredBlock, BB); PBI->setSuccessor(0, TrueDest); } if (PBI->getSuccessor(1) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + + SuccTrueWeight) + PredFalseWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } AddPredecessorToBlock(FalseDest, PredBlock, BB); PBI->setSuccessor(1, FalseDest); } + if (NewWeights.size() == 2) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } else + PBI->setMetadata(LLVMContext::MD_prof, NULL); } else { // Update PHI nodes in the common successors. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { @@ -1806,7 +2145,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) // is false: PBI_Cond and BI_Value - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::And, PBI->getCondition(), New, "and.cond")); @@ -1814,7 +2153,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *NotCond = cast<Instruction>(Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::Or, NotCond, MergedCond, "or.cond")); @@ -1833,95 +2172,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // TODO: If BB is reachable from all paths through PredBlock, then we // could replace PBI's branch probabilities with BI's. - // Merge probability data into PredBlock's branch. - APInt A, B, C, D; - if (PBI->isConditional() && BI->isConditional() && - ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) { - // Given IR which does: - // bbA: - // br i1 %x, label %bbB, label %bbC - // bbB: - // br i1 %y, label %bbD, label %bbC - // Let's call the probability that we take the edge from %bbA to %bbB - // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to - // %bbC probability 'd'. - // - // We transform the IR into: - // bbA: - // br i1 %z, label %bbD, label %bbC - // where the probability of going to %bbD is (a*c) and going to bbC is - // (b+a*d). - // - // Probabilities aren't stored as ratios directly. Using branch weights, - // we get: - // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D. - - // In the event of overflow, we want to drop the LSB of the input - // probabilities. - unsigned BitsLost; - - // Ignore overflow result on ProbTrue. - APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost); - - APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - } - - APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - Tmp1 = Tmp1.lshr(BitsLost*2); - } - - APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - Tmp1 = Tmp1.lshr(BitsLost*2); - Tmp2 = Tmp2.lshr(BitsLost*2); - } - - bool Overflow1 = false, Overflow2 = false; - APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1); - APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2); - - if (Overflow1 || Overflow2) { - ProbTrue = ProbTrue.lshr(1); - Tmp1 = Tmp1.lshr(1); - Tmp2 = Tmp2.lshr(1); - Tmp3 = Tmp3.lshr(1); - Tmp4 = Tmp2 + Tmp3; - ProbFalse = Tmp4 + Tmp1; - } - - // The sum of branch weights must fit in 32-bits. - if (ProbTrue.isNegative() && ProbFalse.isNegative()) { - ProbTrue = ProbTrue.lshr(1); - ProbFalse = ProbFalse.lshr(1); - } - - if (ProbTrue != ProbFalse) { - // Normalize the result. - APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse); - ProbTrue = ProbTrue.udiv(GCD); - ProbFalse = ProbFalse.udiv(GCD); - - MDBuilder MDB(BI->getContext()); - MDNode *N = MDB.createBranchWeights(ProbTrue.getZExtValue(), - ProbFalse.getZExtValue()); - PBI->setMetadata(LLVMContext::MD_prof, N); - } else { - PBI->setMetadata(LLVMContext::MD_prof, NULL); - } - } else { - PBI->setMetadata(LLVMContext::MD_prof, NULL); - } - // Copy any debug value intrinsics into the end of PredBlock. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) I->clone()->insertBefore(PBI); - + return true; } return false; @@ -1936,7 +2191,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make + // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { @@ -1945,11 +2200,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - + // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. @@ -1969,18 +2224,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } - + BI->setCondition(NewPN); return true; } } - + // If this is a conditional branch in an empty block, and if any // predecessors is a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -1991,11 +2246,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition())) if (CE->canTrap()) return false; - + int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2007,31 +2262,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBIOp = BIOp = 1; else return false; - + // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; - - // Do not perform this transformation if it would require + + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - + unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) if (NumPhis > 2) // Disable this xform. return false; - + // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - - + + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -2046,13 +2301,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; - } - + } + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. - + // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder<true, NoFolder> Builder(PBI); @@ -2065,16 +2320,43 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - + // Modify PBI to branch on the new condition to the new dests. PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - + + // Update branch weight for PBI. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + if (PredHasWeights && SuccHasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to CommonDest should be PredCommon * SuccTotal + + // PredOther * SuccCommon. + // The weight to OtherDest should be PredOther * SuccOther. + SmallVector<uint64_t, 2> NewWeights; + NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon); + NewWeights.push_back(PredOther * SuccOther); + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } + // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - + // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make @@ -2092,10 +2374,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PN->setIncomingValue(PBBIdx, NV); } } - + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; @@ -2107,7 +2389,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB){ + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight){ // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -2136,10 +2420,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // We were only looking for one successor, and it was present. // Create an unconditional branch to it. Builder.CreateBr(TrueBB); - else + else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. - Builder.CreateCondBr(Cond, TrueBB, FalseBB); + BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); + if (TrueWeight != FalseWeight) + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(OldTerm->getContext()). + createBranchWeights(TrueWeight, FalseWeight)); + } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this // terminator must be unreachable. @@ -2176,8 +2465,23 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); + // Get weight for TrueBB and FalseBB. + uint32_t TrueWeight = 0, FalseWeight = 0; + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal). + getSuccessorIndex()]; + FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal). + getSuccessorIndex()]; + } + } + // Perform the actual simplification. - return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, + TrueWeight, FalseWeight); } // SimplifyIndirectBrOnSelect - Replaces @@ -2197,7 +2501,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { BasicBlock *FalseBB = FBA->getBasicBlock(); // Perform the actual simplification. - return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, + 0, 0); } /// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp @@ -2214,11 +2519,11 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// +/// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, - const TargetData *TD, + const DataLayout *TD, IRBuilder<> &Builder) { BasicBlock *BB = ICI->getParent(); @@ -2228,17 +2533,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, Value *V = ICI->getOperand(0); ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); - + // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; - + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) return false; - + // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. @@ -2246,7 +2551,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - + if (Value *V = SimplifyInstruction(ICI, TD)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); @@ -2254,7 +2559,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. @@ -2264,13 +2569,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); - + ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); @@ -2296,8 +2601,23 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // the switch to the merge point on the compared value. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Split weight for default case to case for "Cst". + Weights[0] = (Weights[0]+1) >> 1; + Weights.push_back(Weights[0]); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(MDWeights)); + } + } SI->addCase(Cst, NewBB); - + // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); @@ -2309,12 +2629,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0) return false; - - + + // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. @@ -2323,7 +2643,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, bool TrueWhenEqual = true; Value *ExtraCase = 0; unsigned UsedICmps = 0; - + if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, UsedICmps); @@ -2332,7 +2652,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, UsedICmps); TrueWhenEqual = false; } - + // If we didn't have a multiply compared value, fail. if (CompVal == 0) return false; @@ -2344,21 +2664,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - + // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just an cond branch. if (ExtraCase && Values.size() < 2) return false; - + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - + BasicBlock *BB = BI->getParent(); - + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); - + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. @@ -2372,13 +2695,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - + OldTI->eraseFromParent(); - + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; @@ -2387,19 +2710,19 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); + assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, TD->getIntPtrType(CompVal->getContext()), "magicptr"); } - + // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); - + // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. @@ -2410,10 +2733,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, for (unsigned i = 0, e = Values.size()-1; i != e; ++i) PN->addIncoming(InVal, BB); } - + // Erase the old branch instruction. EraseTerminatorInstAndDCECond(BI); - + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -2467,7 +2790,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - + // Find predecessors that end with branches. SmallVector<BasicBlock*, 8> UncondBranchPreds; SmallVector<BranchInst*, 8> CondBranchPreds; @@ -2481,7 +2804,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { CondBranchPreds.push_back(BI); } } - + // If we found some, do the transformation! if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { @@ -2490,21 +2813,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } - + // If we eliminated all predecessors of the block, delete the block now. if (pred_begin(BB) == pred_end(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); - + return true; } - + // Check out all of the conditional branches going to this return // instruction. If any of them just select between returns, change the // branch itself into a select/return pair. while (!CondBranchPreds.empty()) { BranchInst *BI = CondBranchPreds.pop_back_val(); - + // Check to see if the non-BB successor is also a return block. if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && @@ -2516,9 +2839,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); - + bool Changed = false; - + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI != BB->begin()) { @@ -2558,11 +2881,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BBI->eraseFromParent(); Changed = true; } - + // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; - + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); @@ -2615,7 +2938,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *MaxBlock = 0; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || + if (I->second.first > MaxPop || (I->second.first == MaxPop && MaxIndex > I->second.second)) { MaxPop = I->second.first; MaxIndex = I->second.second; @@ -2627,13 +2950,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // edges to it. SI->setDefaultDest(MaxBlock); Changed = true; - + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from // it. if (isa<PHINode>(MaxBlock->begin())) for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == MaxBlock) { @@ -2648,7 +2971,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // place to note that the call does not throw though. BranchInst *BI = Builder.CreateBr(II->getNormalDest()); II->removeFromParent(); // Take out of symbol table - + // Insert the call now... SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); @@ -2663,7 +2986,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } - + // If this block is now dead, remove it. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { @@ -2706,9 +3029,28 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - Builder.CreateCondBr( + BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); + // Update weight for the newly-created conditional branch. + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Combine all weights for the cases to be the true weight of NewBI. + // We assume that the sum of all weights for a Terminator can fit into 32 + // bits. + uint32_t NewTrueWeight = 0; + for (unsigned I = 1, E = Weights.size(); I != E; ++I) + NewTrueWeight += (uint32_t)Weights[I]; + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(NewTrueWeight, + (uint32_t)Weights[0])); + } + } + // Prune obsolete incoming values off the successor's PHI nodes. for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); isa<PHINode>(BBI); ++BBI) { @@ -2739,15 +3081,33 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { } } + SmallVector<uint64_t, 8> Weights; + bool HasWeight = HasBranchWeights(SI); + if (HasWeight) { + GetBranchWeights(SI, Weights); + HasWeight = (Weights.size() == 1 + SI->getNumCases()); + } + // Remove dead cases from the switch. for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]); assert(Case != SI->case_default() && "Case was not found. Probably mistake in DeadCases forming."); + if (HasWeight) { + std::swap(Weights[Case.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } + // Prune unused values from PHI nodes. Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } + if (HasWeight) { + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(MDWeights)); + } return !DeadCases.empty(); } @@ -2823,33 +3183,512 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { return Changed; } -bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { - // If this switch is too complex to want to look at, ignore it. - if (!isValueEqualityComparison(SI)) +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa<ConstantFP>(C) || + isa<ConstantInt>(C) || + isa<ConstantPointerNull>(C) || + isa<GlobalValue>(C) || + isa<UndefValue>(C); +} + +/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up +/// its constant value in ConstantPool, returning 0 if it's not there. +static Constant *LookupConstant(Value *V, + const SmallDenseMap<Value*, Constant*>& ConstantPool) { + if (Constant *C = dyn_cast<Constant>(V)) + return C; + return ConstantPool.lookup(V); +} + +/// ConstantFold - Try to fold instruction I into a constant. This works for +/// simple instructions such as binary operations where both operands are +/// constant or can be replaced by constants from the ConstantPool. Returns the +/// resulting constant on success, 0 otherwise. +static Constant *ConstantFold(Instruction *I, + const SmallDenseMap<Value*, Constant*>& ConstantPool) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::get(BO->getOpcode(), A, B); + } + + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(I->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); + } + + if (SelectInst *Select = dyn_cast<SelectInst>(I)) { + Constant *A = LookupConstant(Select->getCondition(), ConstantPool); + if (!A) + return 0; + if (A->isAllOnesValue()) + return LookupConstant(Select->getTrueValue(), ConstantPool); + if (A->isNullValue()) + return LookupConstant(Select->getFalseValue(), ConstantPool); + return 0; + } + + if (CastInst *Cast = dyn_cast<CastInst>(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); + } + + return 0; +} + +/// GetCaseResults - Try to determine the resulting constant values in phi nodes +/// at the common destination basic block, *CommonDest, for one of the case +/// destionations CaseDest corresponding to value CaseVal (0 for the default +/// case), of a switch instruction SI. +static bool GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty except for some side-effect free instructions through + // which we can constant-propagate the CaseVal, continue to its successor. + SmallDenseMap<Value*, Constant*> ConstantPool; + ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); + for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; + ++I) { + if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) { + // If the terminator is a simple branch, continue to the next block. + if (T->getNumSuccessors() != 1) + return false; + Pred = CaseDest; + CaseDest = T->getSuccessor(0); + } else if (isa<DbgInfoIntrinsic>(I)) { + // Skip debug intrinsic. + continue; + } else if (Constant *C = ConstantFold(I, ConstantPool)) { + // Instruction is side-effect free and constant. + ConstantPool.insert(std::make_pair(I, C)); + } else { + break; + } + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx), + ConstantPool); + if (!ConstVal) + return false; + + // Note: If the constant comes from constant-propagating the case value + // through the CaseDest basic block, it will be safe to remove the + // instructions in that block. They cannot be used (except in the phi nodes + // we visit) outside CaseDest, because that block does not dominate its + // successor. If it did, we would not be in this phi node. + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. + class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const DataLayout *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const DataLayout *TD) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); + + // If all values in the table are equal, this is that value. + SingleValue = Values.begin()->second; + + // Build up the table contents. + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SingleValue) + SingleValue = 0; + } + + // Fill in any holes in the table with the default result. + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; + } + + if (DefaultValue != SingleValue) + SingleValue = 0; + } + + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; + } + + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + // Insert values into the bitmap. Undef values are set to zero. + if (!isa<UndefValue>(TableContents[I - 1])) { + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; + } + + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of caes, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const DataLayout *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() * 10 >= TableSize * 4) + return true; + + // If each table would fit in a register, we should build it anyway. + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) + return false; + } + return true; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder, + const DataLayout* TD, + const TargetTransformInfo *TTI) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + + // Only build lookup table when we have a target that supports it. + if (!TTI || !TTI->getScalarTargetTransformInfo() || + !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables()) return false; + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore the switch if the number of cases is too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = 0; + typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; + SmallDenseMap<PHINode*, ResultListTy> ResultLists; + SmallDenseMap<PHINode*, Constant*> DefaultResults; + SmallDenseMap<PHINode*, Type*> ResultTypes; + SmallVector<PHINode*, 4> PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, + Results)) + return false; + + // Append the result from this case to the list for each phi. + for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. + SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; + if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, + DefaultResultsList)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) + return false; + + // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Check whether the condition value is within the case range, and branch to + // the new BB. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); + + Value *Result = Table.BuildLookup(TableIndex, Builder); + + // If the result is used to return immediately from the function, we want to + // do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) && + *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + break; + } + + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. + for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + +bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this switch. - if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB) | true; + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB) | true; - Value *Cond = SI->getCondition(); - if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) - if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB) | true; + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB) | true; - // If the block only contains the switch, see if we can fold the block - // away into any preds. - BasicBlock::iterator BBI = BB->begin(); - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(BBI)) - ++BBI; - if (SI == &*BBI) - if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB) | true; + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB) | true; + } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) @@ -2862,13 +3701,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder, TD, TTI)) + return SimplifyCFG(BB) | true; + return false; } bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; - + // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { @@ -2879,7 +3721,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { --i; --e; Changed = true; } - } + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. @@ -2887,14 +3729,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { EraseTerminatorInstAndDCECond(IBI); return true; } - + if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorInstAndDCECond(IBI); return true; } - + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) return SimplifyCFG(BB) | true; @@ -2904,13 +3746,16 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); - + + if (SinkCommon && SinkThenElseCodeToEnd(BI)) + return true; + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - + // If the only instruction in the block is a seteq/setne comparison // against a constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) @@ -2921,7 +3766,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) return true; } - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value @@ -2934,7 +3779,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); - + // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, @@ -2943,7 +3788,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return SimplifyCFG(BB) | true; - + // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. BasicBlock::iterator I = BB->begin(); @@ -2962,17 +3807,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return SimplifyCFG(BB) | true; } } - + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI)) return SimplifyCFG(BB) | true; - + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2999,14 +3844,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB) | true; } - + // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) return SimplifyCFG(BB) | true; - + // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) @@ -3023,11 +3868,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { if (!C) return false; - if (!I->hasOneUse()) // Only look at single-use instructions, for compile time + if (I->use_empty()) return false; if (C->isNullValue()) { - Instruction *Use = I->use_back(); + // Only look at the first use, avoid hurting compile time with long uselists + User *Use = *I->use_begin(); // Now make sure that there are no instructions in between that can alter // control flow (eg. calls) @@ -3114,7 +3960,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // if (MergeBlockIntoPredecessor(BB)) return true; - + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can @@ -3152,6 +3998,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// eliminates unreachable basic blocks, and does other "peephole" optimization /// of the CFG. It returns true if a modification was made. /// -bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { - return SimplifyCFGOpt(TD).run(BB); +bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD, + const TargetTransformInfo *TTI) { + return SimplifyCFGOpt(TD, TTI).run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 5d673f1824112..110f3808573e9 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -24,7 +24,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -44,7 +44,7 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; - const TargetData *TD; // May be NULL + const DataLayout *TD; // May be NULL SmallVectorImpl<WeakVH> &DeadInsts; @@ -56,7 +56,7 @@ namespace { L(Loop), LI(LPM->getAnalysisIfAvailable<LoopInfo>()), SE(SE), - TD(LPM->getAnalysisIfAvailable<TargetData>()), + TD(LPM->getAnalysisIfAvailable<DataLayout>()), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 81eb9e0f8ae16..65353dc46037f 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -23,7 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" @@ -46,7 +46,7 @@ namespace { /// runOnFunction - Remove instructions that simplify. bool runOnFunction(Function &F) { const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -72,7 +72,7 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); } // Place the list of instructions to simplify on the next loop iteration diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp new file mode 100644 index 0000000000000..c3ea63852feda --- /dev/null +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -0,0 +1,1149 @@ +//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/DataLayout.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" + +using namespace llvm; + +/// This class is the abstract base class for the set of optimizations that +/// corresponds to one library call. +namespace { +class LibCallOptimization { +protected: + Function *Caller; + const DataLayout *TD; + const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; + LLVMContext* Context; +public: + LibCallOptimization() { } + virtual ~LibCallOptimization() {} + + /// callOptimizer - This pure virtual method is implemented by base classes to + /// do various optimizations. If this returns null then no transformation was + /// performed. If it returns CI, then it transformed the call and CI is to be + /// deleted. If it returns something else, replace CI with the new value and + /// delete CI. + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + =0; + + Value *optimizeCall(CallInst *CI, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, IRBuilder<> &B) { + Caller = CI->getParent()->getParent(); + this->TD = TD; + this->TLI = TLI; + this->LCS = LCS; + if (CI->getCalledFunction()) + Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + + return callOptimizer(CI->getCalledFunction(), CI, B); + } +}; + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool isOnlyUsedInZeroEqualityComparison(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + +//===----------------------------------------------------------------------===// +// Fortified Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct FortifiedLibCallOptimization : public LibCallOptimization { +protected: + virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, + bool isString) const = 0; +}; + +struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { + CallInst *CI; + + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) + return true; + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { + if (SizeCI->isAllOnesValue()) + return true; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. + if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) + return SizeCI->getZExtValue() >= Arg->getZExtValue(); + } + return false; + } +}; + +struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemSetChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), + false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // __strcpy_chk(x,x) -> x + return Src; + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + return Ret; + } else { + // Maybe we can stil fold __strcpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // This optimization require DataLayout. + if (!TD) return 0; + + Value *Ret = + EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(Context), Len), + CI->getArgOperand(2), B, TD, TLI); + return Ret; + } + return 0; + } +}; + +struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain stpcpy. Otherwise we'll keep our + // stpcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + return Ret; + } else { + // Maybe we can stil fold __stpcpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // This optimization require DataLayout. + if (!TD) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI)) + return 0; + return DstEnd; + } + return 0; + } +}; + +struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + return Ret; + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// +// String and Memory Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct StrCatOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + // These optimizations require DataLayout. + if (!TD) return 0; + + return emitStrLenMemCpy(Src, Dst, Len, B); + } + + Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, + IRBuilder<> &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; + } +}; + +struct StrNCatOpt : public StrCatOpt { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + uint64_t Len; + + // We don't do anything if length is not constant + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Len = LengthArg->getZExtValue(); + else + return 0; + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; // Unbias length. + + // Handle the simple, do-nothing cases: + // strncat(x, "", c) -> x + // strncat(x, c, 0) -> x + if (SrcLen == 0 || Len == 0) return Dst; + + // These optimizations require DataLayout. + if (!TD) return 0; + + // We don't optimize this case + if (Len < SrcLen) return 0; + + // strncat(x, s, c) -> strcat(x, s) + // s is constant so the strcat can be optimized further + return emitStrLenMemCpy(Src, Dst, SrcLen, B); + } +}; + +struct StrChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + + // If the second operand is non-constant, see if we can compute the length + // of the input string and turn this into memchr. + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (CharC == 0) { + // These optimizations require DataLayout. + if (!TD) return 0; + + uint64_t Len = GetStringLength(SrcStr); + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. + return 0; + + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD, TLI); + } + + // Otherwise, the character is a constant, see if the first argument is + // a string literal. If so, we can constant fold. + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) + return 0; + + // Compute the offset, make sure to handle the case when we're searching for + // zero (a weird way to spell strlen). + size_t I = CharC->getSExtValue() == 0 ? + Str.size() : Str.find(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. strchr returns null. + return Constant::getNullValue(CI->getType()); + + // strchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + } +}; + +struct StrRChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strrchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + + // Cannot fold anything if we're not looking for a constant. + if (!CharC) + return 0; + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (TD && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TD, TLI); + return 0; + } + + // Compute the offset. + size_t I = CharC->getSExtValue() == 0 ? + Str.size() : Str.rfind(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. Return null. + return Constant::getNullValue(CI->getType()); + + // strrchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); + } +}; + +struct StrCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcmp" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ConstantInt::get(CI->getType(), 0); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) + return ConstantInt::get(CI->getType(), Str1.compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + // strcmp(P, "x") -> memcmp(P, "x", 2) + uint64_t Len1 = GetStringLength(Str1P); + uint64_t Len2 = GetStringLength(Str2P); + if (Len1 && Len2) { + // These optimizations require DataLayout. + if (!TD) return 0; + + return EmitMemCmp(Str1P, Str2P, + ConstantInt::get(TD->getIntPtrType(*Context), + std::min(Len1, Len2)), B, TD, TLI); + } + + return 0; + } +}; + +struct StrNCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncmp" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strncmp(x,x,n) -> 0 + return ConstantInt::get(CI->getType(), 0); + + // Get the length argument if it is constant. + uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Length = LengthArg->getZExtValue(); + else + return 0; + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = Str1.substr(0, Length); + StringRef SubStr2 = Str2.substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + return 0; + } +}; + +struct StrCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // strcpy(x,x) -> x + return Src; + + // These optimizations require DataLayout. + if (!TD) return 0; + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + return Dst; + } +}; + +struct StpCpyOpt: public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "stpcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + // These optimizations require DataLayout. + if (!TD) return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, LenV, 1); + return DstEnd; + } +}; + +struct StrNCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; + + if (SrcLen == 0) { + // strncpy(x, "", y) -> memset(x, '\0', y, 1) + B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); + return Dst; + } + + uint64_t Len; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) + Len = LengthArg->getZExtValue(); + else + return 0; + + if (Len == 0) return Dst; // strncpy(x, y, 0) -> x + + // These optimizations require DataLayout. + if (!TD) return 0; + + // Let strncpy handle the zero padding + if (Len > SrcLen+1) return 0; + + Type *PT = FT->getParamType(0); + // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(PT), Len), 1); + + return Dst; + } +}; + +struct StrLenOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + Value *Src = CI->getArgOperand(0); + + // Constant folding: strlen("xyz") -> 3 + if (uint64_t Len = GetStringLength(Src)) + return ConstantInt::get(CI->getType(), Len-1); + + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + if (isOnlyUsedInZeroEqualityComparison(CI)) + return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return 0; + } +}; + +struct StrPBrkOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + FT->getReturnType() != FT->getParamType(0)) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> NULL + // strpbrk("", s) -> NULL + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == std::string::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (TD && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); + + return 0; + } +}; + +struct StrToOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy()) + return 0; + + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. + CI->addAttribute(1, Attributes::get(Callee->getContext(), + Attributes::NoCapture)); + } + + return 0; + } +}; + +struct StrSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_not_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + return 0; + } +}; + +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + + return 0; + } +}; + +struct StrStrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + LCS->replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } + return 0; + } +}; + +struct MemCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) + return 0; + + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) return 0; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + StringRef LHSStr, RHSStr; + if (getConstantStringInfo(LHS, LHSStr) && + getConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.size() || Len > RHSStr.size()) + return 0; + uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); + return ConstantInt::get(CI->getType(), Ret); + } + + return 0; + } +}; + +struct MemCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemMoveOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemSetOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +} // End anonymous namespace. + +namespace llvm { + +class LibCallSimplifierImpl { + const DataLayout *TD; + const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; + StringMap<LibCallOptimization*> Optimizations; + + // Fortified library call optimizations. + MemCpyChkOpt MemCpyChk; + MemMoveChkOpt MemMoveChk; + MemSetChkOpt MemSetChk; + StrCpyChkOpt StrCpyChk; + StpCpyChkOpt StpCpyChk; + StrNCpyChkOpt StrNCpyChk; + + // String library call optimizations. + StrCatOpt StrCat; + StrNCatOpt StrNCat; + StrChrOpt StrChr; + StrRChrOpt StrRChr; + StrCmpOpt StrCmp; + StrNCmpOpt StrNCmp; + StrCpyOpt StrCpy; + StpCpyOpt StpCpy; + StrNCpyOpt StrNCpy; + StrLenOpt StrLen; + StrPBrkOpt StrPBrk; + StrToOpt StrTo; + StrSpnOpt StrSpn; + StrCSpnOpt StrCSpn; + StrStrOpt StrStr; + + // Memory library call optimizations. + MemCmpOpt MemCmp; + MemCpyOpt MemCpy; + MemMoveOpt MemMove; + MemSetOpt MemSet; + + void initOptimizations(); + void addOpt(LibFunc::Func F, LibCallOptimization* Opt); +public: + LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS) { + this->TD = TD; + this->TLI = TLI; + this->LCS = LCS; + } + + Value *optimizeCall(CallInst *CI); +}; + +void LibCallSimplifierImpl::initOptimizations() { + // Fortified library call optimizations. + Optimizations["__memcpy_chk"] = &MemCpyChk; + Optimizations["__memmove_chk"] = &MemMoveChk; + Optimizations["__memset_chk"] = &MemSetChk; + Optimizations["__strcpy_chk"] = &StrCpyChk; + Optimizations["__stpcpy_chk"] = &StpCpyChk; + Optimizations["__strncpy_chk"] = &StrNCpyChk; + Optimizations["__stpncpy_chk"] = &StrNCpyChk; + + // String library call optimizations. + addOpt(LibFunc::strcat, &StrCat); + addOpt(LibFunc::strncat, &StrNCat); + addOpt(LibFunc::strchr, &StrChr); + addOpt(LibFunc::strrchr, &StrRChr); + addOpt(LibFunc::strcmp, &StrCmp); + addOpt(LibFunc::strncmp, &StrNCmp); + addOpt(LibFunc::strcpy, &StrCpy); + addOpt(LibFunc::stpcpy, &StpCpy); + addOpt(LibFunc::strncpy, &StrNCpy); + addOpt(LibFunc::strlen, &StrLen); + addOpt(LibFunc::strpbrk, &StrPBrk); + addOpt(LibFunc::strtol, &StrTo); + addOpt(LibFunc::strtod, &StrTo); + addOpt(LibFunc::strtof, &StrTo); + addOpt(LibFunc::strtoul, &StrTo); + addOpt(LibFunc::strtoll, &StrTo); + addOpt(LibFunc::strtold, &StrTo); + addOpt(LibFunc::strtoull, &StrTo); + addOpt(LibFunc::strspn, &StrSpn); + addOpt(LibFunc::strcspn, &StrCSpn); + addOpt(LibFunc::strstr, &StrStr); + + // Memory library call optimizations. + addOpt(LibFunc::memcmp, &MemCmp); + addOpt(LibFunc::memcpy, &MemCpy); + addOpt(LibFunc::memmove, &MemMove); + addOpt(LibFunc::memset, &MemSet); +} + +Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { + if (Optimizations.empty()) + initOptimizations(); + + Function *Callee = CI->getCalledFunction(); + LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); + if (LCO) { + IRBuilder<> Builder(CI); + return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); + } + return 0; +} + +void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) { + if (TLI->has(F)) + Optimizations[TLI->getName(F)] = Opt; +} + +LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI) { + Impl = new LibCallSimplifierImpl(TD, TLI, this); +} + +LibCallSimplifier::~LibCallSimplifier() { + delete Impl; +} + +Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + return Impl->optimizeCall(CI); +} + +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { + I->replaceAllUsesWith(With); + I->eraseFromParent(); +} + +} diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index 24e8c8ff5c5f1..5812d4607dfcf 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -29,6 +29,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializePromotePassPass(Registry); initializeUnifyFunctionExitNodesPass(Registry); initializeInstSimplifierPass(Registry); + initializeMetaRenamerPass(Registry); } /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fc2538db6482f..a30b09321b5e6 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -21,7 +21,7 @@ using namespace llvm; // Out of line method to get vtable etc for class. -void ValueMapTypeRemapper::Anchor() {} +void ValueMapTypeRemapper::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper) { |