| author | Dimitry Andric <dim@FreeBSD.org> | 2012-12-02 13:10:19 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2012-12-02 13:10:19 +0000 |
| commit | 522600a229b950314b5f4af84eba4f3e8a0ffea1 (patch) | |
| tree | 32b4679ab4b8f28e5228daafc65e9dc436935353 /lib/Transforms/InstCombine | |
| parent | 902a7b529820e6a0aa85f98f21afaeb1805a22f8 (diff) | |
Diffstat (limited to 'lib/Transforms/InstCombine')
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h | 14 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 172 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp | 25 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 70 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 201 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 17 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombinePHI.cpp | 2 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineSelect.cpp | 30 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp | 2 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 2 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 7 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstCombineWorklist.h | 4 |
| -rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 401 |
14 files changed, 754 insertions, 195 deletions
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 0d5ef904ee472..7467eca7ab1fc 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -18,10 +18,11 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/TargetFolder.h" +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" namespace llvm { class CallSite; - class TargetData; + class DataLayout; class TargetLibraryInfo; class DbgDeclareInst; class MemIntrinsic; @@ -71,9 +72,10 @@ public: class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { - TargetData *TD; + DataLayout *TD; TargetLibraryInfo *TLI; bool MadeIRChange; + LibCallSimplifier *Simplifier; public: /// Worklist - All of the instructions that need to be simplified. InstCombineWorklist Worklist; @@ -95,7 +97,7 @@ public: virtual void getAnalysisUsage(AnalysisUsage &AU) const; - TargetData *getTargetData() const { return TD; } + DataLayout *getDataLayout() const { return TD; } TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } @@ -218,7 +220,7 @@ private: Type *Ty); Instruction *visitCallSite(CallSite CS); - Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); + Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *TD); bool transformConstExprCastCall(CallSite CS); Instruction *transformCallThroughTrampoline(CallSite CS, IntrinsicInst *Tramp); @@ -365,6 +367,10 @@ private: Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); + + /// Descale - Return a value X such that Val = X * Scale, or null if none. If + /// the multiplication is known not to overflow then NoSignedWrap is set. + Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); }; diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 99b62f8d05a75..d8257e64d8370 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -13,7 +13,7 @@ #include "InstCombine.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cbe1ca4ddcec0..48f270429e5a6 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -13,7 +13,7 @@ #include "InstCombine.h" #include "llvm/Support/CallSite.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -29,6 +29,26 @@ static Type *getPromotedType(Type *Ty) { return Ty; } +/// reduceToSingleValueType - Given an aggregate type which ultimately holds a +/// single scalar element, like {{{type}}} or [1 x type], return type. 
+static Type *reduceToSingleValueType(Type *T) { + while (!T->isSingleValueType()) { + if (StructType *STy = dyn_cast<StructType>(T)) { + if (STy->getNumElements() == 1) + T = STy->getElementType(0); + else + break; + } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) { + if (ATy->getNumElements() == 1) + T = ATy->getElementType(); + else + break; + } else + break; + } + + return T; +} Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD); @@ -74,35 +94,37 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // dest address will be promotable. See if we can find a better type than the // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); + MDNode *CopyMD = 0; if (StrippedDest != MI->getArgOperand(0)) { Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. - while (!SrcETy->isSingleValueType()) { - if (StructType *STy = dyn_cast<StructType>(SrcETy)) { - if (STy->getNumElements() == 1) - SrcETy = STy->getElementType(0); - else - break; - } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) { - if (ATy->getNumElements() == 1) - SrcETy = ATy->getElementType(); - else - break; - } else - break; - } + SrcETy = reduceToSingleValueType(SrcETy); if (SrcETy->isSingleValueType()) { NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp); NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp); + + // If the memcpy has metadata describing the members, see if we can + // get the TBAA tag describing our copy. + if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { + if (M->getNumOperands() == 3 && + M->getOperand(0) && + isa<ConstantInt>(M->getOperand(0)) && + cast<ConstantInt>(M->getOperand(0))->isNullValue() && + M->getOperand(1) && + isa<ConstantInt>(M->getOperand(1)) && + cast<ConstantInt>(M->getOperand(1))->getValue() == Size && + M->getOperand(2) && + isa<MDNode>(M->getOperand(2))) + CopyMD = cast<MDNode>(M->getOperand(2)); + } } } } - // If the memcpy/memmove provides better alignment info than we can // infer, use it. SrcAlign = std::max(SrcAlign, CopyAlign); @@ -112,8 +134,12 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); L->setAlignment(SrcAlign); + if (CopyMD) + L->setMetadata(LLVMContext::MD_tbaa, CopyMD); StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); S->setAlignment(DstAlign); + if (CopyMD) + S->setMetadata(LLVMContext::MD_tbaa, CopyMD); // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType())); @@ -168,7 +194,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { /// the heavy lifting. 
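The SimplifyMemTransfer change above (with the peeling loop now factored into reduceToSingleValueType) lowers a small single-element-aggregate memcpy to a typed load/store pair, and when the call carries a three-operand !tbaa.struct descriptor covering the whole copy, it moves the member's TBAA tag onto the new instructions. A minimal sketch in LLVM 3.2-era IR; the names and metadata ids are hypothetical:

```llvm
%struct.S = type { double }

define void @copy(%struct.S* %d, %struct.S* %s) {
  %dp = bitcast %struct.S* %d to i8*
  %sp = bitcast %struct.S* %s to i8*
  ; 8-byte copy of a struct that reduces to a single double
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dp, i8* %sp, i64 8,
                                       i32 8, i1 false), !tbaa.struct !1
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)

; !1 = metadata !{i64 0, i64 8, metadata !2}  ; one member: offset 0, size 8, tag !2
; After the transform (double* bitcasts elided), !2 rides along as the !tbaa tag:
;   %v = load double* %sd, align 8, !tbaa !2
;   store double %v, double* %dd, align 8, !tbaa !2
```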
/// Instruction *InstCombiner::visitCallInst(CallInst &CI) { - if (isFreeCall(&CI)) + if (isFreeCall(&CI, TLI)) return visitFree(CI); // If the caller function is nounwind, mark the call as nounwind, even if the @@ -243,7 +269,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: break; case Intrinsic::objectsize: { uint64_t Size; - if (getObjectSize(II->getArgOperand(0), Size, TD)) + if (getObjectSize(II->getArgOperand(0), Size, TD, TLI)) return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); return 0; } @@ -731,7 +757,7 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { /// passed through the varargs area, we can eliminate the use of the cast. static bool isSafeToEliminateVarargsCast(const CallSite CS, const CastInst * const CI, - const TargetData * const TD, + const DataLayout * const TD, const int ix) { if (!CI->isLosslessCast()) return false; @@ -752,49 +778,17 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, return true; } -namespace { -class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls { - InstCombiner *IC; -protected: - void replaceCall(Value *With) { - NewInstruction = IC->ReplaceInstUsesWith(*CI, With); - } - bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { - if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) - return true; - if (ConstantInt *SizeCI = - dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { - if (SizeCI->isAllOnesValue()) - return true; - if (isString) { - uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); - // If the length is 0 we don't know how long it is and so we can't - // remove the check. - if (Len == 0) return false; - return SizeCI->getZExtValue() >= Len; - } - if (ConstantInt *Arg = dyn_cast<ConstantInt>( - CI->getArgOperand(SizeArgOp))) - return SizeCI->getZExtValue() >= Arg->getZExtValue(); - } - return false; - } -public: - InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { } - Instruction *NewInstruction; -}; -} // end anonymous namespace - // Try to fold some different type of calls here. // Currently we're only working with the checking functions, memcpy_chk, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. -Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) { if (CI->getCalledFunction() == 0) return 0; - InstCombineFortifiedLibCalls Simplifier(this); - Simplifier.fold(CI, TD, TLI); - return Simplifier.NewInstruction; + if (Value *With = Simplifier->optimizeCall(CI)) + return ReplaceInstUsesWith(*CI, With); + + return 0; } static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { @@ -877,7 +871,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { - if (isAllocLikeFn(CS.getInstruction())) + if (isAllocLikeFn(CS.getInstruction(), TLI)) return visitAllocSite(*CS.getInstruction()); bool Changed = false; @@ -961,7 +955,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Changed = true; } - // Try to optimize the call if possible, we require TargetData for most of + // Try to optimize the call if possible, we require DataLayout for most of // this. None of these calls are seen as possibly dead so go ahead and // delete the instruction now. 
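tryOptimizeCall now delegates to the shared LibCallSimplifier instead of the local InstCombineFortifiedLibCalls visitor, and per the comment above it still only covers the checking ("_chk") routines. A sketch of the kind of fold this performs, with hypothetical values:

```llvm
; The object-size argument is a known constant that covers the copy
; length, so the runtime check can never fire:
;   %r = call i8* @__memcpy_chk(i8* %d, i8* %s, i64 4, i64 16)
; simplifies to a plain copy (the result is %d):
;   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 4, i32 1, i1 false)
```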
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { @@ -1013,8 +1007,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this return value. if (!CallerPAL.isEmpty() && !Caller->use_empty()) { - Attributes RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs & Attribute::typeIncompatible(NewRetTy)) + AttrBuilder RAttrs = CallerPAL.getRetAttributes(); + if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy))) return false; // Attribute not compatible with transformed value. } @@ -1044,12 +1038,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. Attributes Attrs = CallerPAL.getParamAttributes(i + 1); - if (Attrs & Attribute::typeIncompatible(ParamTy)) + if (AttrBuilder(Attrs). + hasAttributes(Attributes::typeIncompatible(ParamTy))) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. - if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { + if (ParamTy != ActTy && Attrs.hasAttribute(Attributes::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; @@ -1101,7 +1096,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) break; Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs; - if (PAttrs & Attribute::VarArgsIncompatible) + if (PAttrs.hasIncompatibleWithVarArgsAttrs()) return false; } @@ -1114,15 +1109,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { attrVec.reserve(NumCommonArgs); // Get any return attributes. - Attributes RAttrs = CallerPAL.getRetAttributes(); + AttrBuilder RAttrs = CallerPAL.getRetAttributes(); // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs &= ~Attribute::typeIncompatible(NewRetTy); + RAttrs.removeAttributes(Attributes::typeIncompatible(NewRetTy)); // Add the new return attributes. - if (RAttrs) - attrVec.push_back(AttributeWithIndex::get(0, RAttrs)); + if (RAttrs.hasAttributes()) + attrVec.push_back( + AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attributes::get(FT->getContext(), RAttrs))); AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { @@ -1136,7 +1133,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. - if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) + Attributes PAttrs = CallerPAL.getParamAttributes(i + 1); + if (PAttrs.hasAttributes()) attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); } @@ -1164,19 +1162,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. - if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1)) + Attributes PAttrs = CallerPAL.getParamAttributes(i + 1); + if (PAttrs.hasAttributes()) attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); } } } - if (Attributes FnAttrs = CallerPAL.getFnAttributes()) - attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); + Attributes FnAttrs = CallerPAL.getFnAttributes(); + if (FnAttrs.hasAttributes()) + attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + FnAttrs)); if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. 
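The attribute hunks here are a mechanical migration from the old Attributes bitmask operators to the AttrBuilder/query API; the logic is unchanged. One concrete case the typeIncompatible guard covers, as a hypothetical sketch: sign/zero-extension return attributes are only meaningful on integer returns, so a call being rewritten to return a non-integer type must either be refused (if the result is used) or have the attribute stripped:

```llvm
;   %r = call signext i32 bitcast (double ()* @f to i32 ()*)()
; cannot become a direct call returning double while 'signext' remains
; on the return value; if %r is unused, RAttrs.removeAttributes(
; Attributes::typeIncompatible(NewRetTy)) drops it first.
```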
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec); + const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(), + attrVec); Instruction *NC; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -1240,8 +1242,9 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // If the call already has the 'nest' attribute somewhere then give up - // otherwise 'nest' would occur twice after splicing in the chain. - if (Attrs.hasAttrSomewhere(Attribute::Nest)) - return 0; + for (unsigned I = 0, E = Attrs.getNumAttrs(); I != E; ++I) + if (Attrs.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) + return 0; assert(Tramp && "transformCallThroughTrampoline called with incorrect CallSite."); @@ -1254,12 +1257,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; Type *NestTy = 0; - Attributes NestAttr = Attribute::None; + Attributes NestAttr; // Look for a parameter marked with the 'nest' attribute. for (FunctionType::param_iterator I = NestFTy->param_begin(), E = NestFTy->param_end(); I != E; ++NestIdx, ++I) - if (NestAttrs.paramHasAttr(NestIdx, Attribute::Nest)) { + if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attributes::Nest)){ // Record the parameter type and any other attributes. NestTy = *I; NestAttr = NestAttrs.getParamAttributes(NestIdx); @@ -1278,8 +1281,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // mean appending it. Likewise for attributes. // Add any result attributes. - if (Attributes Attr = Attrs.getRetAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(0, Attr)); + Attributes Attr = Attrs.getRetAttributes(); + if (Attr.hasAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, + Attr)); { unsigned Idx = 1; @@ -1299,7 +1304,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the original argument and attributes. NewArgs.push_back(*I); - if (Attributes Attr = Attrs.getParamAttributes(Idx)) + Attr = Attrs.getParamAttributes(Idx); + if (Attr.hasAttributes()) NewAttrs.push_back (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); @@ -1308,8 +1314,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, } // Add any function attributes. - if (Attributes Attr = Attrs.getFnAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(~0, Attr)); + Attr = Attrs.getFnAttributes(); + if (Attr.hasAttributes()) + NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, + Attr)); // The trampoline may have been bitcast to a bogus type (FTy). // Handle this by synthesizing a new function type, equal to FTy @@ -1348,7 +1356,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? 
NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs); + const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 555b4428d2e8f..bb59db8e7ba17 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -13,7 +13,7 @@ #include "InstCombine.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -78,7 +78,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, /// try to eliminate the cast by moving the type information into the alloc. Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { - // This requires TargetData to get the alloca alignment and size information. + // This requires DataLayout to get the alloca alignment and size information. if (!TD) return 0; PointerType *PTy = cast<PointerType>(CI.getType()); @@ -229,7 +229,7 @@ isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction Type *DstTy, ///< The target type for the second cast instruction - TargetData *TD ///< The target data for pointer size + DataLayout *TD ///< The target data for pointer size ) { Type *SrcTy = CI->getOperand(0)->getType(); // A from above @@ -238,17 +238,20 @@ isEliminableCastPair( // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); - + Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(SrcTy) : 0; + Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(MidTy) : 0; + Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(DstTy) : 0; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, - TD ? TD->getIntPtrType(CI->getContext()) : 0); - + DstTy, SrcIntPtrTy, MidIntPtrTy, + DstIntPtrTy); + // We don't want to form an inttoptr or ptrtoint that converts to an integer // type that differs from the pointer size. 
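isEliminableCastPair now asks DataLayout for the pointer-sized integer type of each pointer's address space instead of using one global intptr type. The net rule is unchanged: a ptrtoint/inttoptr round trip only folds when the intermediate integer is exactly pointer-sized. Sketch, assuming 64-bit pointers:

```llvm
;   %i = ptrtoint i32* %p to i64
;   %q = inttoptr i64 %i to i8*
; folds to:
;   %q = bitcast i32* %p to i8*
; A round trip through i32 would truncate the pointer and is left alone.
```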
- if ((Res == Instruction::IntToPtr && - (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || - (Res == Instruction::PtrToInt && - (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) + if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) || + (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) Res = 0; return Instruction::CastOps(Res); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bdd310e97f6c2..7c3f8fe15d307 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -16,7 +16,8 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -473,7 +474,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// If we can't emit an optimized form for this expression, this returns null. /// static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { - TargetData &TD = *IC.getTargetData(); + DataLayout &TD = *IC.getDataLayout(); gep_type_iterator GTI = gep_type_begin(GEP); // Check to see if this gep only has a single variable index. If so, and if @@ -2355,8 +2356,25 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Try not to increase register pressure. BO0->hasOneUse() && BO1->hasOneUse()) { // Determine Y and Z in the form icmp (X+Y), (X+Z). - Value *Y = (A == C || A == D) ? B : A; - Value *Z = (C == A || C == B) ? D : C; + Value *Y, *Z; + if (A == C) { + // C + B == C + D -> B == D + Y = B; + Z = D; + } else if (A == D) { + // D + B == C + D -> B == C + Y = B; + Z = C; + } else if (B == C) { + // A + C == C + D -> A == D + Y = A; + Z = D; + } else { + assert(B == D); + // A + D == C + D -> A == C + Y = A; + Z = C; + } return new ICmpInst(Pred, Y, Z); } @@ -2894,10 +2912,6 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (!RHSF) break; - // We can't convert a PPC double double. - if (RHSF->getType()->isPPC_FP128Ty()) - break; - const fltSemantics *Sem; // FIXME: This shouldn't be here. if (LHSExt->getSrcTy()->isHalfTy()) @@ -2910,6 +2924,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Sem = &APFloat::IEEEquad; else if (LHSExt->getSrcTy()->isX86_FP80Ty()) Sem = &APFloat::x87DoubleExtended; + else if (LHSExt->getSrcTy()->isPPC_FP128Ty()) + Sem = &APFloat::PPCDoubleDouble; else break; @@ -2985,6 +3001,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return Res; } break; + case Instruction::Call: { + CallInst *CI = cast<CallInst>(LHSI); + LibFunc::Func Func; + // Various optimization for fabs compared with zero. 
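The code that follows implements these folds; as a sketch of two of them (%x is the fabs argument):

```llvm
;   %a = call double @fabs(double %x)
;   %c = fcmp ogt double %a, 0.0   ; fabs(x) > 0
; becomes
;   %c = fcmp one double %x, 0.0   ; x != 0 (ordered)
; and fabs(x) >= 0, true for everything except NaN, becomes
;   %c = fcmp ord double %x, 0.0   ; !isnan(x)
```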
+ if (RHSC->isNullValue() && CI->getCalledFunction() && + TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && + TLI->has(Func)) { + if (Func == LibFunc::fabs || Func == LibFunc::fabsf || + Func == LibFunc::fabsl) { + switch (I.getPredicate()) { + default: break; + // fabs(x) < 0 --> false + case FCmpInst::FCMP_OLT: + return ReplaceInstUsesWith(I, Builder->getFalse()); + // fabs(x) > 0 --> x != 0 + case FCmpInst::FCMP_OGT: + return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), + RHSC); + // fabs(x) <= 0 --> x == 0 + case FCmpInst::FCMP_OLE: + return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), + RHSC); + // fabs(x) >= 0 --> !isnan(x) + case FCmpInst::FCMP_OGE: + return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), + RHSC); + // fabs(x) == 0 --> x == 0 + // fabs(x) != 0 --> x != 0 + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), + RHSC); + } + } + } + } } } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c485844aaeb45..4d106fc188534 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -14,13 +14,161 @@ #include "InstCombine.h" #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); + +/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to +/// some part of a constant global variable. This intentionally only accepts +/// constant expressions because we can't rewrite arbitrary instructions. +static bool pointsToConstantGlobal(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return GV->isConstant(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + return pointsToConstantGlobal(CE->getOperand(0)); + return false; +} + +/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) +/// pointer to an alloca. Ignore any reads of the pointer, return false if we +/// see any stores or other unknown uses. If we see pointer arithmetic, keep +/// track of whether it moves the pointer (with IsOffset) but otherwise traverse +/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to +/// the alloca, and if the source pointer is a pointer to a constant global, we +/// can optimize this. +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + SmallVectorImpl<Instruction *> &ToDelete, + bool IsOffset = false) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Ignore non-volatile loads, they are always ok. 
+ if (!LI->isSimple()) return false; + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + // If uses of the bitcast are ok, we are ok. + if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + return false; + continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, + IsOffset || !GEP->hasAllZeroIndices())) + return false; + continue; + } + + if (CallSite CS = U) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(UI)) + continue; + + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. + if (CS.isByValArgument(ArgNo)) + continue; + } + + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + ToDelete.push_back(II); + continue; + } + } + + // If this is isn't our memcpy/memmove, reject it as something we can't + // handle. + MemTransferInst *MI = dyn_cast<MemTransferInst>(U); + if (MI == 0) + return false; + + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (UI.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } + + // If we already have seen a copy, reject the second one. + if (TheCopy) return false; + + // If the pointer has been offset from the start of the alloca, we can't + // safely handle this. + if (IsOffset) return false; + + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (UI.getOperandNo() != 0) return false; + + // If the source of the memcpy/move is not a constant global, reject it. + if (!pointsToConstantGlobal(MI->getSource())) + return false; + + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } + return true; +} + +/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only +/// modified by a copy from a constant global. If we can prove this, we can +/// replace any uses of the alloca with uses of the global directly. +static MemTransferInst * +isOnlyCopiedFromConstantGlobal(AllocaInst *AI, + SmallVectorImpl<Instruction *> &ToDelete) { + MemTransferInst *TheCopy = 0; + if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) + return TheCopy; + return 0; +} + +/// getPointeeAlignment - Compute the minimum alignment of the value pointed +/// to by the given pointer. 
+static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + (CE->getOpcode() == Instruction::GetElementPtr && + cast<GEPOperator>(CE)->hasAllZeroIndices())) + return getPointeeAlignment(CE->getOperand(0), TD); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (!GV->isDeclaration()) + return TD.getPreferredAlignment(GV); + + if (PointerType *PT = dyn_cast<PointerType>(V->getType())) + if (PT->getElementType()->isSized()) + return TD.getABITypeAlignment(PT->getElementType()); + + return 0; +} Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that @@ -99,12 +247,16 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { return &AI; } + // If the alignment of the entry block alloca is 0 (unspecified), + // assign it the preferred alignment. + if (EntryAI->getAlignment() == 0) + EntryAI->setAlignment( + TD->getPrefTypeAlignment(EntryAI->getAllocatedType())); // Replace this zero-sized alloca with the one at the start of the entry // block after ensuring that the address will be aligned enough for both // types. - unsigned MaxAlign = - std::max(TD->getPrefTypeAlignment(EntryAI->getAllocatedType()), - TD->getPrefTypeAlignment(AI.getAllocatedType())); + unsigned MaxAlign = std::max(EntryAI->getAlignment(), + AI.getAlignment()); EntryAI->setAlignment(MaxAlign); if (AI.getType() != EntryAI->getType()) return new BitCastInst(EntryAI, AI.getType()); @@ -113,6 +265,31 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } + if (TD) { + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } + } + } + // At last, use the generic allocation site handler to aggressively remove // unused allocas. return visitAllocSite(AI); @@ -121,7 +298,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { /// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. 
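With the helpers above, visitAllocaInst can now replace an alloca whose contents are only ever written by a single memcpy/memmove from a constant global (of sufficient alignment) with the global itself, erasing the copy and any lifetime markers. A sketch:

```llvm
@G = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]

;   %A = alloca [4 x i32], align 4
;   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Ap, i8* %Gp, i64 16, i32 4, i1 false)
;   ... only reads of %A follow ...
; All uses of %A are rewritten to @G, and both the alloca and the
; memcpy are deleted (%Ap/%Gp stand for the usual i8* bitcasts).
```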
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, - const TargetData *TD) { + const DataLayout *TD) { User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); @@ -151,14 +328,14 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, SrcPTy = SrcTy->getElementType(); } - if (IC.getTargetData() && + if (IC.getDataLayout() && (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) && - IC.getTargetData()->getTypeSizeInBits(SrcPTy) == - IC.getTargetData()->getTypeSizeInBits(DestPTy)) { + IC.getDataLayout()->getTypeSizeInBits(SrcPTy) == + IC.getDataLayout()->getTypeSizeInBits(DestPTy)) { // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast @@ -336,11 +513,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. - if (!IC.getTargetData() || + if (!IC.getDataLayout() || SrcTy->getAddressSpace() != cast<PointerType>(CI->getType())->getAddressSpace() || - IC.getTargetData()->getTypeSizeInBits(SrcPTy) != - IC.getTargetData()->getTypeSizeInBits(DestPTy)) + IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != + IC.getDataLayout()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 35a0bbb76146d..cefe45ec862ce 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -37,7 +37,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), m_Value(B))) && // The "1" can be any value known to be a power of 2. - isPowerOfTwo(PowerOf2, IC.getTargetData())) { + isPowerOfTwo(PowerOf2, IC.getDataLayout())) { A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } @@ -46,7 +46,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) if (I->isLogicalShift() && - isPowerOfTwo(I->getOperand(0), IC.getTargetData())) { + isPowerOfTwo(I->getOperand(0), IC.getDataLayout())) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. 
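Among the visitUDiv changes in the next hunk is a new fold, (x lshr C1) udiv C2 --> x udiv (C2 << C1): dividing by 1<<C1 and then by C2 is the same as one division by C2 << C1. Sketch:

```llvm
;   %s = lshr i32 %x, 2
;   %r = udiv i32 %s, 50
; becomes
;   %r = udiv i32 %x, 200   ; 50 << 2
```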
if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) { @@ -462,12 +462,23 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { } } + // (x lshr C1) udiv C2 --> x udiv (C2 << C1) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) { + Value *X; + ConstantInt *C1; + if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) { + APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1)); + return BinaryOperator::CreateUDiv(X, Builder->getInt(NC)); + } + } + // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) { const APInt *CI; Value *N; if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) { if (*CI != 1) - N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2())); + N = Builder->CreateAdd(N, + ConstantInt::get(N->getType(), CI->logBase2())); if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) N = Builder->CreateZExt(N, Z->getDestTy()); if (I.isExact()) diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 664546c165511..de9c77e6005a4 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -13,7 +13,7 @@ #include "InstCombine.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 291e80019e8df..a2d4c888f2cf9 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -287,7 +287,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is /// replaced with RepOp. static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI) { // Trivial replacement. if (V == Op) @@ -333,6 +333,10 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // All operands were constants, fold it. if (ConstOps.size() == I->getNumOperands()) { + if (CmpInst *C = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], + ConstOps[1], TD, TLI); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) if (!LI->isVolatile()) return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); @@ -903,7 +907,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -912,6 +916,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, V); return &SI; } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { + // Form a shufflevector instruction. + SmallVector<Constant *, 8> Mask(VWidth); + Type *Int32Ty = Type::getInt32Ty(CV->getContext()); + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elem = cast<Constant>(CV->getOperand(i)); + if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) + Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? 
VWidth : 0)); + else if (isa<UndefValue>(Elem)) + Mask[i] = UndefValue::get(Int32Ty); + else + return 0; + } + Constant *MaskVal = ConstantVector::get(Mask); + Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); + return ReplaceInstUsesWith(SI, V); + } + + if (isa<ConstantAggregateZero>(CondVal)) { + return ReplaceInstUsesWith(SI, FalseVal); + } } return 0; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 4bb2403299ce8..57021f1bef84b 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -190,7 +190,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, V = IC.Builder->CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - V = ConstantFoldConstantExpression(CE, IC.getTargetData(), + V = ConstantFoldConstantExpression(CE, IC.getDataLayout(), IC.getTargetLibraryInfo()); return V; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 54be8ed3fa90f..602b20337144e 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -14,7 +14,7 @@ #include "InstCombine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/IntrinsicInst.h" using namespace llvm; diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index cf60f0f426dcb..dd7ea14e8a898 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -636,8 +636,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If LHS's width is changed, shift the mask value accordingly. // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any - // references to RHSOp0 to LHSOp0, so we don't need to shift the mask. - if (eltMask >= 0 && newRHS != NULL) + // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. + // If newRHS == newLHS, we want to remap any references from newRHS to + // newLHS so that we can properly identify splats that may occur due to + // obfuscation accross the two vectors. 
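The visitSelectInst addition above turns a select whose condition is a constant vector into a shufflevector: a true lane picks element i of the first operand (mask value i), a false lane picks element i of the second (mask value i + VWidth), and undef lanes stay undef; a select on an all-zero condition simply becomes its false operand. Sketch:

```llvm
;   %r = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
;               <4 x i32> %a, <4 x i32> %b
; becomes
;   %r = shufflevector <4 x i32> %a, <4 x i32> %b,
;                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
```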
+ if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS) eltMask += newLHSWidth; } diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 99a02fc0df3f6..ea654ae9ed0a8 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -26,8 +26,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector<Instruction*, 256> Worklist; DenseMap<Instruction*, unsigned> WorklistMap; - void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT - InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT + void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION; + InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION; public: InstCombineWorklist() {} diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 68ecd516049de..9a46f25e66ff8 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -40,7 +40,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" @@ -88,7 +88,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { Value *InstCombiner::EmitGEPOffset(User *GEP) { - return llvm::EmitGEPOffset(Builder, *getTargetData(), GEP); + return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP); } /// ShouldChangeType - Return true if it is desirable to convert a computation @@ -805,6 +805,244 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { return true; } +/// Descale - Return a value X such that Val = X * Scale, or null if none. If +/// the multiplication is known not to overflow then NoSignedWrap is set. +Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { + assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!"); + assert(cast<IntegerType>(Val->getType())->getBitWidth() == + Scale.getBitWidth() && "Scale not compatible with value!"); + + // If Val is zero or Scale is one then Val = Val * Scale. + if (match(Val, m_Zero()) || Scale == 1) { + NoSignedWrap = true; + return Val; + } + + // If Scale is zero then it does not divide Val. + if (Scale.isMinValue()) + return 0; + + // Look through chains of multiplications, searching for a constant that is + // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4 + // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by + // a factor of 4 will produce X*(Y*2). The principle of operation is to bore + // down from Val: + // + // Val = M1 * X || Analysis starts here and works down + // M1 = M2 * Y || Doesn't descend into terms with more + // M2 = Z * 4 \/ than one use + // + // Then to modify a term at the bottom: + // + // Val = M1 * X + // M1 = Z * Y || Replaced M2 with Z + // + // Then to work back up correcting nsw flags. + + // Op - the term we are currently analyzing. Starts at Val then drills down. + // Replaced with its descaled value before exiting from the drill down loop. + Value *Op = Val; + + // Parent - initially null, but after drilling down notes where Op came from. + // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the + // 0'th operand of Val. 
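Continuing the comment's example with concrete values, a hypothetical run of Descale:

```llvm
; Descale(%val, Scale = 4), where:
;   %m1  = mul i32 %y, 8          ; one use
;   %val = mul i32 %x, %m1        ; the drill-down follows one-use terms
; The constant 8 at the bottom is divisible by 4, so that term is
; rewritten in place:
;   %m1  = mul i32 %y, 2
; and %val (now computing %x * (%y * 2)) is returned as X, satisfying
; old %val = X * 4; nsw flags are re-validated on the way back up.
```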
+ std::pair<Instruction*, unsigned> Parent; + + // RequireNoSignedWrap - Set if the transform requires a descaling at deeper + // levels that doesn't overflow. + bool RequireNoSignedWrap = false; + + // logScale - log base 2 of the scale. Negative if not a power of 2. + int32_t logScale = Scale.exactLogBase2(); + + for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down + + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // If Op is a constant divisible by Scale then descale to the quotient. + APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth. + APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder); + if (!Remainder.isMinValue()) + // Not divisible by Scale. + return 0; + // Replace with the quotient in the parent. + Op = ConstantInt::get(CI->getType(), Quotient); + NoSignedWrap = true; + break; + } + + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) { + + if (BO->getOpcode() == Instruction::Mul) { + // Multiplication. + NoSignedWrap = BO->hasNoSignedWrap(); + if (RequireNoSignedWrap && !NoSignedWrap) + return 0; + + // There are three cases for multiplication: multiplication by exactly + // the scale, multiplication by a constant different to the scale, and + // multiplication by something else. + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Multiplication by a constant. + if (CI->getValue() == Scale) { + // Multiplication by exactly the scale, replace the multiplication + // by its left-hand side in the parent. + Op = LHS; + break; + } + + // Otherwise drill down into the constant. + if (!Op->hasOneUse()) + return 0; + + Parent = std::make_pair(BO, 1); + continue; + } + + // Multiplication by something else. Drill down into the left-hand side + // since that's where the reassociate pass puts the good stuff. + if (!Op->hasOneUse()) + return 0; + + Parent = std::make_pair(BO, 0); + continue; + } + + if (logScale > 0 && BO->getOpcode() == Instruction::Shl && + isa<ConstantInt>(BO->getOperand(1))) { + // Multiplication by a power of 2. + NoSignedWrap = BO->hasNoSignedWrap(); + if (RequireNoSignedWrap && !NoSignedWrap) + return 0; + + Value *LHS = BO->getOperand(0); + int32_t Amt = cast<ConstantInt>(BO->getOperand(1))-> + getLimitedValue(Scale.getBitWidth()); + // Op = LHS << Amt. + + if (Amt == logScale) { + // Multiplication by exactly the scale, replace the multiplication + // by its left-hand side in the parent. + Op = LHS; + break; + } + if (Amt < logScale || !Op->hasOneUse()) + return 0; + + // Multiplication by more than the scale. Reduce the multiplying amount + // by the scale in the parent. + Parent = std::make_pair(BO, 1); + Op = ConstantInt::get(BO->getType(), Amt - logScale); + break; + } + } + + if (!Op->hasOneUse()) + return 0; + + if (CastInst *Cast = dyn_cast<CastInst>(Op)) { + if (Cast->getOpcode() == Instruction::SExt) { + // Op is sign-extended from a smaller type, descale in the smaller type. + unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + APInt SmallScale = Scale.trunc(SmallSize); + // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to + // descale Op as (sext Y) * Scale. In order to have + // sext (Y * SmallScale) = (sext Y) * Scale + // some conditions need to hold however: SmallScale must sign-extend to + // Scale and the multiplication Y * SmallScale should not overflow. + if (SmallScale.sext(Scale.getBitWidth()) != Scale) + // SmallScale does not sign-extend to Scale. 
+ return 0; + assert(SmallScale.exactLogBase2() == logScale); + // Require that Y * SmallScale must not overflow. + RequireNoSignedWrap = true; + + // Drill down through the cast. + Parent = std::make_pair(Cast, 0); + Scale = SmallScale; + continue; + } + + if (Cast->getOpcode() == Instruction::Trunc) { + // Op is truncated from a larger type, descale in the larger type. + // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then + // trunc (Y * sext Scale) = (trunc Y) * Scale + // always holds. However (trunc Y) * Scale may overflow even if + // trunc (Y * sext Scale) does not, so nsw flags need to be cleared + // from this point up in the expression (see later). + if (RequireNoSignedWrap) + return 0; + + // Drill down through the cast. + unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + Parent = std::make_pair(Cast, 0); + Scale = Scale.sext(LargeSize); + if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits()) + logScale = -1; + assert(Scale.exactLogBase2() == logScale); + continue; + } + } + + // Unsupported expression, bail out. + return 0; + } + + // We know that we can successfully descale, so from here on we can safely + // modify the IR. Op holds the descaled version of the deepest term in the + // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known + // not to overflow. + + if (!Parent.first) + // The expression only had one term. + return Op; + + // Rewrite the parent using the descaled version of its operand. + assert(Parent.first->hasOneUse() && "Drilled down when more than one use!"); + assert(Op != Parent.first->getOperand(Parent.second) && + "Descaling was a no-op?"); + Parent.first->setOperand(Parent.second, Op); + Worklist.Add(Parent.first); + + // Now work back up the expression correcting nsw flags. The logic is based + // on the following observation: if X * Y is known not to overflow as a signed + // multiplication, and Y is replaced by a value Z with smaller absolute value, + // then X * Z will not overflow as a signed multiplication either. As we work + // our way up, having NoSignedWrap 'true' means that the descaled value at the + // current level has strictly smaller absolute value than the original. + Instruction *Ancestor = Parent.first; + do { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) { + // If the multiplication wasn't nsw then we can't say anything about the + // value of the descaled multiplication, and we have to clear nsw flags + // from this point on up. + bool OpNoSignedWrap = BO->hasNoSignedWrap(); + NoSignedWrap &= OpNoSignedWrap; + if (NoSignedWrap != OpNoSignedWrap) { + BO->setHasNoSignedWrap(NoSignedWrap); + Worklist.Add(Ancestor); + } + } else if (Ancestor->getOpcode() == Instruction::Trunc) { + // The fact that the descaled input to the trunc has smaller absolute + // value than the original input doesn't tell us anything useful about + // the absolute values of the truncations. + NoSignedWrap = false; + } + assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) && + "Failed to keep proper track of nsw flags while drilling down?"); + + if (Ancestor == Val) + // Got to the top, all done! + return Val; + + // Move up one level in the expression. 
+ assert(Ancestor->hasOneUse() && "Drilled down when more than one use!"); + Ancestor = Ancestor->use_back(); + } while (1); +} + Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); @@ -817,7 +1055,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - Type *IntPtrTy = TD->getIntPtrType(GEP.getContext()); + Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); @@ -836,7 +1074,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) { + if (IndexTy != IntPtrTy) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. @@ -855,7 +1093,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) return 0; - // Note that if our source is a gep chain itself that we wait for that + // Note that if our source is a gep chain itself then we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. if (GEPOperator *SrcGEP = @@ -987,63 +1225,74 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } // Transform things like: + // %V = mul i64 %N, 4 + // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V + // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast + if (TD && ResElTy->isSized() && SrcElTy->isSized()) { + // Check that changing the type amounts to dividing the index by a scale + // factor. + uint64_t ResSize = TD->getTypeAllocSize(ResElTy); + uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy); + if (ResSize && SrcSize % ResSize == 0) { + Value *Idx = GEP.getOperand(1); + unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); + uint64_t Scale = SrcSize / ResSize; + + // Earlier transforms ensure that the index has type IntPtrType, which + // considerably simplifies the logic by eliminating implicit casts. + assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + "Index not cast to pointer width?"); + + bool NSW; + if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { + // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. + // If the multiplication NewIdx * Scale may overflow then the new + // GEP may not be "inbounds". + Value *NewGEP = GEP.isInBounds() && NSW ? + Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName()); + // The NewGEP must be pointer typed, so must the old one -> BitCast + return new BitCastInst(NewGEP, GEP.getType()); + } + } + } + + // Similarly, transform things like: // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - - if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) { + if (TD && ResElTy->isSized() && SrcElTy->isSized() && + SrcElTy->isArrayTy()) { + // Check that changing to the array element type amounts to dividing the + // index by a scale factor. 
+ uint64_t ResSize = TD->getTypeAllocSize(ResElTy); uint64_t ArrayEltSize = - TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); - - // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We - // allow either a mul, shift, or constant here. - Value *NewIdx = 0; - ConstantInt *Scale = 0; - if (ArrayEltSize == 1) { - NewIdx = GEP.getOperand(1); - Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); - } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { - NewIdx = ConstantInt::get(CI->getType(), 1); - Scale = CI; - } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){ - if (Inst->getOpcode() == Instruction::Shl && - isa<ConstantInt>(Inst->getOperand(1))) { - ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); - uint32_t ShAmtVal = ShAmt->getLimitedValue(64); - Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), - 1ULL << ShAmtVal); - NewIdx = Inst->getOperand(0); - } else if (Inst->getOpcode() == Instruction::Mul && - isa<ConstantInt>(Inst->getOperand(1))) { - Scale = cast<ConstantInt>(Inst->getOperand(1)); - NewIdx = Inst->getOperand(0); + TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); + if (ResSize && ArrayEltSize % ResSize == 0) { + Value *Idx = GEP.getOperand(1); + unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); + uint64_t Scale = ArrayEltSize / ResSize; + + // Earlier transforms ensure that the index has type IntPtrType, which + // considerably simplifies the logic by eliminating implicit casts. + assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + "Index not cast to pointer width?"); + + bool NSW; + if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) { + // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. + // If the multiplication NewIdx * Scale may overflow then the new + // GEP may not be "inbounds". + Value *Off[2]; + Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); + Off[1] = NewIdx; + Value *NewGEP = GEP.isInBounds() && NSW ? + Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); + // The NewGEP must be pointer typed, so must the old one -> BitCast + return new BitCastInst(NewGEP, GEP.getType()); } } - - // If the index will be to exactly the right offset with the scale taken - // out, perform the transformation. Note, we don't know whether Scale is - // signed or not. We'll use unsigned version of division/modulo - // operation after making sure Scale doesn't have the sign bit set. - if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && - Scale->getZExtValue() % ArrayEltSize == 0) { - Scale = ConstantInt::get(Scale->getType(), - Scale->getZExtValue() / ArrayEltSize); - if (Scale->getZExtValue() != 1) { - Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), - false /*ZExt*/); - NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); - } - - // Insert the new GEP instruction. - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); - Idx[1] = NewIdx; - Value *NewGEP = GEP.isInBounds() ? 
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()): - Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); - // The NewGEP must be pointer typed, so must the old one -> BitCast - return new BitCastInst(NewGEP, GEP.getType()); - } } } } @@ -1068,7 +1317,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa<AllocaInst>(BCI->getOperand(0)) || - isAllocationFn(BCI->getOperand(0))) { + isAllocationFn(BCI->getOperand(0), TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1107,7 +1356,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 4> Worklist; Worklist.push_back(AI); @@ -1163,7 +1413,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { } } - if (isFreeCall(I)) { + if (isFreeCall(I, TLI)) { Users.push_back(I); continue; } @@ -1188,7 +1438,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. SmallVector<WeakVH, 64> Users; - if (isAllocSiteRemovable(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users, TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null<Instruction>(&*Users[i]); if (!I) continue; @@ -1853,7 +2103,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { static bool AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet<BasicBlock*, 64> &Visited, InstCombiner &IC, - const TargetData *TD, + const DataLayout *TD, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; @@ -1872,7 +2122,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Instruction *Inst = BBI++; // DCE instruction if trivially dead. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); @@ -2002,7 +2252,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { if (I == 0) continue; // skip null values. // Check to see if we can DCE the instruction. - if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { DEBUG(errs() << "IC: DCE: " << *I << '\n'); EraseInstFromFunction(*I); ++NumDeadInst; @@ -2102,7 +2352,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // If the instruction was modified, it's possible that it is now dead. // if so, remove it. 
- if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { EraseInstFromFunction(*I); } else { Worklist.Add(I); @@ -2117,9 +2367,27 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { return MadeIRChange; } +namespace { +class InstCombinerLibCallSimplifier : public LibCallSimplifier { + InstCombiner *IC; +public: + InstCombinerLibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI, + InstCombiner *IC) + : LibCallSimplifier(TD, TLI) { + this->IC = IC; + } + + /// replaceAllUsesWith - override so that instruction replacement + /// can be defined in terms of the instruction combiner framework. + virtual void replaceAllUsesWith(Instruction *I, Value *With) const { + IC->ReplaceInstUsesWith(*I, With); + } +}; +} bool InstCombiner::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable<TargetData>(); + TD = getAnalysisIfAvailable<DataLayout>(); TLI = &getAnalysis<TargetLibraryInfo>(); /// Builder - This is an IRBuilder that automatically inserts new @@ -2129,6 +2397,9 @@ bool InstCombiner::runOnFunction(Function &F) { InstCombineIRInserter(Worklist)); Builder = &TheBuilder; + InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this); + Simplifier = &TheSimplifier; + bool EverMadeChange = false; // Lower dbg.declare intrinsics otherwise their value may be clobbered |