Diffstat (limited to 'lib/Transforms')
 lib/Transforms/IPO/PartialInlining.cpp                     |  51
 lib/Transforms/IPO/PassManagerBuilder.cpp                  |  32
 lib/Transforms/InstCombine/InstCombineAddSub.cpp           | 115
 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp         | 331
 lib/Transforms/InstCombine/InstCombineCalls.cpp            |  42
 lib/Transforms/InstCombine/InstCombineCasts.cpp            |  30
 lib/Transforms/InstCombine/InstCombineCompares.cpp         |  67
 lib/Transforms/InstCombine/InstCombineInternal.h           |  22
 lib/Transforms/InstCombine/InstCombineSelect.cpp           |   9
 lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 431
 lib/Transforms/InstCombine/InstructionCombining.cpp        |  43
 lib/Transforms/Instrumentation/AddressSanitizer.cpp        |  32
 lib/Transforms/Instrumentation/IndirectCallPromotion.cpp   |   4
 lib/Transforms/ObjCARC/PtrState.cpp                        |   6
 lib/Transforms/Scalar/ConstantHoisting.cpp                 | 212
 lib/Transforms/Scalar/CorrelatedValuePropagation.cpp       |  30
 lib/Transforms/Scalar/GuardWidening.cpp                    |   7
 lib/Transforms/Scalar/InferAddressSpaces.cpp               |  41
 lib/Transforms/Scalar/JumpThreading.cpp                    |  30
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp               |   5
 lib/Transforms/Scalar/LoopRotation.cpp                     |  22
 lib/Transforms/Scalar/LoopUnswitch.cpp                     |   2
 lib/Transforms/Scalar/StructurizeCFG.cpp                   |  14
 lib/Transforms/Utils/BypassSlowDivision.cpp                |   9
 lib/Transforms/Utils/CodeExtractor.cpp                     |  43
 lib/Transforms/Utils/Local.cpp                             |  54
 lib/Transforms/Utils/LoopUnroll.cpp                        |  14
 lib/Transforms/Utils/LowerSwitch.cpp                       |   8
 lib/Transforms/Utils/SimplifyCFG.cpp                       |  23
 lib/Transforms/Utils/SimplifyInstructions.cpp              |  18
 lib/Transforms/Utils/SimplifyLibCalls.cpp                  |  38
 lib/Transforms/Vectorize/LoadStoreVectorizer.cpp           |  32
 lib/Transforms/Vectorize/LoopVectorize.cpp                 |  27
 33 files changed, 1080 insertions(+), 764 deletions(-)
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index a2f6e5639d9d4..78e71c18fe290 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -17,7 +17,9 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -27,10 +29,21 @@
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
-#define DEBUG_TYPE "partialinlining"
+#define DEBUG_TYPE "partial-inlining"
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+// Command line option to disable partial-inlining. The default is false:
+static cl::opt<bool>
+ DisablePartialInlining("disable-partial-inlining", cl::init(false),
+ cl::Hidden, cl::desc("Disable partial inlining"));
+
+// Command line option to set the maximum number of partial inlining allowed
+// for the module. The default value of -1 means no limit.
+static cl::opt<int> MaxNumPartialInlining(
+ "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Max number of partial inlining. The default is unlimited"));
+
namespace {
struct PartialInlinerImpl {
PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
@@ -39,6 +52,12 @@ struct PartialInlinerImpl {
private:
InlineFunctionInfo IFI;
+ int NumPartialInlining = 0;
+
+ bool IsLimitReached() {
+ return (MaxNumPartialInlining != -1 &&
+ NumPartialInlining >= MaxNumPartialInlining);
+ }
};
struct PartialInlinerLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -66,6 +85,9 @@ struct PartialInlinerLegacyPass : public ModulePass {
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// First, verify that this function is an unswitching candidate...
+ if (F->hasAddressTaken())
+ return nullptr;
+
BasicBlock *EntryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
if (!BR || BR->isUnconditional())
@@ -149,11 +171,29 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// Inline the top-level if test into all callers.
std::vector<User *> Users(DuplicateFunction->user_begin(),
DuplicateFunction->user_end());
- for (User *User : Users)
+
+ for (User *User : Users) {
+ CallSite CS;
if (CallInst *CI = dyn_cast<CallInst>(User))
- InlineFunction(CI, IFI);
+ CS = CallSite(CI);
else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
- InlineFunction(II, IFI);
+ CS = CallSite(II);
+ else
+ llvm_unreachable("All uses must be calls");
+
+ if (IsLimitReached())
+ continue;
+ NumPartialInlining++;
+
+ OptimizationRemarkEmitter ORE(CS.getCaller());
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ BasicBlock *Block = CS.getParent();
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
+ << ore::NV("Callee", F) << " partially inlined into "
+ << ore::NV("Caller", CS.getCaller()));
+
+ InlineFunction(CS, IFI);
+ }
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
@@ -166,6 +206,9 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
}
bool PartialInlinerImpl::run(Module &M) {
+ if (DisablePartialInlining)
+ return false;
+
std::vector<Function *> Worklist;
Worklist.reserve(M.size());
for (Function &F : M)
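The candidate test above (an entry block that ends in a conditional branch, with the cold remainder extracted via CodeExtractor) is easiest to see on a small example. A minimal illustrative sketch, not taken from this patch; expensive_lookup is a placeholder name:

    // Shape of a partial-inlining candidate: the cheap entry-block test is
    // duplicated into every caller, while the rest stays an outlined call.
    int cached_lookup(int Key) {
      if (Key < 0)                    // entry-block test, inlined into callers
        return -1;                    // early-return block
      return expensive_lookup(Key);   // stand-in for the extracted region
    }

With the new options, -disable-partial-inlining switches the transform off and -max-partial-inlining=N caps how many call sites are rewritten in a module run.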
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index f11b58d1adc48..0d5910ebbfcc9 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -282,6 +282,12 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
}
if (!PGOInstrUse.empty())
MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
+ // Indirect call promotion that promotes intra-module targets only.
+ // For ThinLTO this is done earlier due to interactions with globalopt
+ // for imported functions. We don't run this at -O0.
+ if (OptLevel > 0)
+ MPM.add(
+ createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
void PassManagerBuilder::addFunctionSimplificationPasses(
legacy::PassManagerBase &MPM) {
@@ -414,11 +420,14 @@ void PassManagerBuilder::populateModulePassManager(
else if (!GlobalExtensions->empty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+
+ // Rename anon globals to be able to export them in the summary.
+ // This has to be done after we add the extensions to the pass manager
+ // as there could be passes (e.g. Address sanitizer) which introduce
+ // new unnamed globals.
if (PrepareForThinLTO)
- // Rename anon globals to be able to export them in the summary.
MPM.add(createNameAnonGlobalPass());
-
- addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -468,16 +477,10 @@ void PassManagerBuilder::populateModulePassManager(
// For SamplePGO in ThinLTO compile phase, we do not want to do indirect
// call promotion as it will change the CFG too much to make the 2nd
// profile annotation in backend more difficult.
- if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) {
- /// PGO instrumentation is added during the compile phase for ThinLTO, do
- /// not run it a second time
+ // PGO instrumentation is added during the compile phase for ThinLTO, do
+ // not run it a second time
+ if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM);
- // Indirect call promotion that promotes intra-module targets only.
- // For ThinLTO this is done earlier due to interactions with globalopt
- // for imported functions.
- MPM.add(
- createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
- }
if (EnableNonLTOGlobalsModRef)
// We add a module alias analysis pass here. In part due to bugs in the
@@ -677,6 +680,11 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopSinkPass());
// Get rid of LCSSA nodes.
MPM.add(createInstructionSimplifierPass());
+
+ // LoopSink (and other loop passes since the last simplifyCFG) might have
+ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+ MPM.add(createCFGSimplificationPass());
+
addExtensionsToPM(EP_OptimizerLast, MPM);
}
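Both hooks touched here (addPGOInstrPasses and the late CFG cleanup after LoopSink) are reached through populateModulePassManager. A minimal sketch of driving the legacy builder, assuming default options; shown only to place the changes, not as part of the patch:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    void buildPipeline(legacy::PassManager &MPM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;                   // OptLevel > 0, so the indirect-call
                                          // promotion added above is scheduled
      PMB.populateModulePassManager(MPM); // also appends the new late CFG
                                          // simplification after LoopSink
    }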
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index e30a4bafb9b0c..030461004f568 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -794,6 +795,11 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
if (Opnd->isConstant())
continue;
+ // The constant check above is really for a few special constant
+ // coefficients.
+ if (isa<UndefValue>(Opnd->getSymVal()))
+ continue;
+
const FAddendCoef &CE = Opnd->getCoef();
if (CE.isMinusOne() || CE.isMinusTwo())
NegOpndNum++;
@@ -894,24 +900,22 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
+ KnownBits LHSKnown(BitWidth);
+ computeKnownBits(LHS, LHSKnown, 0, &CxtI);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(RHS, RHSKnown, 0, &CxtI);
// Addition of two 2's complement numbers having opposite signs will never
// overflow.
- if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) ||
- (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1]))
+ if ((LHSKnown.One[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]) ||
+ (LHSKnown.Zero[BitWidth - 1] && RHSKnown.One[BitWidth - 1]))
return true;
// Check if carry bit of addition will not cause overflow.
- if (checkRippleForAdd(LHSKnownZero, RHSKnownZero))
+ if (checkRippleForAdd(LHSKnown.Zero, RHSKnown.Zero))
return true;
- if (checkRippleForAdd(RHSKnownZero, LHSKnownZero))
+ if (checkRippleForAdd(RHSKnown.Zero, LHSKnown.Zero))
return true;
return false;
@@ -931,18 +935,16 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
+ KnownBits LHSKnown(BitWidth);
+ computeKnownBits(LHS, LHSKnown, 0, &CxtI);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(RHS, RHSKnown, 0, &CxtI);
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
- if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) ||
- (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1]))
+ if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) ||
+ (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]))
return true;
// TODO: implement logic similar to checkRippleForAdd
@@ -1113,10 +1115,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// a sub and fuse this add with it.
if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
IntegerType *IT = cast<IntegerType>(I.getType());
- APInt LHSKnownOne(IT->getBitWidth(), 0);
- APInt LHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I);
- if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
+ KnownBits LHSKnown(IT->getBitWidth());
+ computeKnownBits(XorLHS, LHSKnown, 0, &I);
+ if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue())
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
@@ -1385,39 +1386,58 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
Value *LHSIntVal = LHSConv->getOperand(0);
+ Type *FPType = LHSConv->getType();
+
+ // TODO: This check is overly conservative. In many cases known bits
+ // analysis can tell us that the result of the addition has fewer significant
+ // bits than the integer type can hold.
+ auto IsValidPromotion = [](Type *FTy, Type *ITy) {
+ Type *FScalarTy = FTy->getScalarType();
+ Type *IScalarTy = ITy->getScalarType();
+
+ // Do we have enough bits in the significand to represent the result of
+ // the integer addition?
+ unsigned MaxRepresentableBits =
+ APFloat::semanticsPrecision(FScalarTy->getFltSemantics());
+ return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits;
+ };
// (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
// ... if the constant fits in the integer value. This is useful for things
// like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
// requires a constant pool load, and generally allows the add to be better
// instcombined.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
- Constant *CI =
- ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
- if (LHSConv->hasOneUse() &&
- ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
- CI, "addconv");
- return new SIToFPInst(NewAdd, I.getType());
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (IsValidPromotion(FPType, LHSIntVal->getType())) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
}
- }
// (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
Value *RHSIntVal = RHSConv->getOperand(0);
-
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of int->fp conversions),
- // and if the integer add will not overflow.
- if (LHSIntVal->getType() == RHSIntVal->getType() &&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
- RHSIntVal, "addconv");
- return new SIToFPInst(NewAdd, I.getType());
+ // It's enough to check LHS types only because we require int types to
+ // be the same for this transform.
+ if (IsValidPromotion(FPType, LHSIntVal->getType())) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSIntVal->getType() == RHSIntVal->getType() &&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
+ RHSIntVal, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
}
}
}
@@ -1617,10 +1637,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
if (Op0C->isMask()) {
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(Op1, RHSKnownZero, RHSKnownOne, 0, &I);
- if ((*Op0C | RHSKnownZero).isAllOnesValue())
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(Op1, RHSKnown, 0, &I);
+ if ((*Op0C | RHSKnown.Zero).isAllOnesValue())
return BinaryOperator::CreateXor(Op1, Op0);
}
}
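The IsValidPromotion guard above compares the integer width against the floating-point significand width. A standalone illustration of the same test, assuming LLVM's APFloat API (IEEE double has a 53-bit significand, so an i32 add always fits while an i64 add may not):

    #include "llvm/ADT/APFloat.h"
    using namespace llvm;

    // Returns true if an IntBits-wide integer value is always exactly
    // representable in a double, mirroring the IsValidPromotion check above.
    bool fitsInDouble(unsigned IntBits) {
      unsigned Precision = APFloat::semanticsPrecision(APFloat::IEEEdouble());
      return IntBits <= Precision;   // 32 <= 53 -> promote; 64 <= 53 -> keep fadd
    }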
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3a98e8937bda7..a97b5a9ec0bb8 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -906,15 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (PredL) {
default:
llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
- case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
- case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
- return LHS;
- }
case ICmpInst::ICMP_NE:
switch (PredR) {
default:
@@ -930,43 +921,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13
return Builder->CreateICmpSLT(LHS0, LHSC);
break; // (X != 13 & X s< 15) -> no change
- case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
- case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
- return RHS;
case ICmpInst::ICMP_NE:
// Potential folds for this case should already be handled.
break;
}
break;
- case ICmpInst::ICMP_ULT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
- case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
- case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
- return LHS;
- }
- break;
- case ICmpInst::ICMP_SLT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
- case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
- return LHS;
- }
- break;
case ICmpInst::ICMP_UGT:
switch (PredR) {
default:
llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
- return RHS;
case ICmpInst::ICMP_NE:
if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14
return Builder->CreateICmp(PredL, LHS0, RHSC);
@@ -980,9 +943,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (PredR) {
default:
llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
- return RHS;
case ICmpInst::ICMP_NE:
if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14
return Builder->CreateICmp(PredL, LHS0, RHSC);
@@ -1234,6 +1194,56 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
return nullptr;
}
+static Instruction *foldAndToXor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::And);
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *A, *B;
+
+ // Operand complexity canonicalization guarantees that the 'or' is Op0.
+ // (A | B) & ~(A & B) --> A ^ B
+ // (A | B) & ~(B & A) --> A ^ B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B)))))
+ return BinaryOperator::CreateXor(A, B);
+
+ // (A | ~B) & (~A | B) --> ~(A ^ B)
+ // (A | ~B) & (B | ~A) --> ~(A ^ B)
+ // (~B | A) & (~A | B) --> ~(A ^ B)
+ // (~B | A) & (B | ~A) --> ~(A ^ B)
+ if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+ return nullptr;
+}
+
+static Instruction *foldOrToXor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::Or);
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *A, *B;
+
+ // Operand complexity canonicalization guarantees that the 'and' is Op0.
+ // (A & B) | ~(A | B) --> ~(A ^ B)
+ // (A & B) | ~(B | A) --> ~(A ^ B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+ // (A & ~B) | (~A & B) --> A ^ B
+ // (A & ~B) | (B & ~A) --> A ^ B
+ // (~B & A) | (~A & B) --> A ^ B
+ // (~B & A) | (B & ~A) --> A ^ B
+ if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateXor(A, B);
+
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -1247,15 +1257,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
- // (A|B)&(A|C) -> A|(B&C) etc
- if (Value *V = SimplifyUsingDistributiveLaws(I))
- return replaceInstUsesWith(I, V);
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
+ // Do this before using distributive laws to catch simple and/or/not patterns.
+ if (Instruction *Xor = foldAndToXor(I, *Builder))
+ return Xor;
+
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
@@ -1366,19 +1380,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return DeMorgan;
{
- Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- // (A|B) & ~(A&B) -> A^B
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
- ((A == C && B == D) || (A == D && B == C)))
- return BinaryOperator::CreateXor(A, B);
-
- // ~(A&B) & (A|B) -> A^B
- if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
- match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
- ((A == C && B == D) || (A == D && B == C)))
- return BinaryOperator::CreateXor(A, B);
-
+ Value *A = nullptr, *B = nullptr, *C = nullptr;
// A&(A^B) => A & ~B
{
Value *tmpOp0 = Op0;
@@ -1405,11 +1407,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
// (A&((~A)|B)) -> A&B
- if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
- match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A))))
return BinaryOperator::CreateAnd(A, Op1);
- if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
- match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A))))
return BinaryOperator::CreateAnd(A, Op0);
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
@@ -1425,13 +1425,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C));
// (A | B) & ((~A) ^ B) -> (A & B)
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B))))
+ // (A | B) & (B ^ (~A)) -> (A & B)
+ // (B | A) & ((~A) ^ B) -> (A & B)
+ // (B | A) & (B ^ (~A)) -> (A & B)
+ if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateAnd(A, B);
// ((~A) ^ B) & (A | B) -> (A & B)
// ((~A) ^ B) & (B | A) -> (A & B)
- if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ // (B ^ (~A)) & (A | B) -> (A & B)
+ // (B ^ (~A)) & (B | A) -> (A & B)
+ if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateAnd(A, B);
}
@@ -2037,15 +2042,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
- // (A&B)|(A&C) -> A&(B|C) etc
- if (Value *V = SimplifyUsingDistributiveLaws(I))
- return replaceInstUsesWith(I, V);
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
+ // Do this before using distributive laws to catch simple and/or/not patterns.
+ if (Instruction *Xor = foldOrToXor(I, *Builder))
+ return Xor;
+
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
@@ -2105,19 +2114,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
match(Op0, m_c_And(m_Specific(A), m_Value(B))))
return BinaryOperator::CreateOr(Op1, B);
- // (A & ~B) | (A ^ B) -> (A ^ B)
- // (~B & A) | (A ^ B) -> (A ^ B)
- if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Xor(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
- // Commute the 'or' operands.
- // (A ^ B) | (A & ~B) -> (A ^ B)
- // (A ^ B) | (~B & A) -> (A ^ B)
- if (match(Op1, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op0, m_Xor(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
// (A & C)|(B & D)
Value *C = nullptr, *D = nullptr;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
@@ -2182,23 +2178,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
}
- // ((A&~B)|(~A&B)) -> A^B
- if ((match(C, m_Not(m_Specific(D))) &&
- match(B, m_Not(m_Specific(A)))))
- return BinaryOperator::CreateXor(A, D);
- // ((~B&A)|(~A&B)) -> A^B
- if ((match(A, m_Not(m_Specific(D))) &&
- match(B, m_Not(m_Specific(C)))))
- return BinaryOperator::CreateXor(C, D);
- // ((A&~B)|(B&~A)) -> A^B
- if ((match(C, m_Not(m_Specific(B))) &&
- match(D, m_Not(m_Specific(A)))))
- return BinaryOperator::CreateXor(A, B);
- // ((~B&A)|(B&~A)) -> A^B
- if ((match(A, m_Not(m_Specific(B))) &&
- match(D, m_Not(m_Specific(C)))))
- return BinaryOperator::CreateXor(C, B);
-
// ((A|B)&1)|(B&-2) -> (A&1) | B
if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
match(A, m_Or(m_Specific(B), m_Value(V1)))) {
@@ -2374,6 +2353,58 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
+/// A ^ B can be specified using other logic ops in a variety of patterns. We
+/// can fold these early and efficiently by morphing an existing instruction.
+static Instruction *foldXorToXor(BinaryOperator &I) {
+ assert(I.getOpcode() == Instruction::Xor);
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *A, *B;
+
+ // There are 4 commuted variants for each of the basic patterns.
+
+ // (A & B) ^ (A | B) -> A ^ B
+ // (A & B) ^ (B | A) -> A ^ B
+ // (A | B) ^ (A & B) -> A ^ B
+ // (A | B) ^ (B & A) -> A ^ B
+ if ((match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) ||
+ (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Specific(B))))) {
+ I.setOperand(0, A);
+ I.setOperand(1, B);
+ return &I;
+ }
+
+ // (A | ~B) ^ (~A | B) -> A ^ B
+ // (~B | A) ^ (~A | B) -> A ^ B
+ // (~A | B) ^ (A | ~B) -> A ^ B
+ // (B | ~A) ^ (A | ~B) -> A ^ B
+ if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
+ I.setOperand(0, A);
+ I.setOperand(1, B);
+ return &I;
+ }
+
+ // (A & ~B) ^ (~A & B) -> A ^ B
+ // (~B & A) ^ (~A & B) -> A ^ B
+ // (~A & B) ^ (A & ~B) -> A ^ B
+ // (B & ~A) ^ (A & ~B) -> A ^ B
+ if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) {
+ I.setOperand(0, A);
+ I.setOperand(1, B);
+ return &I;
+ }
+
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -2387,6 +2418,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
+ if (Instruction *NewXor = foldXorToXor(I))
+ return NewXor;
+
// (A&B)^(A&C) -> A&(B^C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
@@ -2399,44 +2433,39 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
- // Is this a ~ operation?
- if (Value *NotOp = dyn_castNotVal(&I)) {
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
- if (Op0I->getOpcode() == Instruction::And ||
- Op0I->getOpcode() == Instruction::Or) {
- // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
- // ~(~X | Y) === (X & ~Y) - De Morgan's Law
- if (dyn_castNotVal(Op0I->getOperand(1)))
- Op0I->swapOperands();
- if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
- Value *NotY =
- Builder->CreateNot(Op0I->getOperand(1),
- Op0I->getOperand(1)->getName()+".not");
- if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::CreateOr(Op0NotVal, NotY);
- return BinaryOperator::CreateAnd(Op0NotVal, NotY);
- }
-
- // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
- // ~(X | Y) === (~X & ~Y) - De Morgan's Law
- if (IsFreeToInvert(Op0I->getOperand(0),
- Op0I->getOperand(0)->hasOneUse()) &&
- IsFreeToInvert(Op0I->getOperand(1),
- Op0I->getOperand(1)->hasOneUse())) {
- Value *NotX =
- Builder->CreateNot(Op0I->getOperand(0), "notlhs");
- Value *NotY =
- Builder->CreateNot(Op0I->getOperand(1), "notrhs");
- if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::CreateOr(NotX, NotY);
- return BinaryOperator::CreateAnd(NotX, NotY);
- }
+ // Is this a 'not' (~) fed by a binary operator?
+ BinaryOperator *NotOp;
+ if (match(&I, m_Not(m_BinOp(NotOp)))) {
+ if (NotOp->getOpcode() == Instruction::And ||
+ NotOp->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(NotOp->getOperand(1)))
+ NotOp->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) {
+ Value *NotY = Builder->CreateNot(
+ NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not");
+ if (NotOp->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
- } else if (Op0I->getOpcode() == Instruction::AShr) {
- // ~(~X >>s Y) --> (X >>s Y)
- if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0)))
- return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1));
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+ // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+ if (IsFreeToInvert(NotOp->getOperand(0),
+ NotOp->getOperand(0)->hasOneUse()) &&
+ IsFreeToInvert(NotOp->getOperand(1),
+ NotOp->getOperand(1)->hasOneUse())) {
+ Value *NotX = Builder->CreateNot(NotOp->getOperand(0), "notlhs");
+ Value *NotY = Builder->CreateNot(NotOp->getOperand(1), "notrhs");
+ if (NotOp->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
}
+ } else if (NotOp->getOpcode() == Instruction::AShr) {
+ // ~(~X >>s Y) --> (X >>s Y)
+ if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0)))
+ return BinaryOperator::CreateAShr(Op0NotVal, NotOp->getOperand(1));
}
}
@@ -2574,40 +2603,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
{
Value *A, *B, *C, *D;
- // (A & B)^(A | B) -> A ^ B
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_Or(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- // (A | B)^(A & B) -> A ^ B
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_And(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- // (A | ~B) ^ (~A | B) -> A ^ B
- // (~B | A) ^ (~A | B) -> A ^ B
- if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
- // (~A | B) ^ (A | ~B) -> A ^ B
- if (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B))))) {
- return BinaryOperator::CreateXor(A, B);
- }
- // (A & ~B) ^ (~A & B) -> A ^ B
- // (~B & A) ^ (~A & B) -> A ^ B
- if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
- // (~A & B) ^ (A & ~B) -> A ^ B
- if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B))))) {
- return BinaryOperator::CreateXor(A, B);
- }
// (A ^ C)^(A | B) -> ((~A) & B) ^ C
if (match(Op0, m_Xor(m_Value(D), m_Value(C))) &&
match(Op1, m_Or(m_Value(A), m_Value(B)))) {
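All of the and/or/xor rewrites introduced by foldAndToXor, foldOrToXor and foldXorToXor rest on a handful of Boolean identities. A self-contained check of those identities over every 8-bit pattern (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned A = 0; A < 256; ++A) {
        for (unsigned B = 0; B < 256; ++B) {
          uint8_t a = A, b = B;
          assert(uint8_t((a | b) & ~(a & b)) == uint8_t(a ^ b));     // foldAndToXor
          assert(uint8_t((a & ~b) | (~a & b)) == uint8_t(a ^ b));    // foldOrToXor
          assert(uint8_t((a & b) ^ (a | b)) == uint8_t(a ^ b));      // foldXorToXor
          assert(uint8_t((a | ~b) & (~a | b)) == uint8_t(~(a ^ b))); // ~(A ^ B) form
        }
      }
      return 0;
    }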
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e7aa1a4573714..313ab13b9e2b7 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -44,6 +44,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
@@ -1378,14 +1379,13 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
return nullptr;
unsigned BitWidth = IT->getBitWidth();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II);
+ KnownBits Known(BitWidth);
+ IC.computeKnownBits(Op0, Known, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
- unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros()
- : KnownOne.countLeadingZeros();
+ unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros()
+ : Known.One.countLeadingZeros();
APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits)
: APInt::getHighBitsSet(BitWidth, NumMaskBits);
@@ -1393,7 +1393,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
// zero, this value is constant.
// FIXME: This should be in InstSimplify because we're replacing an
// instruction with a constant.
- if ((Mask & KnownZero) == Mask) {
+ if (Mask.isSubsetOf(Known.Zero)) {
auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits));
return IC.replaceInstUsesWith(II, C);
}
@@ -1401,7 +1401,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
// If the input to cttz/ctlz is known to be non-zero,
// then change the 'ZeroIsUndef' parameter to 'true'
// because we know the zero behavior can't affect the result.
- if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
+ if (Known.One != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
if (!match(II.getArgOperand(1), m_One())) {
II.setOperand(1, IC.Builder->getTrue());
return &II;
@@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
- return replaceInstUsesWith(*II,
- ConstantExpr::getSExt(CCmp, II->getType()));
+ if (CCmp->isNullValue()) {
+ return replaceInstUsesWith(
+ *II, ConstantExpr::getSExt(CCmp, II->getType()));
+ }
+
+ // The result of V_ICMP/V_FCMP assembly instructions (which this
+ // intrinsic exposes) is one bit per thread, masked with the EXEC
+ // register (which contains the bitmask of live threads). So a
+ // comparison that always returns true is the same as a read of the
+ // EXEC register.
+ Value *NewF = Intrinsic::getDeclaration(
+ II->getModule(), Intrinsic::read_register, II->getType());
+ Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
+ MDNode *MD = MDNode::get(II->getContext(), MDArgs);
+ Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
+ CallInst *NewCall = Builder->CreateCall(NewF, Args);
+ NewCall->addAttribute(AttributeList::FunctionIndex,
+ Attribute::Convergent);
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
}
// Canonicalize constants to RHS.
@@ -3599,9 +3617,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// If there is a dominating assume with the same condition as this one,
// then this one is redundant, and should be removed.
- APInt KnownZero(1, 0), KnownOne(1, 0);
- computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
- if (KnownOne.isAllOnesValue())
+ KnownBits Known(1);
+ computeKnownBits(IIOperand, Known, 0, II);
+ if (Known.One.isAllOnesValue())
return eraseInstFromFunction(*II);
// Update the cache of affected values for this assumption (we might be
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 9127ddca59150..312d9baae43a6 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,9 +14,10 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -676,11 +677,10 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// This only works for EQ and NE
ICI->isEquality()) {
// If Op1C some other power of two, convert:
- uint32_t BitWidth = Op1C->getType()->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI);
+ KnownBits Known(Op1C->getType()->getBitWidth());
+ computeKnownBits(ICI->getOperand(0), Known, 0, &CI);
- APInt KnownZeroMask(~KnownZero);
+ APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
if (!DoTransform) return ICI;
@@ -726,13 +726,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
- APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
- APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI);
- computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI);
+ KnownBits KnownLHS(BitWidth);
+ KnownBits KnownRHS(BitWidth);
+ computeKnownBits(LHS, KnownLHS, 0, &CI);
+ computeKnownBits(RHS, KnownRHS, 0, &CI);
- if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
- APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
+ APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
if (!DoTransform) return ICI;
@@ -740,7 +740,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
Value *Result = Builder->CreateXor(LHS, RHS);
// Mask off any bits that are set and won't be shifted away.
- if (KnownOneLHS.uge(UnknownBit))
+ if (KnownLHS.One.uge(UnknownBit))
Result = Builder->CreateAnd(Result,
ConstantInt::get(ITy, UnknownBit));
@@ -1049,10 +1049,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
if (ICI->hasOneUse() &&
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI);
+ KnownBits Known(BitWidth);
+ computeKnownBits(Op0, Known, 0, &CI);
- APInt KnownZeroMask(~KnownZero);
+ APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) {
Value *In = ICI->getOperand(0);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 003029ae39d56..d846a631b96f6 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -175,19 +176,18 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
/// Given a signed integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
-static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
- const APInt &KnownOne,
+/// TODO: Move to method on KnownBits struct?
+static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
APInt &Min, APInt &Max) {
- assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
- KnownZero.getBitWidth() == Min.getBitWidth() &&
- KnownZero.getBitWidth() == Max.getBitWidth() &&
+ assert(Known.getBitWidth() == Min.getBitWidth() &&
+ Known.getBitWidth() == Max.getBitWidth() &&
"KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(KnownZero|KnownOne);
+ APInt UnknownBits = ~(Known.Zero|Known.One);
// The minimum value is when all unknown bits are zeros, EXCEPT for the sign
// bit if it is unknown.
- Min = KnownOne;
- Max = KnownOne|UnknownBits;
+ Min = Known.One;
+ Max = Known.One|UnknownBits;
if (UnknownBits.isNegative()) { // Sign bit is unknown
Min.setBit(Min.getBitWidth()-1);
@@ -198,19 +198,18 @@ static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
/// Given an unsigned integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
-static void computeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
- const APInt &KnownOne,
+/// TODO: Move to method on KnownBits struct?
+static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
APInt &Min, APInt &Max) {
- assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
- KnownZero.getBitWidth() == Min.getBitWidth() &&
- KnownZero.getBitWidth() == Max.getBitWidth() &&
+ assert(Known.getBitWidth() == Min.getBitWidth() &&
+ Known.getBitWidth() == Max.getBitWidth() &&
"Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(KnownZero|KnownOne);
+ APInt UnknownBits = ~(Known.Zero|Known.One);
// The minimum value is when the unknown bits are all zeros.
- Min = KnownOne;
+ Min = Known.One;
// The maximum value is when the unknown bits are all ones.
- Max = KnownOne|UnknownBits;
+ Max = Known.One|UnknownBits;
}
/// This is called when we see this pattern:
@@ -1479,14 +1478,14 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
// of the high bits truncated out of x are known.
unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
SrcBits = X->getType()->getScalarSizeInBits();
- APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- computeKnownBits(X, KnownZero, KnownOne, 0, &Cmp);
+ KnownBits Known(SrcBits);
+ computeKnownBits(X, Known, 0, &Cmp);
// If all the high bits are known, we can do this xform.
- if ((KnownZero | KnownOne).countLeadingOnes() >= SrcBits - DstBits) {
+ if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
// Pull in the high bits from known-ones set.
APInt NewRHS = C->zext(SrcBits);
- NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
+ NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
}
}
@@ -4001,16 +4000,16 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
IsSignBit = isSignBitCheck(Pred, *CmpC, UnusedBit);
}
- APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
- APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+ KnownBits Op0Known(BitWidth);
+ KnownBits Op1Known(BitWidth);
if (SimplifyDemandedBits(&I, 0,
getDemandedBitsLHSMask(I, BitWidth, IsSignBit),
- Op0KnownZero, Op0KnownOne, 0))
+ Op0Known, 0))
return &I;
if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
+ Op1Known, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -4019,15 +4018,11 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
if (I.isSigned()) {
- computeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min,
- Op0Max);
- computeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min,
- Op1Max);
+ computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
+ computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
} else {
- computeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min,
- Op0Max);
- computeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min,
- Op1Max);
+ computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
+ computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
}
// If Min and Max are known to be the same, then SimplifyDemandedBits
@@ -4054,8 +4049,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
// If all bits are known zero except for one, then we know at most one bit
// is set. If the comparison is against zero, then this is a check to see if
// *that* bit is set.
- APInt Op0KnownZeroInverted = ~Op0KnownZero;
- if (~Op1KnownZero == 0) {
+ APInt Op0KnownZeroInverted = ~Op0Known.Zero;
+ if (~Op1Known.Zero == 0) {
// If the LHS is an AND with the same constant, look through it.
Value *LHS = nullptr;
const APInt *LHSC;
@@ -4193,8 +4188,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
// Turn a signed comparison into an unsigned one if both operands are known to
// have the same sign.
if (I.isSigned() &&
- ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
- (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
+ (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
return nullptr;
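computeUnsignedMinMaxValuesFromKnownBits reduces to two lines once the unknown bits are isolated: the minimum clears them all, the maximum sets them all. A small standalone illustration with 8-bit values (not the LLVM API):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t KnownOne  = 0b10000001;
      uint8_t KnownZero = 0b01000010;
      uint8_t Unknown   = uint8_t(~(KnownZero | KnownOne));  // 0b00111100
      uint8_t Min = KnownOne;                    // unknown bits all zero -> 0x81
      uint8_t Max = uint8_t(KnownOne | Unknown); // unknown bits all one  -> 0xBD
      assert(Min == 0x81 && Max == 0xBD);
      // The signed variant additionally sets the sign bit in Min (and clears it
      // in Max) when the sign bit itself is unknown.
      return 0;
    }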
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 71000063ab3cb..776686d3d1171 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -489,10 +489,9 @@ public:
return nullptr; // Don't do anything with FI
}
- void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ void computeKnownBits(Value *V, KnownBits &Known,
unsigned Depth, Instruction *CxtI) const {
- return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI,
- &DT);
+ return llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
@@ -536,24 +535,23 @@ private:
/// \brief Attempts to replace V with a simpler value based on the demanded
/// bits.
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth,
- Instruction *CxtI);
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known,
+ unsigned Depth, Instruction *CxtI);
bool SimplifyDemandedBits(Instruction *I, unsigned Op,
- const APInt &DemandedMask, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth = 0);
+ const APInt &DemandedMask, KnownBits &Known,
+ unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *SimplifyMultipleUseDemandedBits(Instruction *I,
const APInt &DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
+ KnownBits &Known,
unsigned Depth, Instruction *CxtI);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
/// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
- Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
- const APInt &DemandedMask, APInt &KnownZero,
- APInt &KnownOne);
+ Value *simplifyShrShlDemandedBits(
+ Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
+ const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known);
/// \brief Tries to simplify operands to an integer instruction based on its
/// demanded bits.
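These signature changes follow one mechanical pattern repeated across the patch: the paired (KnownZero, KnownOne) APInts become a single KnownBits object from llvm/Support/KnownBits.h. A sketch of the call-site shape after the change, where BitWidth, V and CxtI stand for whatever the surrounding code already has in scope:

    KnownBits Known(BitWidth);            // Zero and One start as all-zero APInts
    computeKnownBits(V, Known, /*Depth=*/0, CxtI);
    if (Known.Zero[BitWidth - 1]) {
      // sign bit of V is known clear
    }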
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5d6d899da4b5f..76829c5e457bf 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -1476,11 +1477,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// The motivation for this call into value tracking is to take advantage of
// the assumption cache, so make sure that is populated.
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) {
- APInt KnownOne(1, 0), KnownZero(1, 0);
- computeKnownBits(CondVal, KnownZero, KnownOne, 0, &SI);
- if (KnownOne == 1)
+ KnownBits Known(1);
+ computeKnownBits(CondVal, Known, 0, &SI);
+ if (Known.One == 1)
return replaceInstUsesWith(SI, TrueVal);
- if (KnownZero == 1)
+ if (Known.Zero == 1)
return replaceInstUsesWith(SI, FalseVal);
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 2ba052b7e02d3..8d0ed8532779e 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -26,7 +27,7 @@ using namespace llvm::PatternMatch;
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
- APInt Demanded) {
+ const APInt &Demanded) {
assert(I && "No instruction?");
assert(OpNo < I->getNumOperands() && "Operand index too large");
@@ -37,13 +38,11 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
return false;
// If there are no bits set that aren't demanded, nothing to do.
- Demanded = Demanded.zextOrTrunc(C->getBitWidth());
if (C->isSubsetOf(Demanded))
return false;
// This instruction is producing bits that are not demanded. Shrink the RHS.
- Demanded &= *C;
- I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded));
+ I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));
return true;
}
@@ -54,10 +53,10 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ KnownBits Known(BitWidth);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
@@ -70,11 +69,11 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
/// change and false otherwise.
bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
const APInt &DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
+ KnownBits &Known,
unsigned Depth) {
Use &U = I->getOperandUse(OpNo);
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
- KnownOne, Depth, I);
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
+ Depth, I);
if (!NewVal) return false;
U = NewVal;
return true;
@@ -88,15 +87,16 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false after
/// analyzing the expression and setting KnownOne to the bits known to be one in the
-/// expression. KnownZero contains all the bits that are known to be zero in the
-/// expression. These are provided to potentially allow the caller (which might
-/// recursively be SimplifyDemandedBits itself) to simplify the expression.
-/// KnownOne and KnownZero always follow the invariant that:
-/// KnownOne & KnownZero == 0.
-/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and
-/// KnownZero may only be accurate for those bits set in DemandedMask. Note also
-/// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the
-/// same.
+/// expression. Known.Zero contains all the bits that are known to be zero in
+/// the expression. These are provided to potentially allow the caller (which
+/// might recursively be SimplifyDemandedBits itself) to simplify the
+/// expression.
+/// Known.One and Known.Zero always follow the invariant that:
+/// Known.One & Known.Zero == 0.
+/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
+/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
+/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
+/// be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
@@ -104,8 +104,7 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
/// some other non-null value if it found out that V is equal to another value
/// in the context where the specified bits are demanded, but not for all users.
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth,
+ KnownBits &Known, unsigned Depth,
Instruction *CxtI) {
assert(V != nullptr && "Null pointer of Value???");
assert(Depth <= 6 && "Limit Search Depth");
@@ -113,18 +112,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Type *VTy = V->getType();
assert(
(!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
+ Known.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask and Known must have same BitWidth");
if (isa<Constant>(V)) {
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
return nullptr;
}
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
if (DemandedMask == 0) // Not demanding any bits from V.
return UndefValue::get(VTy);
@@ -133,20 +130,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
return nullptr; // Only analyze instructions.
}
// If there are multiple uses of this value and we aren't at the root, then
// we can't do any simplifications of the operands, because DemandedMask
// only reflects the bits demanded by *one* of the users.
- if (Depth != 0 && !I->hasOneUse()) {
- return SimplifyMultipleUseDemandedBits(I, DemandedMask, KnownZero, KnownOne,
- Depth, CxtI);
- }
+ if (Depth != 0 && !I->hasOneUse())
+ return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedMask to all bits so that we can try to simplify the
@@ -157,22 +151,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
switch (I->getOpcode()) {
default:
- computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(I, Known, Depth, CxtI);
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownZero, LHSKnownZero,
- LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
+ Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- APInt IKnownZero = RHSKnownZero | LHSKnownZero;
+ APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
// Output known-1 bits are only known if set in both the LHS & RHS.
- APInt IKnownOne = RHSKnownOne & LHSKnownOne;
+ APInt IKnownOne = RHSKnown.One & LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -181,33 +174,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
return I;
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Or: {
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownOne, LHSKnownZero,
- LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
+ Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// Output known-0 bits are only known if clear in both the LHS & RHS.
- APInt IKnownZero = RHSKnownZero & LHSKnownZero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- APInt IKnownOne = RHSKnownOne | LHSKnownOne;
+ APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ APInt IKnownOne = RHSKnown.One | LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -216,34 +208,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Xor: {
- if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 0, DemandedMask, LHSKnownZero, LHSKnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
- (RHSKnownOne & LHSKnownOne);
+ APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
+ (RHSKnown.One & LHSKnown.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
- (RHSKnownOne & LHSKnownZero);
+ APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
+ (RHSKnown.One & LHSKnown.Zero);
// If the client is only demanding bits that we know, return the known
// constant.
@@ -252,15 +242,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if (DemandedMask.isSubsetOf(RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
// If all of the demanded bits are known to be zero on one side or the
// other, turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) {
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
@@ -271,10 +261,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) &&
- RHSKnownOne.isSubsetOf(LHSKnownOne)) {
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
+ RHSKnown.One.isSubsetOf(LHSKnown.One)) {
Constant *AndC = Constant::getIntegerValue(VTy,
- ~RHSKnownOne & DemandedMask);
+ ~RHSKnown.One & DemandedMask);
Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
return InsertNewInstWith(And, *I);
}
@@ -292,10 +282,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
isa<ConstantInt>(I->getOperand(1)) &&
isa<ConstantInt>(LHSInst->getOperand(1)) &&
- (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
- APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+ APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
Constant *AndC =
ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
@@ -309,9 +299,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownZero = std::move(IKnownZero);
+ Known.Zero = std::move(IKnownZero);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = std::move(IKnownOne);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Select:
@@ -321,13 +311,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
- if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 1, DemandedMask, LHSKnownZero, LHSKnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
@@ -335,21 +323,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
// Only known if known in both the LHS and RHS.
- KnownOne = RHSKnownOne & LHSKnownOne;
- KnownZero = RHSKnownZero & LHSKnownZero;
+ Known.One = RHSKnown.One & LHSKnown.One;
+ Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
break;
case Instruction::Trunc: {
unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask = DemandedMask.zext(truncBf);
- KnownZero = KnownZero.zext(truncBf);
- KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
- Depth + 1))
+ Known.Zero = Known.Zero.zext(truncBf);
+ Known.One = Known.One.zext(truncBf);
+ if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
- KnownZero = KnownZero.trunc(BitWidth);
- KnownOne = KnownOne.trunc(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ Known.Zero = Known.Zero.trunc(BitWidth);
+ Known.One = Known.One.trunc(BitWidth);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
break;
}
case Instruction::BitCast:
@@ -369,27 +356,25 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
break;
case Instruction::ZExt: {
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask = DemandedMask.trunc(SrcBitWidth);
- KnownZero = KnownZero.trunc(SrcBitWidth);
- KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
- Depth + 1))
+ Known.Zero = Known.Zero.trunc(SrcBitWidth);
+ Known.One = Known.One.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ Known.Zero = Known.Zero.zext(BitWidth);
+ Known.One = Known.One.zext(BitWidth);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
// The top bits are known to be zero.
- KnownZero.setBitsFrom(SrcBitWidth);
+ Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
case Instruction::SExt: {
@@ -406,27 +391,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits.setBit(SrcBitWidth-1);
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
- KnownZero = KnownZero.trunc(SrcBitWidth);
- KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I, 0, InputDemandedBits, KnownZero, KnownOne,
- Depth + 1))
+ Known.Zero = Known.Zero.trunc(SrcBitWidth);
+ Known.One = Known.One.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I, 0, InputDemandedBits, Known, Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ Known.Zero = Known.Zero.zext(BitWidth);
+ Known.One = Known.One.zext(BitWidth);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, or if the NewBits are not demanded
// convert this into a zero extension.
- if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ if (Known.Zero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
// Convert to ZExt cast
CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
return InsertNewInstWith(NewCast, *I);
- } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
- KnownOne |= NewBits;
+ } else if (Known.One[SrcBitWidth-1]) { // Input sign bit known set
+ Known.One |= NewBits;
}
break;
}
@@ -440,11 +424,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// significant bit and all those below it.
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
- SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnownZero, LHSKnownOne,
- Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
- SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnownZero, RHSKnownOne,
- Depth + 1)) {
+ SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
// Disable the nsw and nuw flags here: We can no longer guarantee that
// we won't wrap after simplification. Removing the nsw/nuw flags is
// legal here because the top bit is not demanded.
@@ -456,30 +438,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If we are known to be adding/subtracting zeros to every bit below
// the highest demanded bit, we just return the other side.
- if ((DemandedFromOps & RHSKnownZero) == DemandedFromOps)
+ if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
// We can't do this with the LHS for subtraction.
if (I->getOpcode() == Instruction::Add &&
- (DemandedFromOps & LHSKnownZero) == DemandedFromOps)
+ DemandedFromOps.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
}
// Otherwise just hand the add/sub off to computeKnownBits to fill in
// the known zeros and ones.
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
break;
}
- case Instruction::Shl:
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- {
- Value *VarX; ConstantInt *C1;
- if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
- Instruction *Shr = cast<Instruction>(I->getOperand(0));
- Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
- KnownZero, KnownOne);
- if (R)
- return R;
- }
+ case Instruction::Shl: {
+ const APInt *SA;
+ if (match(I->getOperand(1), m_APInt(SA))) {
+ const APInt *ShrAmt;
+ if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) {
+ Instruction *Shr = cast<Instruction>(I->getOperand(0));
+ if (Value *R = simplifyShrShlDemandedBits(
+ Shr, *ShrAmt, I, *SA, DemandedMask, Known))
+ return R;
}
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
@@ -492,17 +472,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn.setHighBits(ShiftAmt);
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- KnownZero <<= ShiftAmt;
- KnownOne <<= ShiftAmt;
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ Known.Zero <<= ShiftAmt;
+ Known.One <<= ShiftAmt;
// low bits known zero.
if (ShiftAmt)
- KnownZero.setLowBits(ShiftAmt);
+ Known.Zero.setLowBits(ShiftAmt);
}
break;
+ }
case Instruction::LShr: {
const APInt *SA;
if (match(I->getOperand(1), m_APInt(SA))) {
@@ -516,14 +496,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn.setLowBits(ShiftAmt);
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- KnownZero.lshrInPlace(ShiftAmt);
- KnownOne.lshrInPlace(ShiftAmt);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShiftAmt);
+ Known.One.lshrInPlace(ShiftAmt);
if (ShiftAmt)
- KnownZero.setHighBits(ShiftAmt); // high bits known zero.
+ Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
}
break;
}
@@ -560,15 +539,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn.setLowBits(ShiftAmt);
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- KnownZero.lshrInPlace(ShiftAmt);
- KnownOne.lshrInPlace(ShiftAmt);
+ Known.Zero.lshrInPlace(ShiftAmt);
+ Known.One.lshrInPlace(ShiftAmt);
// Handle the sign bits.
APInt SignMask(APInt::getSignMask(BitWidth));
@@ -577,14 +555,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
- (HighBits & ~DemandedMask) == HighBits) {
+ if (BitWidth <= ShiftAmt || Known.Zero[BitWidth-ShiftAmt-1] ||
+ !DemandedMask.intersects(HighBits)) {
BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
I->getOperand(1));
LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
return InsertNewInstWith(LShr, *I);
- } else if ((KnownOne & SignMask) != 0) { // New bits are known one.
- KnownOne |= HighBits;
+ } else if (Known.One.intersects(SignMask)) { // New bits are known one.
+ Known.One |= HighBits;
}
}
break;
@@ -602,25 +580,24 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
- if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
- KnownZero = LHSKnownZero & LowBits;
- KnownOne = LHSKnownOne & LowBits;
+ Known.Zero = LHSKnown.Zero & LowBits;
+ Known.One = LHSKnown.One & LowBits;
// If LHS is non-negative or has all low bits zero, then the upper bits
// are all zero.
- if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits))
- KnownZero |= ~LowBits;
+ if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero))
+ Known.Zero |= ~LowBits;
// If LHS is negative and not all low bits are zero, then the upper bits
// are all one.
- if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0))
- KnownOne |= ~LowBits;
+ if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One))
+ Known.One |= ~LowBits;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
break;
}
}
@@ -628,22 +605,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
if (DemandedMask.isSignBitSet()) {
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
- CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
// If it's known zero, our sign bit is also zero.
- if (LHSKnownZero.isSignBitSet())
- KnownZero.setSignBit();
+ if (LHSKnown.Zero.isSignBitSet())
+ Known.Zero.setSignBit();
}
break;
case Instruction::URem: {
- APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ KnownBits Known2(BitWidth);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I, 0, AllOnes, KnownZero2, KnownOne2, Depth + 1) ||
- SimplifyDemandedBits(I, 1, AllOnes, KnownZero2, KnownOne2, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
return I;
- unsigned Leaders = KnownZero2.countLeadingOnes();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ unsigned Leaders = Known2.Zero.countLeadingOnes();
+ Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
break;
}
case Instruction::Call:
@@ -707,56 +683,54 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return ConstantInt::getNullValue(VTy);
// We know that the upper bits are set to zero.
- KnownZero.setBitsFrom(ArgWidth);
+ Known.Zero.setBitsFrom(ArgWidth);
return nullptr;
}
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero.setBitsFrom(32);
+ Known.Zero.setBitsFrom(32);
return nullptr;
}
}
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
break;
}
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
- return Constant::getIntegerValue(VTy, KnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
+ return Constant::getIntegerValue(VTy, Known.One);
return nullptr;
}
-/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne
+/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
const APInt &DemandedMask,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
unsigned Depth,
Instruction *CxtI) {
unsigned BitWidth = DemandedMask.getBitWidth();
Type *ITy = I->getType();
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ KnownBits LHSKnown(BitWidth);
+ KnownBits RHSKnown(BitWidth);
// Despite the fact that we can't simplify this instruction in all User's
- // context, we can at least compute the knownzero/knownone bits, and we can
+ // context, we can at least compute the known bits, and we can
// do simplifications that apply to *just* the one user if we know that
// this instruction has a simpler value in that context.
switch (I->getOpcode()) {
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- APInt IKnownZero = RHSKnownZero | LHSKnownZero;
+ APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
// Output known-1 bits are only known if set in both the LHS & RHS.
- APInt IKnownOne = RHSKnownOne & LHSKnownOne;
+ APInt IKnownOne = RHSKnown.One & LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -766,13 +740,13 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
- if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return I->getOperand(1);
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Or: {
@@ -780,15 +754,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
// Output known-0 bits are only known if clear in both the LHS & RHS.
- APInt IKnownZero = RHSKnownZero & LHSKnownZero;
+ APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
- APInt IKnownOne = RHSKnownOne | LHSKnownOne;
+ APInt IKnownOne = RHSKnown.One | LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -798,30 +771,29 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
- if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return I->getOperand(1);
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Xor: {
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
- (RHSKnownOne & LHSKnownOne);
+ APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
+ (RHSKnown.One & LHSKnown.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
- (RHSKnownOne & LHSKnownZero);
+ APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
+ (RHSKnown.One & LHSKnown.Zero);
// If the client is only demanding bits that we know, return the known
// constant.
@@ -830,25 +802,25 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If all of the demanded bits are known zero on one side, return the
// other.
- if (DemandedMask.isSubsetOf(RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownZero = std::move(IKnownZero);
+ Known.Zero = std::move(IKnownZero);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = std::move(IKnownOne);
+ Known.One = std::move(IKnownOne);
break;
}
default:
- // Compute the KnownZero/KnownOne bits to simplify things downstream.
- computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
+ // Compute the Known bits to simplify things downstream.
+ computeKnownBits(I, Known, Depth, CxtI);
// If this user is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
- return Constant::getIntegerValue(ITy, KnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
+ return Constant::getIntegerValue(ITy, Known.One);
break;
}
@@ -874,29 +846,26 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
-Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
- Instruction *Shl,
- const APInt &DemandedMask,
- APInt &KnownZero,
- APInt &KnownOne) {
-
- const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
- const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
+Value *
+InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
+ Instruction *Shl, const APInt &ShlOp1,
+ const APInt &DemandedMask,
+ KnownBits &Known) {
if (!ShlOp1 || !ShrOp1)
- return nullptr; // Noop.
+ return nullptr; // No-op.
Value *VarX = Shr->getOperand(0);
Type *Ty = VarX->getType();
- unsigned BitWidth = Ty->getIntegerBitWidth();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
return nullptr; // Undef.
unsigned ShlAmt = ShlOp1.getZExtValue();
unsigned ShrAmt = ShrOp1.getZExtValue();
- KnownOne.clearAllBits();
- KnownZero.setLowBits(ShlAmt - 1);
- KnownZero &= DemandedMask;
+ Known.One.clearAllBits();
+ Known.Zero.setLowBits(ShlAmt - 1);
+ Known.Zero &= DemandedMask;
APInt BitMask1(APInt::getAllOnesValue(BitWidth));
APInt BitMask2(APInt::getAllOnesValue(BitWidth));
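Aside, not part of the patch: the hunks above fold each KnownZero/KnownOne APInt pair into a single KnownBits value while keeping the same bit rules. A minimal sketch of the 'and' and 'xor' rules, assuming only llvm/Support/KnownBits.h as introduced by this series (the helper names are hypothetical):

#include <cassert>
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// 'And': a result bit is zero if it is zero in either operand, and one only
// if it is one in both operands.
static KnownBits knownBitsForAnd(const KnownBits &LHS, const KnownBits &RHS) {
  KnownBits Result(LHS.getBitWidth());
  Result.Zero = LHS.Zero | RHS.Zero;
  Result.One = LHS.One & RHS.One;
  assert(!(Result.Zero & Result.One) && "Bits known to be one AND zero?");
  return Result;
}

// 'Xor': a result bit is known when both operand bits are known; it is zero
// when they agree and one when they disagree.
static KnownBits knownBitsForXor(const KnownBits &LHS, const KnownBits &RHS) {
  KnownBits Result(LHS.getBitWidth());
  Result.Zero = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One);
  Result.One = (LHS.Zero & RHS.One) | (LHS.One & RHS.Zero);
  assert(!(Result.Zero & Result.One) && "Bits known to be one AND zero?");
  return Result;
}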
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 81f2d9fa179f9..4729c79ca4c3d 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -60,6 +60,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -641,14 +642,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) {
// They do! Return "L op' R".
++NumExpand;
- // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
- if ((L == A && R == B) ||
- (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
- return Op0;
- // Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
- return V;
- // Otherwise, create a new instruction.
C = Builder->CreateBinOp(InnerOpcode, L, R);
C->takeName(&I);
return C;
@@ -666,14 +659,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) {
// They do! Return "L op' R".
++NumExpand;
- // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
- if ((L == B && R == C) ||
- (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
- return Op1;
- // Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
- return V;
- // Otherwise, create a new instruction.
A = Builder->CreateBinOp(InnerOpcode, L, R);
A->takeName(&I);
return A;
@@ -2196,11 +2181,10 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
// There might be assume intrinsics dominating this return that completely
// determine the value. If so, constant fold it.
- unsigned BitWidth = VTy->getPrimitiveSizeInBits();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI);
- if ((KnownZero|KnownOne).isAllOnesValue())
- RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne));
+ KnownBits Known(VTy->getPrimitiveSizeInBits());
+ computeKnownBits(ResultOp, Known, 0, &RI);
+ if ((Known.Zero|Known.One).isAllOnesValue())
+ RI.setOperand(0, Constant::getIntegerValue(VTy, Known.One));
return nullptr;
}
@@ -2279,10 +2263,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
}
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
- unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
- unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
+ KnownBits Known(BitWidth);
+ computeKnownBits(Cond, Known, 0, &SI);
+ unsigned LeadingKnownZeros = Known.Zero.countLeadingOnes();
+ unsigned LeadingKnownOnes = Known.One.countLeadingOnes();
// Compute the number of leading bits we can ignore.
// TODO: A better way to determine this would use ComputeNumSignBits().
@@ -2879,11 +2863,10 @@ bool InstCombiner::run() {
Type *Ty = I->getType();
if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) {
unsigned BitWidth = Ty->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
- if ((KnownZero | KnownOne).isAllOnesValue()) {
- Constant *C = ConstantInt::get(Ty, KnownOne);
+ KnownBits Known(BitWidth);
+ computeKnownBits(I, Known, /*Depth*/0, I);
+ if ((Known.Zero | Known.One).isAllOnesValue()) {
+ Constant *C = ConstantInt::get(Ty, Known.One);
DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
" from: " << *I << '\n');
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 036dd8d39a085..b866958e3c4b8 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -265,11 +265,10 @@ static cl::opt<bool>
cl::Hidden, cl::init(false));
static cl::opt<bool>
- ClUseMachOGlobalsSection("asan-globals-live-support",
- cl::desc("Use linker features to support dead "
- "code stripping of globals "
- "(Mach-O only)"),
- cl::Hidden, cl::init(true));
+ ClUseGlobalsGC("asan-globals-live-support",
+ cl::desc("Use linker features to support dead "
+ "code stripping of globals"),
+ cl::Hidden, cl::init(true));
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
@@ -594,13 +593,15 @@ struct AddressSanitizer : public FunctionPass {
};
class AddressSanitizerModule : public ModulePass {
- public:
+public:
explicit AddressSanitizerModule(bool CompileKernel = false,
- bool Recover = false)
+ bool Recover = false,
+ bool UseGlobalsGC = true)
: ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan),
- Recover(Recover || ClRecover) {}
+ Recover(Recover || ClRecover),
+ UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC) {}
bool runOnModule(Module &M) override;
- static char ID; // Pass identification, replacement for typeid
+ static char ID; // Pass identification, replacement for typeid
StringRef getPassName() const override { return "AddressSanitizerModule"; }
private:
@@ -635,6 +636,7 @@ private:
GlobalsMetadata GlobalsMD;
bool CompileKernel;
bool Recover;
+ bool UseGlobalsGC;
Type *IntptrTy;
LLVMContext *C;
Triple TargetTriple;
@@ -913,9 +915,10 @@ INITIALIZE_PASS(
"ModulePass",
false, false)
ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
- bool Recover) {
+ bool Recover,
+ bool UseGlobalsGC) {
assert(!CompileKernel || Recover);
- return new AddressSanitizerModule(CompileKernel, Recover);
+ return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1537,9 +1540,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// binary in order to allow the linker to properly dead strip. This is only
// supported on recent versions of ld64.
bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
- if (!ClUseMachOGlobalsSection)
- return false;
-
if (!TargetTriple.isOSBinFormatMachO())
return false;
@@ -1911,9 +1911,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
Initializers[i] = Initializer;
}
- if (TargetTriple.isOSBinFormatCOFF()) {
+ if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
- } else if (ShouldUseMachOGlobalsSection()) {
+ } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
} else {
InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 61d627673c907..d7eb857cff7e1 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -943,14 +943,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
BasicBlock *BB = MI->getParent();
DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
DEBUG(dbgs() << *BB << "\n");
+ auto OrigBBFreq = BFI.getBlockFreq(BB);
BasicBlock *DefaultBB = SplitBlock(BB, MI);
BasicBlock::iterator It(*MI);
++It;
assert(It != DefaultBB->end());
BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
- DefaultBB->setName("MemOP.Default");
MergeBB->setName("MemOP.Merge");
+ BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
+ DefaultBB->setName("MemOP.Default");
auto &Ctx = Func.getContext();
IRBuilder<> IRB(BB);
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index a5afc8ad977cb..c1bbc4e96b16d 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -351,8 +351,10 @@ bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
const Value *Ptr,
ProvenanceAnalysis &PA,
ARCInstKind Class) {
- // Check for possible releases.
- if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ // Check for possible releases. Treat clang.arc.use as a releasing instruction
+ // to prevent sinking a retain past it.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class) &&
+ Class != ARCInstKind::IntrinsicUser)
return false;
DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index ee6333e88716b..f62e111460ca0 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -53,6 +53,12 @@ using namespace consthoist;
STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");
+static cl::opt<bool> ConstHoistWithBlockFrequency(
+ "consthoist-with-block-frequency", cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to reduce the "
+ "chance to execute const materialization more frequently than "
+ "without hoisting."));
+
namespace {
/// \brief The constant hoisting pass.
class ConstantHoistingLegacyPass : public FunctionPass {
@@ -68,6 +74,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ if (ConstHoistWithBlockFrequency)
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -82,6 +90,7 @@ private:
char ConstantHoistingLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
"Constant Hoisting", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
@@ -99,9 +108,13 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
- bool MadeChange = Impl.runImpl(
- Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
- getAnalysis<DominatorTreeWrapperPass>().getDomTree(), Fn.getEntryBlock());
+ bool MadeChange =
+ Impl.runImpl(Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ ConstHoistWithBlockFrequency
+ ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
+ : nullptr,
+ Fn.getEntryBlock());
if (MadeChange) {
DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -148,33 +161,142 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
return IDom->getBlock()->getTerminator();
}
+/// \brief Given \p BBs as input, find another set of BBs which collectively
+/// dominate \p BBs and have the minimal sum of frequencies. Return the BB
+/// set found in \p BBs.
+void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
+ BasicBlock *Entry,
+ SmallPtrSet<BasicBlock *, 8> &BBs) {
+ assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
+ // Nodes on the current path to the root.
+ SmallPtrSet<BasicBlock *, 8> Path;
+ // Candidates includes any block 'BB' in set 'BBs' that is not strictly
+ // dominated by any other blocks in set 'BBs', and all nodes in the path
+ // in the dominator tree from Entry to 'BB'.
+ SmallPtrSet<BasicBlock *, 16> Candidates;
+ for (auto BB : BBs) {
+ Path.clear();
+ // Walk up the dominator tree until Entry or another BB in BBs
+ // is reached. Insert the nodes visited along the way into Path.
+ BasicBlock *Node = BB;
+ // The "Path" is a candidate path to be added into Candidates set.
+ bool isCandidate = false;
+ do {
+ Path.insert(Node);
+ if (Node == Entry || Candidates.count(Node)) {
+ isCandidate = true;
+ break;
+ }
+ assert(DT.getNode(Node)->getIDom() &&
+ "Entry doens't dominate current Node");
+ Node = DT.getNode(Node)->getIDom()->getBlock();
+ } while (!BBs.count(Node));
+
+ // If isCandidate is false, Node is another Block in BBs dominating
+ // current 'BB'. Drop the nodes on the Path.
+ if (!isCandidate)
+ continue;
+
+ // Add nodes on the Path into Candidates.
+ Candidates.insert(Path.begin(), Path.end());
+ }
+
+ // Sort the nodes in Candidates in top-down order and save the nodes
+ // in Orders.
+ unsigned Idx = 0;
+ SmallVector<BasicBlock *, 16> Orders;
+ Orders.push_back(Entry);
+ while (Idx != Orders.size()) {
+ BasicBlock *Node = Orders[Idx++];
+ for (auto ChildDomNode : DT.getNode(Node)->getChildren()) {
+ if (Candidates.count(ChildDomNode->getBlock()))
+ Orders.push_back(ChildDomNode->getBlock());
+ }
+ }
+
+ // Visit Orders in bottom-up order.
+ typedef std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>
+ InsertPtsCostPair;
+ // InsertPtsMap is a map from a BB to the best insertion points for the
+ // subtree of BB (subtree not including the BB itself).
+ DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
+ InsertPtsMap.reserve(Orders.size() + 1);
+ for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
+ BasicBlock *Node = *RIt;
+ bool NodeInBBs = BBs.count(Node);
+ SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
+ BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;
+
+ // Return the optimal insert points in BBs.
+ if (Node == Entry) {
+ BBs.clear();
+ if (InsertPtsFreq > BFI.getBlockFreq(Node))
+ BBs.insert(Entry);
+ else
+ BBs.insert(InsertPts.begin(), InsertPts.end());
+ break;
+ }
+
+ BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
+ // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child
+ // will update its parent's ParentInsertPts and ParentPtsFreq.
+ SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
+ BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
+ // Choose to insert in Node or in subtree of Node.
+ if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) {
+ ParentInsertPts.insert(Node);
+ ParentPtsFreq += BFI.getBlockFreq(Node);
+ } else {
+ ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());
+ ParentPtsFreq += InsertPtsFreq;
+ }
+ }
+}
+
/// \brief Find an insertion point that dominates all uses.
-Instruction *ConstantHoistingPass::findConstantInsertionPoint(
+SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
const ConstantInfo &ConstInfo) const {
assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
// Collect all basic blocks.
SmallPtrSet<BasicBlock *, 8> BBs;
+ SmallPtrSet<Instruction *, 8> InsertPts;
for (auto const &RCI : ConstInfo.RebasedConstants)
for (auto const &U : RCI.Uses)
BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
- if (BBs.count(Entry))
- return &Entry->front();
+ if (BBs.count(Entry)) {
+ InsertPts.insert(&Entry->front());
+ return InsertPts;
+ }
+
+ if (BFI) {
+ findBestInsertionSet(*DT, *BFI, Entry, BBs);
+ for (auto BB : BBs) {
+ BasicBlock::iterator InsertPt = BB->begin();
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
+ ;
+ InsertPts.insert(&*InsertPt);
+ }
+ return InsertPts;
+ }
while (BBs.size() >= 2) {
BasicBlock *BB, *BB1, *BB2;
BB1 = *BBs.begin();
BB2 = *std::next(BBs.begin());
BB = DT->findNearestCommonDominator(BB1, BB2);
- if (BB == Entry)
- return &Entry->front();
+ if (BB == Entry) {
+ InsertPts.insert(&Entry->front());
+ return InsertPts;
+ }
BBs.erase(BB1);
BBs.erase(BB2);
BBs.insert(BB);
}
assert((BBs.size() == 1) && "Expected only one element.");
Instruction &FirstInst = (*BBs.begin())->front();
- return findMatInsertPt(&FirstInst);
+ InsertPts.insert(findMatInsertPt(&FirstInst));
+ return InsertPts;
}
@@ -557,29 +679,54 @@ bool ConstantHoistingPass::emitBaseConstants() {
bool MadeChange = false;
for (auto const &ConstInfo : ConstantVec) {
// Hoist and hide the base constant behind a bitcast.
- Instruction *IP = findConstantInsertionPoint(ConstInfo);
- IntegerType *Ty = ConstInfo.BaseConstant->getType();
- Instruction *Base =
- new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
- DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB "
- << IP->getParent()->getName() << '\n' << *Base << '\n');
- NumConstantsHoisted++;
+ SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
+ assert(!IPSet.empty() && "IPSet is empty");
+
+ unsigned UsesNum = 0;
+ unsigned ReBasesNum = 0;
+ for (Instruction *IP : IPSet) {
+ IntegerType *Ty = ConstInfo.BaseConstant->getType();
+ Instruction *Base =
+ new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
+ DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant
+ << ") to BB " << IP->getParent()->getName() << '\n'
+ << *Base << '\n');
+
+ // Emit materialization code for all rebased constants.
+ unsigned Uses = 0;
+ for (auto const &RCI : ConstInfo.RebasedConstants) {
+ for (auto const &U : RCI.Uses) {
+ Uses++;
+ BasicBlock *OrigMatInsertBB =
+ findMatInsertPt(U.Inst, U.OpndIdx)->getParent();
+ // If the Base constant is to be inserted in multiple places,
+ // generate the rebase for U using the Base that dominates U.
+ if (IPSet.size() == 1 ||
+ DT->dominates(Base->getParent(), OrigMatInsertBB)) {
+ emitBaseConstants(Base, RCI.Offset, U);
+ ReBasesNum++;
+ }
+ }
+ }
+ UsesNum = Uses;
- // Emit materialization code for all rebased constants.
- for (auto const &RCI : ConstInfo.RebasedConstants) {
- NumConstantsRebased++;
- for (auto const &U : RCI.Uses)
- emitBaseConstants(Base, RCI.Offset, U);
+ // Use the same debug location as the last user of the constant.
+ assert(!Base->use_empty() && "The use list is empty!?");
+ assert(isa<Instruction>(Base->user_back()) &&
+ "All uses should be instructions.");
+ Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
}
+ (void)UsesNum;
+ (void)ReBasesNum;
+ // Expect all uses to be rebased after rebasing is done.
+ assert(UsesNum == ReBasesNum && "Not all uses are rebased");
+
+ NumConstantsHoisted++;
- // Use the same debug location as the last user of the constant.
- assert(!Base->use_empty() && "The use list is empty!?");
- assert(isa<Instruction>(Base->user_back()) &&
- "All uses should be instructions.");
- Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
+ // Base constant is also included in ConstInfo.RebasedConstants, so
+ // deduct 1 from ConstInfo.RebasedConstants.size().
+ NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1;
- // Correct for base constant, which we counted above too.
- NumConstantsRebased--;
MadeChange = true;
}
return MadeChange;
@@ -595,9 +742,11 @@ void ConstantHoistingPass::deleteDeadCastInst() const {
/// \brief Optimize expensive integer constants in the given function.
bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
- DominatorTree &DT, BasicBlock &Entry) {
+ DominatorTree &DT, BlockFrequencyInfo *BFI,
+ BasicBlock &Entry) {
this->TTI = &TTI;
this->DT = &DT;
+ this->BFI = BFI;
this->Entry = &Entry;
// Collect all constant candidates.
collectConstantCandidates(Fn);
@@ -628,7 +777,10 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!runImpl(F, TTI, DT, F.getEntryBlock()))
+ auto BFI = ConstHoistWithBlockFrequency
+ ? &AM.getResult<BlockFrequencyAnalysis>(F)
+ : nullptr;
+ if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
return PreservedAnalyses::all();
PreservedAnalyses PA;
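Aside, an illustrative restatement of the bottom-up cost rule in findBestInsertionSet above: a dominator-tree node reports itself upward as the insertion point when the combined frequency of the points chosen inside its subtree exceeds the node's own frequency, or when the node itself contains a use; otherwise the cheaper subtree points are kept. The names below are hypothetical, not part of the patch:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

struct InsertionChoice {
  SmallPtrSet<BasicBlock *, 16> Points; // best insertion points so far
  BlockFrequency Freq;                  // their combined frequency
};

static void reportToParent(BasicBlock *Node, bool NodeHasUse,
                           const InsertionChoice &Subtree,
                           BlockFrequencyInfo &BFI, InsertionChoice &Parent) {
  if (Subtree.Freq > BFI.getBlockFreq(Node) || NodeHasUse) {
    // Materializing once in Node is cheaper (or required by a use in Node).
    Parent.Points.insert(Node);
    Parent.Freq += BFI.getBlockFreq(Node);
  } else {
    // The points already chosen inside the subtree are cheaper; keep them.
    Parent.Points.insert(Subtree.Points.begin(), Subtree.Points.end());
    Parent.Freq += Subtree.Freq;
  }
}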
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index c843c61ea94ed..b5a4cc2f39533 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
@@ -26,6 +26,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -95,7 +96,8 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
return true;
}
-static bool processPHI(PHINode *P, LazyValueInfo *LVI) {
+static bool processPHI(PHINode *P, LazyValueInfo *LVI,
+ const SimplifyQuery &SQ) {
bool Changed = false;
BasicBlock *BB = P->getParent();
@@ -149,9 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI) {
Changed = true;
}
- // FIXME: Provide TLI, DT, AT to SimplifyInstruction.
- const DataLayout &DL = BB->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(P, DL)) {
+ if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
@@ -488,9 +488,8 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
ConstantInt::getFalse(C->getContext());
}
-static bool runImpl(Function &F, LazyValueInfo *LVI) {
+static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) {
bool FnChanged = false;
-
// Visiting in a pre-order depth-first traversal causes us to simplify early
// blocks before querying later blocks (which require us to analyze early
// blocks). Eagerly simplifying shallow blocks means there is strictly less
@@ -505,7 +504,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI) {
BBChanged |= processSelect(cast<SelectInst>(II), LVI);
break;
case Instruction::PHI:
- BBChanged |= processPHI(cast<PHINode>(II), LVI);
+ BBChanged |= processPHI(cast<PHINode>(II), LVI, SQ);
break;
case Instruction::ICmp:
case Instruction::FCmp:
@@ -566,14 +565,25 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
return false;
LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- return runImpl(F, LVI);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+ auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>();
+ auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
+ const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
+ return runImpl(F, LVI, SQ);
}
PreservedAnalyses
CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
- bool Changed = runImpl(F, LVI);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
+ auto *AC = AM.getCachedResult<AssumptionAnalysis>(F);
+ const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
+ bool Changed = runImpl(F, LVI, SQ);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index 7019287954a15..48eda09c463e2 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -51,6 +51,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -537,9 +538,9 @@ bool GuardWideningImpl::parseRangeChecks(
} else if (match(Check.getBase(),
m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(OpLHS, KnownZero, KnownOne, DL);
- if ((OpRHS->getValue() & KnownZero) == OpRHS->getValue()) {
+ KnownBits Known(BitWidth);
+ computeKnownBits(OpLHS, Known, DL);
+ if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
Check.setBase(OpLHS);
APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
Check.setOffset(ConstantInt::get(Ctx, NewOffset));
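Aside, illustrative only: the Known.Zero test above is what lets the widening treat (OpLHS | OpRHS) as OpLHS + OpRHS. When every set bit of the constant is already known to be zero in the other operand, 'or' and 'add' produce the same value, so the constant can be folded into the range-check offset. The helper name is hypothetical:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool orIsEquivalentToAdd(const KnownBits &LHSKnown, const APInt &C) {
  // No bit position can be set in both operands, so there are no carries.
  return (C & LHSKnown.Zero) == C;
}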
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 5d8701431a2ce..9e2563879da2b 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -152,15 +152,15 @@ private:
Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
- Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const;
+ Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const;
bool rewriteIntrinsicOperands(IntrinsicInst *II,
Value *OldV, Value *NewV) const;
void collectRewritableIntrinsicOperands(
IntrinsicInst *II,
- std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const;
+ std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const;
std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
@@ -204,7 +204,6 @@ static bool isAddressExpression(const Value &V) {
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
- assert(isAddressExpression(V));
const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
@@ -254,8 +253,8 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
// TODO: Move logic to TTI?
void InferAddressSpaces::collectRewritableIntrinsicOperands(
- IntrinsicInst *II, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const {
+ IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const {
switch (II->getIntrinsicID()) {
case Intrinsic::objectsize:
case Intrinsic::amdgcn_atomic_inc:
@@ -272,13 +271,13 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands(
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
- Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const {
+ Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const {
assert(V->getType()->isPointerTy());
if (isAddressExpression(*V) &&
V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
- if (Visited->insert(V).second)
- PostorderStack->push_back(std::make_pair(V, false));
+ if (Visited.insert(V).second)
+ PostorderStack.push_back(std::make_pair(V, false));
}
}
@@ -293,14 +292,18 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
DenseSet<Value *> Visited;
auto PushPtrOperand = [&](Value *Ptr) {
- appendsFlatAddressExpressionToPostorderStack(Ptr, &PostorderStack,
- &Visited);
+ appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
+ Visited);
};
- // We only explore address expressions that are reachable from loads and
- // stores for now because we aim at generating faster loads and stores.
+ // Look at operations that may be worth accelerating by moving to a known
+ // address space. We aim at generating faster loads and stores, but pure
+ // addressing calculations may also be faster.
for (Instruction &I : instructions(F)) {
- if (auto *LI = dyn_cast<LoadInst>(&I))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (!GEP->getType()->isVectorTy())
+ PushPtrOperand(GEP->getPointerOperand());
+ } else if (auto *LI = dyn_cast<LoadInst>(&I))
PushPtrOperand(LI->getPointerOperand());
else if (auto *SI = dyn_cast<StoreInst>(&I))
PushPtrOperand(SI->getPointerOperand());
@@ -316,7 +319,7 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
if (auto *MTI = dyn_cast<MemTransferInst>(MI))
PushPtrOperand(MTI->getRawSource());
} else if (auto *II = dyn_cast<IntrinsicInst>(&I))
- collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
+ collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
// FIXME: Handle vectors of pointers
if (Cmp->getOperand(0)->getType()->isPointerTy()) {
@@ -338,8 +341,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().second = true;
for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
- appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack,
- &Visited);
+ appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
+ Visited);
}
}
return Postorder;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 08eb95a1a3d3e..a0da81605a809 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1289,6 +1289,36 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (PredToDestList.empty())
return false;
+ // If all the predecessors go to a single known successor, we want to fold,
+ // not thread. By doing so, we do not need to duplicate the current block,
+ // and we do not miss opportunities in cases where we don't/can't duplicate.
+ if (OnlyDest && OnlyDest != MultipleDestSentinel) {
+ if (PredToDestList.size() ==
+ (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
+ bool SeenFirstBranchToOnlyDest = false;
+ for (BasicBlock *SuccBB : successors(BB)) {
+ if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest)
+ SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
+ else
+ SuccBB->removePredecessor(BB, true); // This is an unreachable successor.
+ }
+
+ // Finally update the terminator.
+ TerminatorInst *Term = BB->getTerminator();
+ BranchInst::Create(OnlyDest, Term);
+ Term->eraseFromParent();
+
+ // If the condition is now dead due to the removal of the old terminator,
+ // erase it.
+ auto *CondInst = dyn_cast<Instruction>(Cond);
+ if (CondInst && CondInst->use_empty())
+ CondInst->eraseFromParent();
+ // FIXME: in case this instruction is defined in the current BB and it
+ // resolves to a single value from all predecessors, we can do RAUW.
+ return true;
+ }
+ }
+
// Determine which is the most common successor. If we have many inputs and
// this block is a switch, we want to start by threading the batch that goes
// to the most popular destination first. If we only know about one
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 946d85d7360fd..5042fc18d7c49 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -345,6 +345,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
if (!SI->isSimple())
return false;
+ // Don't convert stores of non-integral pointer types to memsets (which
+ // store integers).
+ if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
+ return false;
+
// Avoid merging nontemporal stores.
if (SI->getMetadata(LLVMContext::MD_nontemporal))
return false;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e5689368de80d..8ce96cf1b7a67 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -58,13 +58,14 @@ class LoopRotate {
AssumptionCache *AC;
DominatorTree *DT;
ScalarEvolution *SE;
+ const SimplifyQuery &SQ;
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE)
- : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) {
- }
+ DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ)
+ : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
+ SQ(SQ) {}
bool processLoop(Loop *L);
private:
@@ -311,8 +312,6 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
@@ -342,8 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifiable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- // FIXME: Provide TLI, DT, AC to SimplifyInstruction.
- Value *V = SimplifyInstruction(C, DL);
+ Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C));
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
@@ -671,7 +669,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
- LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE);
+ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC);
+ LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ);
bool Changed = LR.processLoop(&L);
if (!Changed)
@@ -714,7 +714,11 @@ public:
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
- LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE);
+ auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const SimplifyQuery SQ(DL, TLI, DT, AC);
+ LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ);
return LR.processLoop(L);
}
};
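Both pass-manager paths above now funnel their analyses through a SimplifyQuery instead of passing DL/TLI/DT/AC individually, and the same pattern recurs in the CorrelatedValuePropagation and SimplifyInstructions changes. A minimal sketch of the resulting call pattern; the helper is hypothetical and not part of this patch:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper showing the pattern: build one query per function, then
// refine it with the context instruction for each simplification.
static bool simplifyAll(Function &F, const TargetLibraryInfo *TLI,
                        const DominatorTree *DT, AssumptionCache *AC) {
  const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
  bool Changed = false;
  for (Instruction &I : instructions(F)) {
    if (I.use_empty())
      continue;
    if (Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I))) {
      I.replaceAllUsesWith(V); // dead-instruction cleanup is left to callers
      Changed = true;
    }
  }
  return Changed;
}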
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index a99c9999c6191..8fa806a7e8bcc 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This pass transforms loops that contain branches on loop-invariant conditions
-// to have multiple loops. For example, it turns the left into the right code:
+// to multiple loops. For example, it turns the left into the right code:
//
// for (...) if (lic)
// A for (...)
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 659353e912fe0..49ce0262c97b0 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -352,20 +352,10 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
// Third: Check all the users for an invert
BasicBlock *Parent = Inst->getParent();
- for (User *U : Condition->users()) {
- if (Instruction *I = dyn_cast<Instruction>(U)) {
+ for (User *U : Condition->users())
+ if (Instruction *I = dyn_cast<Instruction>(U))
if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
return I;
- }
- }
-
- // Avoid creating a new instruction in the common case of a compare.
- if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) {
- if (Cmp->hasOneUse()) {
- Cmp->setPredicate(Cmp->getInversePredicate());
- return Cmp;
- }
- }
// Last option: Create a new instruction
return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 1cfe3bd536482..7ffdad597a9b8 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -256,14 +257,14 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V,
unsigned HiBits = LongLen - ShortLen;
const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
- APInt Zeros(LongLen, 0), Ones(LongLen, 0);
+ KnownBits Known(LongLen);
- computeKnownBits(V, Zeros, Ones, DL);
+ computeKnownBits(V, Known, DL);
- if (Zeros.countLeadingOnes() >= HiBits)
+ if (Known.Zero.countLeadingOnes() >= HiBits)
return VALRNG_KNOWN_SHORT;
- if (Ones.countLeadingZeros() < HiBits)
+ if (Known.One.countLeadingZeros() < HiBits)
return VALRNG_LIKELY_LONG;
// Long integer divisions are often used in hashtable implementations. It's
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 82552684b832f..ed72099ec3ed6 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -73,24 +73,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
}
/// \brief Build a set of blocks to extract if the input blocks are viable.
-template <typename IteratorT>
-static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
- IteratorT BBEnd) {
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
+ assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
SetVector<BasicBlock *> Result;
- assert(BBBegin != BBEnd);
-
// Loop over the blocks, adding them to our set-vector, and aborting with an
// empty set if we encounter invalid blocks.
- do {
- if (!Result.insert(*BBBegin))
- llvm_unreachable("Repeated basic blocks in extraction input");
+ for (BasicBlock *BB : BBs) {
- if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) {
+ // If this block is dead, don't process it.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
+ if (!Result.insert(BB))
+ llvm_unreachable("Repeated basic blocks in extraction input");
+ if (!CodeExtractor::isBlockValidForExtraction(*BB)) {
Result.clear();
return Result;
}
- } while (++BBBegin != BBEnd);
+ }
#ifndef NDEBUG
for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
@@ -106,23 +108,17 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
return Result;
}
-/// \brief Helper to call buildExtractionBlockSet with an ArrayRef.
-static SetVector<BasicBlock *>
-buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
- return buildExtractionBlockSet(BBs.begin(), BBs.end());
-}
-
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {}
+ BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())),
+ BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)),
NumExitBlocks(~0U) {}
/// definedInRegion - Return true if the specified value is defined in the
@@ -194,9 +190,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// containing PHI nodes merging values from outside of the region, and a
// second that contains all of the code for the block and merges back any
// incoming values from inside of the region.
- BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator();
- BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
- Header->getName()+".ce");
+ BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);
// We only want to code extract the second block now, and it becomes the new
// header of the region.
@@ -205,11 +199,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
Blocks.insert(NewBB);
Header = NewBB;
- // Okay, update dominator sets. The blocks that dominate the new one are the
- // blocks that dominate TIBB plus the new block itself.
- if (DT)
- DT->splitBlock(NewBB);
-
// Okay, now we need to adjust the PHI nodes and any branches from within the
// region to go to the new header block instead of the old header block.
if (NumPredsFromRegion) {
@@ -224,12 +213,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// Okay, everything within the region is now branching to the right block, we
// just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+ BasicBlock::iterator AfterPHIs;
for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
PHINode *PN = cast<PHINode>(AfterPHIs);
// Create a new PHI node in the new region, which has an incoming value
// from OldPred of PN.
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
PN->getName() + ".ce", &NewBB->front());
+ PN->replaceAllUsesWith(NewPN);
NewPN->addIncoming(PN, OldPred);
// Loop over all of the incoming value in PN, moving them to NewPN if they
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c5442762643b..d3002c5fb7502 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -1038,9 +1039,9 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
"getOrEnforceKnownAlignment expects a pointer!");
unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
- unsigned TrailZ = KnownZero.countTrailingOnes();
+ KnownBits Known(BitWidth);
+ computeKnownBits(V, Known, DL, 0, AC, CxtI, DT);
+ unsigned TrailZ = Known.Zero.countTrailingOnes();
// Avoid trouble with ridiculously large TrailZ values, such as
// those computed from a null pointer.
@@ -1268,21 +1269,37 @@ static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
}
}
-/// Prepend \p DIExpr with a deref and offset operation.
+enum { WithStackValue = true };
+
+/// Prepend \p DIExpr with a deref and offset operation and optionally turn it
+/// into a stack value.
static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
- bool Deref, int64_t Offset) {
- if (!Deref && !Offset)
+ bool Deref, int64_t Offset = 0,
+ bool StackValue = false) {
+ if (!Deref && !Offset && !StackValue)
return DIExpr;
- // Create a copy of the original DIDescriptor for user variable, prepending
- // "deref" operation to a list of address elements, as new llvm.dbg.declare
- // will take a value storing address of the memory for variable, not
- // alloca itself.
- SmallVector<uint64_t, 4> Ops;
+
+ SmallVector<uint64_t, 8> Ops;
+ appendOffset(Ops, Offset);
if (Deref)
Ops.push_back(dwarf::DW_OP_deref);
- appendOffset(Ops, Offset);
if (DIExpr)
- Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ for (auto Op : DIExpr->expr_ops()) {
+ // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment.
+ if (StackValue) {
+ if (Op.getOp() == dwarf::DW_OP_stack_value)
+ StackValue = false;
+ else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
+ Ops.push_back(dwarf::DW_OP_stack_value);
+ StackValue = false;
+ }
+ }
+ Ops.push_back(Op.getOp());
+ for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+ Ops.push_back(Op.getArg(I));
+ }
+ if (StackValue)
+ Ops.push_back(dwarf::DW_OP_stack_value);
return Builder.createExpression(Ops);
}
@@ -1374,12 +1391,15 @@ void llvm::salvageDebugInfo(Instruction &I) {
unsigned BitWidth =
M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
APInt Offset(BitWidth, 0);
- // Rewrite a constant GEP into a DIExpression.
+ // Rewrite a constant GEP into a DIExpression. Since we are performing
+ // arithmetic to compute the variable's *value* in the DIExpression, we
+ // need to mark the expression with a DW_OP_stack_value.
if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
auto *DIExpr = DVI->getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
- // GEP offsets are i32 and thus alwaus fit into an int64_t.
- DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue());
+ // GEP offsets are i32 and thus always fit into an int64_t.
+ DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(),
+ WithStackValue);
DVI->setOperand(0, MDWrap(I.getOperand(0)));
DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
@@ -1391,7 +1411,7 @@ void llvm::salvageDebugInfo(Instruction &I) {
// Rewrite the load into DW_OP_deref.
auto *DIExpr = DVI->getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
- DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0);
+ DIExpr = prependDIExpr(DIB, DIExpr, WithDeref);
DVI->setOperand(0, MDWrap(I.getOperand(0)));
DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 3c669ce644e20..43ab725b07691 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -318,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
return false;
}
+ // The current loop unroll pass can only unroll loops with a single latch
+ // that's a conditional branch exiting the loop.
+ // FIXME: The implementation can be extended to work with more complicated
+ // cases, e.g. loops with multiple latches.
BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
@@ -328,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
return false;
}
+ auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
+ return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+ };
+
+ if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
+ DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
+ " exiting the loop can be unrolled\n");
+ return false;
+ }
+
if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index b375d51005d57..8959e77438e99 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
+ // Don't handle unreachable blocks. If there are successors with phis, this
+ // would leave them behind with missing predecessors.
+ if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
+ CurBlock->getSinglePredecessor() == CurBlock) {
+ DeleteList.insert(CurBlock);
+ return;
+ }
+
// If there is only the default destination, just branch.
if (!SI->getNumCases()) {
BranchInst::Create(Default, CurBlock);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2f575b9d50272..f86e97b6cc72f 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -60,6 +60,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -3055,6 +3056,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
BasicBlock *QFB = QBI->getSuccessor(1);
BasicBlock *PostBB = QFB->getSingleSuccessor();
+ // Make sure we have a good guess for PostBB. If QTB's only successor is
+ // QFB, then QFB is a better PostBB.
+ if (QTB->getSingleSuccessor() == QFB)
+ PostBB = QFB;
+
+ // If we couldn't find a good PostBB, stop.
+ if (!PostBB)
+ return false;
+
bool InvertPCond = false, InvertQCond = false;
// Canonicalize fallthroughs to the true branches.
if (PFB == QBI->getParent()) {
@@ -3079,8 +3089,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
};
- if (!PostBB ||
- !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+ if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
!HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
return false;
if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
@@ -3746,7 +3755,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
if (!isa<DbgInfoIntrinsic>(I))
return false;
- SmallSet<BasicBlock *, 4> TrivialUnwindBlocks;
+ SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
auto *PhiLPInst = cast<PHINode>(RI->getValue());
// Check incoming blocks to see if any of them are trivial.
@@ -4359,8 +4368,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
- APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
+ KnownBits Known(Bits);
+ computeKnownBits(Cond, Known, DL, 0, AC, SI);
// We can also eliminate cases by determining that their values are outside of
// the limited range of the condition based on how many significant (non-sign)
@@ -4372,7 +4381,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
SmallVector<ConstantInt *, 8> DeadCases;
for (auto &Case : SI->cases()) {
APInt CaseVal = Case.getCaseValue()->getValue();
- if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne ||
+ if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n");
@@ -4386,7 +4395,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
const unsigned NumUnknownBits =
- Bits - (KnownZero | KnownOne).countPopulation();
+ Bits - (Known.Zero | Known.One).countPopulation();
assert(NumUnknownBits <= Bits);
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
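For the dead-case test in the hunk above: Known.Zero.intersects(CaseVal) rejects a case whose value sets a bit known to be zero, and !Known.One.isSubsetOf(CaseVal) rejects one that clears a bit known to be one. A worked example with made-up masks (not taken from any real switch):

#include "llvm/ADT/APInt.h"
#include <cassert>

using namespace llvm;

int main() {
  APInt KnownOne(8, 0x01);  // bit 0 of the condition is known to be one
  APInt KnownZero(8, 0x08); // bit 3 is known to be zero
  APInt CaseA(8, 0x04);     // bit 0 clear: KnownOne not a subset -> dead
  APInt CaseB(8, 0x09);     // bit 3 set: intersects KnownZero    -> dead
  APInt CaseC(8, 0x05);     // consistent with both masks         -> may be live
  assert(!KnownOne.isSubsetOf(CaseA));
  assert(KnownZero.intersects(CaseB));
  assert(KnownOne.isSubsetOf(CaseC) && !KnownZero.intersects(CaseC));
  return 0;
}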
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index f6070868de44e..27373427d4f7d 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -35,10 +35,8 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions removed");
-static bool runImpl(Function &F, const DominatorTree *DT,
- const TargetLibraryInfo *TLI, AssumptionCache *AC,
+static bool runImpl(Function &F, const SimplifyQuery &SQ,
OptimizationRemarkEmitter *ORE) {
- const DataLayout &DL = F.getParent()->getDataLayout();
SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -56,7 +54,8 @@ static bool runImpl(Function &F, const DominatorTree *DT,
// Don't waste time simplifying unused instructions.
if (!I->use_empty()) {
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) {
+ if (Value *V =
+ SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
Next->insert(cast<Instruction>(U));
@@ -65,7 +64,7 @@ static bool runImpl(Function &F, const DominatorTree *DT,
Changed = true;
}
}
- if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
+ if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) {
// RecursivelyDeleteTriviallyDeadInstruction can remove more than one
// instruction, so simply incrementing the iterator does not work.
// When instructions get deleted re-iterate instead.
@@ -113,8 +112,9 @@ namespace {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
OptimizationRemarkEmitter *ORE =
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return runImpl(F, DT, TLI, AC, ORE);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const SimplifyQuery SQ(DL, TLI, DT, AC);
+ return runImpl(F, SQ, ORE);
}
};
}
@@ -141,7 +141,9 @@ PreservedAnalyses InstSimplifierPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
+ bool Changed = runImpl(F, SQ, &ORE);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index aa71e3669ea27..2c1c30463a233 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -37,10 +38,6 @@ using namespace llvm;
using namespace PatternMatch;
static cl::opt<bool>
- ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden,
- cl::desc("Treat error-reporting calls as cold"));
-
-static cl::opt<bool>
EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
cl::init(false),
cl::desc("Enable unsafe double to float "
@@ -459,11 +456,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
Value *Offset = GEP->getOperand(2);
unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI,
- nullptr);
- KnownZero.flipAllBits();
+ KnownBits Known(BitWidth);
+ computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr);
+ Known.Zero.flipAllBits();
size_t ArrSize =
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
@@ -477,7 +472,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
// optimize if we can prove that the program has undefined behavior when
// Offset is outside that range. That is the case when GEP->getOperand(0)
// is a pointer to an object whose memory extent is NullTermIdx+1.
- if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) ||
+ if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
(GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
NullTermIdx == ArrSize - 1))
return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
@@ -846,6 +841,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
// Is the inner call really malloc()?
Function *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return nullptr;
+
LibFunc Func;
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
Func != LibFunc_malloc)
@@ -930,6 +928,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
if (V == nullptr)
return nullptr;
+ // If the call isn't an intrinsic, check that it isn't within a function with
+ // the same name as the float version of this call.
+ //
+ // e.g. inline float expf(float val) { return (float) exp((double) val); }
+ //
+ // A similar definition exists in the MinGW-w64 math.h header file which,
+ // when compiled with -O2 -ffast-math, causes the generation of infinite
+ // loops where expf is called.
+ if (!Callee->isIntrinsic()) {
+ const Function *F = CI->getFunction();
+ StringRef FName = F->getName();
+ StringRef CalleeName = Callee->getName();
+ if ((FName.size() == (CalleeName.size() + 1)) &&
+ (FName.back() == 'f') &&
+ FName.startswith(CalleeName))
+ return nullptr;
+ }
+
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
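The guard above is purely name-based. A standalone illustration of the check with a hypothetical helper and example values (not part of the patch):

#include "llvm/ADT/StringRef.h"
#include <cassert>

using namespace llvm;

// True if FName looks like the single-precision wrapper of CalleeName,
// e.g. a caller named "expf" wrapping a call to "exp".
static bool isFloatWrapperName(StringRef FName, StringRef CalleeName) {
  return FName.size() == CalleeName.size() + 1 && FName.back() == 'f' &&
         FName.startswith(CalleeName);
}

int main() {
  assert(isFloatWrapperName("expf", "exp"));   // the shrink would be skipped
  assert(!isFloatWrapperName("myexp", "exp")); // unrelated caller is fine
  return 0;
}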
@@ -1632,7 +1648,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
}
static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
- if (!ColdErrorCalls || !Callee || !Callee->isDeclaration())
+ if (!Callee || !Callee->isDeclaration())
return false;
if (StreamArg < 0)
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 4409d7a404f8b..97dcb40a1d721 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
@@ -65,7 +66,9 @@ public:
bool run();
private:
- Value *getPointerOperand(Value *I);
+ Value *getPointerOperand(Value *I) const;
+
+ GetElementPtrInst *getSourceGEP(Value *Src) const;
unsigned getPointerAddressSpace(Value *I);
@@ -215,7 +218,7 @@ bool Vectorizer::run() {
return Changed;
}
-Value *Vectorizer::getPointerOperand(Value *I) {
+Value *Vectorizer::getPointerOperand(Value *I) const {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->getPointerOperand();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -231,6 +234,19 @@ unsigned Vectorizer::getPointerAddressSpace(Value *I) {
return -1;
}
+GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const {
+ // First strip pointer bitcasts. Make sure pointee size is the same with
+ // and without casts.
+ // TODO: a stride set by the add instruction below can match the difference
+ // in pointee type size here. Currently it will not be vectorized.
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *SrcBase = SrcPtr->stripPointerCasts();
+ if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) ==
+ DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType()))
+ SrcPtr = SrcBase;
+ return dyn_cast<GetElementPtrInst>(SrcPtr);
+}
+
// FIXME: Merge with llvm::isConsecutiveAccess
bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
Value *PtrA = getPointerOperand(A);
@@ -283,8 +299,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
// Look through GEPs after checking they're the same except for the last
// index.
- GetElementPtrInst *GEPA = dyn_cast<GetElementPtrInst>(getPointerOperand(A));
- GetElementPtrInst *GEPB = dyn_cast<GetElementPtrInst>(getPointerOperand(B));
+ GetElementPtrInst *GEPA = getSourceGEP(A);
+ GetElementPtrInst *GEPB = getSourceGEP(B);
if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
return false;
unsigned FinalIndex = GEPA->getNumOperands() - 1;
@@ -328,11 +344,9 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
// If any bits are known to be zero other than the sign bit in OpA, we can
// add 1 to it while guaranteeing no overflow of any sort.
if (!Safe) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT);
- KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1);
- if (KnownZero != 0)
+ KnownBits Known(BitWidth);
+ computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
+ if (Known.Zero.countTrailingZeros() < (BitWidth - 1))
Safe = true;
}
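The rewritten check asks whether any bit below the sign bit is known zero: countTrailingZeros of Known.Zero is less than BitWidth - 1 exactly when such a bit exists. A small worked check of that predicate with made-up masks:

#include "llvm/ADT/APInt.h"
#include <cassert>

using namespace llvm;

int main() {
  unsigned BitWidth = 32;
  APInt NothingKnown(BitWidth, 0);                      // no bit known zero
  APInt SignOnly = APInt::getOneBitSet(BitWidth, 31);   // only the sign bit
  APInt LowBitKnown = APInt::getOneBitSet(BitWidth, 4); // a non-sign bit
  assert(!(NothingKnown.countTrailingZeros() < BitWidth - 1)); // not safe
  assert(!(SignOnly.countTrailingZeros() < BitWidth - 1));     // not safe
  assert(LowBitKnown.countTrailingZeros() < BitWidth - 1);     // safe to add 1
  return 0;
}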
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7eb8fabe0b2f0..87ce0194dad6f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3586,8 +3586,12 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
IRBuilder<> B(MiddleBlock->getTerminator());
Value *CountMinusOne = B.CreateSub(
CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
- Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
- "cast.cmo");
+ Value *CMO =
+ !II.getStep()->getType()->isIntegerTy()
+ ? B.CreateCast(Instruction::SIToFP, CountMinusOne,
+ II.getStep()->getType())
+ : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
+ CMO->setName("cast.cmo");
Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
@@ -4516,14 +4520,15 @@ void InnerLoopVectorizer::predicateInstructions() {
for (auto KV : PredicatedInstructions) {
BasicBlock::iterator I(KV.first);
BasicBlock *Head = I->getParent();
- auto *BB = SplitBlock(Head, &*std::next(I), DT, LI);
auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false,
/*BranchWeights=*/nullptr, DT, LI);
I->moveBefore(T);
sinkScalarOperands(&*I);
- I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if");
- BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue");
+ BasicBlock *PredicatedBlock = I->getParent();
+ Twine BBNamePrefix = Twine("pred.") + I->getOpcodeName();
+ PredicatedBlock->setName(BBNamePrefix + ".if");
+ PredicatedBlock->getSingleSuccessor()->setName(BBNamePrefix + ".continue");
// If the instruction is non-void create a Phi node at reconvergence point.
if (!I->getType()->isVoidTy()) {
@@ -7324,8 +7329,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
VectorTy, VF - 1, VectorTy);
- // TODO: IF-converted IFs become selects.
- return 0;
+ // Phi nodes in non-header blocks (not inductions, reductions, etc.) are
+ // converted into select instructions. We require N - 1 selects per phi
+ // node, where N is the number of incoming values.
+ if (VF > 1 && Phi->getParent() != TheLoop->getHeader())
+ return (Phi->getNumIncomingValues() - 1) *
+ TTI.getCmpSelInstrCost(
+ Instruction::Select, ToVectorTy(Phi->getType(), VF),
+ ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF));
+
+ return TTI.getCFInstrCost(Instruction::PHI);
}
case Instruction::UDiv:
case Instruction::SDiv:
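The phi cost model in the hunk above charges one vector select per extra incoming value. A minimal sketch of that formula, assuming the TargetTransformInfo cost hook shown in the hunk; this is illustrative only, not the pass's actual code path:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// An if-converted phi with N incoming values lowers to a chain of N - 1
// vector selects, so its cost is (N - 1) times the select cost at width VF.
static int phiAsSelectsCost(const TargetTransformInfo &TTI, PHINode *Phi,
                            unsigned VF) {
  Type *VecTy = VectorType::get(Phi->getType(), VF);
  Type *CondTy = VectorType::get(Type::getInt1Ty(Phi->getContext()), VF);
  return (Phi->getNumIncomingValues() - 1) *
         TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy);
}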