Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 192
1 file changed, 143 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c888adeafca5..6778af22f532 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -23,16 +23,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -174,12 +173,11 @@ static cl::opt<bool> DisablePreheaderProtect(
cl::desc("Disable protection against removing loop preheaders"));
static cl::opt<bool> ProfileGuidedSectionPrefix(
- "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true),
cl::desc("Use profile info to add section prefix for hot/cold functions"));
static cl::opt<bool> ProfileUnknownInSpecialSection(
- "profile-unknown-in-special-section", cl::Hidden, cl::init(false),
- cl::ZeroOrMore,
+ "profile-unknown-in-special-section", cl::Hidden,
cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
"profile, we cannot tell the function is cold for sure because "
"it may be a function newly added without ever being sampled. "
@@ -188,6 +186,15 @@ static cl::opt<bool> ProfileUnknownInSpecialSection(
"to handle it in a different way than .text section, to save "
"RAM for example. "));
+static cl::opt<bool> BBSectionsGuidedSectionPrefix(
+ "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
+ cl::desc("Use the basic-block-sections profile to determine the text "
+ "section prefix for hot functions. Functions with "
+ "basic-block-sections profile will be placed in `.text.hot` "
+ "regardless of their FDO profile info. Other functions won't be "
+ "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
+ "profiles."));
+
static cl::opt<unsigned> FreqRatioToSkipMerge(
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
cl::desc("Skip merging empty blocks if (frequency of empty block) / "
@@ -274,6 +281,7 @@ class TypePromotionTransaction;
const TargetLowering *TLI = nullptr;
const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI = nullptr;
+ const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
@@ -349,6 +357,7 @@ class TypePromotionTransaction;
AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();
}
private:
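
The profile reader is wired up as an optional analysis rather than a hard
dependency. A minimal sketch of the pattern this hunk uses (names from this
patch; surrounding pass boilerplate elided):

    // getAnalysisUsage: declare the analysis as used-if-available, so the
    // legacy pass manager supplies it when it was scheduled but never forces
    // it into the pipeline.
    AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();

    // runOnFunction: the analysis may be absent, so null-check the result.
    auto *BBSPR = getAnalysisIfAvailable<BasicBlockSectionsProfileReader>();
    if (BBSPR && BBSPR->isFunctionHot(F.getName())) {
      // ... act on the basic-block-sections profile ...
    }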
@@ -401,6 +410,8 @@ class TypePromotionTransaction;
bool optimizeFunnelShift(IntrinsicInst *Fsh);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
+ bool optimizeSwitchType(SwitchInst *SI);
+ bool optimizeSwitchPhiConstants(SwitchInst *SI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
@@ -442,6 +453,7 @@ char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
@@ -473,8 +485,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BBSectionsProfileReader =
+ getAnalysisIfAvailable<BasicBlockSectionsProfileReader>();
OptSize = F.hasOptSize();
- if (ProfileGuidedSectionPrefix) {
+ // Use the basic-block-sections profile to promote hot functions to
+ // .text.hot if requested.
+ if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
+ BBSectionsProfileReader->isFunctionHot(F.getName())) {
+ F.setSectionPrefix("hot");
+ } else if (ProfileGuidedSectionPrefix) {
// The hot attribute overwrites profile count based hotness while profile
// counts based hotness overwrites the cold attribute.
// This is a conservative behavior.
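
The prefix set here feeds section selection later in codegen. A hedged sketch
of the effect (exact section naming depends on the target object-file lowering
and on whether each function gets its own section):

    // "hot" steers the function toward .text.hot (typically .text.hot.<name>
    // with function sections); FDO-based placement uses the same hook with
    // "hot"/"unlikely". The basic-block-sections profile now takes precedence
    // over the FDO-derived prefix for functions it lists.
    F.setSectionPrefix("hot");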
@@ -524,7 +542,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
- EverMadeChange |= SplitIndirectBrCriticalEdges(F);
+ EverMadeChange |=
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
bool MadeChange = true;
while (MadeChange) {
@@ -2037,7 +2056,8 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
return false;
// Bail if the value is never zero.
- if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL))
+ Use &Op = CountZeros->getOperandUse(0);
+ if (isKnownNonZero(Op, *DL))
return false;
// The intrinsic will be sunk behind a compare against zero and branch.
@@ -2058,7 +2078,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Replace the unconditional branch that was created by the first split with
// a compare against zero and a conditional branch.
Value *Zero = Constant::getNullValue(Ty);
- Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
+ // Avoid introducing branch on poison. This also replaces the ctz operand.
+ if (!isGuaranteedNotToBeUndefOrPoison(Op))
+ Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
+ Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
StartBlock->getTerminator()->eraseFromParent();
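
The freeze is needed because despeculation turns a data dependence into a
control dependence: if the cttz/ctlz operand may be poison or undef, the new
compare-and-branch would branch on poison, which is immediate undefined
behavior. A sketch of the emitted shape (block and value names from this
function):

    // Before: a poison operand only produces a poison result.
    //   %z = call i64 @llvm.cttz.i64(i64 %x, i1 false)
    // After despeculation %x feeds a branch, so it is frozen first:
    //   %x.fr = freeze i64 %x
    //   %cmpz = icmp eq i64 %x.fr, 0
    //   br i1 %cmpz, label %end, label %call
    Use &Op = CountZeros->getOperandUse(0);
    if (!isGuaranteedNotToBeUndefOrPoison(Op))
      Op = Builder.CreateFreeze(Op, Op->getName() + ".fr"); // also rewrites
                                                            // the ctz operand
    Builder.CreateCondBr(Builder.CreateICmpEQ(Op, Zero, "cmpz"), EndBlock,
                         CallBlock);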
@@ -2101,7 +2124,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Align the pointer arguments to this call if the target thinks it's a good
// idea
- unsigned MinSize, PrefAlign;
+ unsigned MinSize;
+ Align PrefAlign;
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
@@ -2115,12 +2139,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
0);
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
uint64_t Offset2 = Offset.getLimitedValue();
- if ((Offset2 & (PrefAlign-1)) != 0)
+ if (!isAligned(PrefAlign, Offset2))
continue;
AllocaInst *AI;
- if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
+ if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
- AI->setAlignment(Align(PrefAlign));
+ AI->setAlignment(PrefAlign);
// Global variables can only be aligned if they are defined in this
// object (i.e. they are uniquely initialized in this object), and
// over-aligning global variables that have an explicit section is
@@ -2130,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
GV->getPointerAlignment(*DL) < PrefAlign &&
DL->getTypeAllocSize(GV->getValueType()) >=
MinSize + Offset2)
- GV->setAlignment(MaybeAlign(PrefAlign));
+ GV->setAlignment(PrefAlign);
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
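
These checks migrate from raw unsigned arithmetic to the Align type from
llvm/Support/Alignment.h. A small self-contained sketch of the equivalences
(values assumed for illustration):

    #include "llvm/Support/Alignment.h"
    using namespace llvm;

    void alignmentChecks() {
      Align PrefAlign(16);   // must be a power of two, enforced by the type
      uint64_t Offset2 = 24;
      // Replaces the manual mask test (Offset2 & (PrefAlign - 1)) != 0:
      bool Misaligned = !isAligned(PrefAlign, Offset2);
      // Align values compare directly, replacing getAlignment() < PrefAlign:
      bool NeedsBump = Align(8) < PrefAlign;
      (void)Misaligned;
      (void)NeedsBump;
    }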
@@ -3371,7 +3395,7 @@ public:
if (!Visited.insert(P).second)
continue;
if (auto *PI = dyn_cast<Instruction>(P))
- if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
+ if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
for (auto *U : PI->users())
WorkList.push_back(cast<Value>(U));
Put(PI, V);
@@ -3416,7 +3440,7 @@ public:
void destroyNewNodes(Type *CommonType) {
// For safe erasing, replace the uses with dummy value first.
- auto *Dummy = UndefValue::get(CommonType);
+ auto *Dummy = PoisonValue::get(CommonType);
for (auto *I : AllPhiNodes) {
I->replaceAllUsesWith(Dummy);
I->eraseFromParent();
@@ -3785,7 +3809,7 @@ private:
SmallVector<Value *, 32> Worklist;
assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
"Address must be a Phi or Select node");
- auto *Dummy = UndefValue::get(CommonType);
+ auto *Dummy = PoisonValue::get(CommonType);
Worklist.push_back(Original);
while (!Worklist.empty()) {
Value *Current = Worklist.pop_back_val();
@@ -4550,9 +4574,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
if (!RHS || RHS->getBitWidth() > 64)
return false;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1LL << Scale;
+ int64_t Scale = Opcode == Instruction::Shl
+ ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
+ : RHS->getSExtValue();
return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
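
The rewrite closes a host-side UB hole: the old code sign-extended the shift
amount first, and `1LL << Scale` is undefined in C++ for negative amounts or
amounts of 64 and above, which over-wide or nonsensical IR constants could
produce. A sketch of the clamp (widths assumed):

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    using namespace llvm;

    int64_t scaleForShl(const APInt &Amt) {
      // getLimitedValue(L) returns min(value, L), so the host-side shift
      // amount is capped below the value's bit width and stays well defined.
      return 1LL << Amt.getLimitedValue(Amt.getBitWidth() - 1);
    }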
@@ -4783,7 +4807,6 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
}
// It isn't profitable to do this, roll back.
- //cerr << "NOT FOLDING: " << *I;
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
TPT.rollback(LastKnownGood);
@@ -4836,7 +4859,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
- // operand, we can't fold it!
+ // operand, we can't fold it! TODO: Also handle C_Address?
if (OpInfo.CallOperandVal == OpVal &&
(OpInfo.ConstraintType != TargetLowering::C_Memory ||
!OpInfo.isIndirect))
@@ -5158,8 +5181,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// GEP, collect the GEP. Skip the GEPs that are the new bases of
// previously split data structures.
LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
- if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
- LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
+ LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
}
NewAddrMode.OriginalValue = V;
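
The find()-then-assign pair collapses into one probe: DenseMap::insert is a
no-op when the key is already present, so each GEP keeps the sequential ID it
was assigned on first sight. A minimal sketch (illustrative key type):

    #include "llvm/ADT/DenseMap.h"

    unsigned idForKey(llvm::DenseMap<void *, unsigned> &ID, void *Key) {
      // The mapped value (map size at insertion time) is only written for
      // new keys; insert() returns {iterator, inserted-flag}.
      return ID.insert({Key, unsigned(ID.size())}).first->second;
    }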
@@ -5323,11 +5345,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- ResultPtr =
- AddrMode.InBounds
- ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
- "sunkaddr")
- : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr", AddrMode.InBounds);
}
ResultIndex = V;
@@ -5338,11 +5357,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- SunkAddr =
- AddrMode.InBounds
- ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
- "sunkaddr")
- : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr",
+ AddrMode.InBounds);
}
if (SunkAddr->getType() != Addr->getType())
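
Both conditional variants collapse because IRBuilder::CreateGEP takes the
inbounds flag as a trailing parameter, so AddrMode.InBounds can be forwarded
directly:

    // These two calls are equivalent:
    //   Builder.CreateInBoundsGEP(I8Ty, Ptr, Idx, "sunkaddr");
    //   Builder.CreateGEP(I8Ty, Ptr, Idx, "sunkaddr", /*IsInBounds=*/true);
    Value *Sunk = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr",
                                    AddrMode.InBounds);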
@@ -5619,6 +5635,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ // TODO: Also handle C_Address?
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
@@ -6002,31 +6019,25 @@ bool CodeGenPrepare::optimizePhiType(
for (Value *V : Phi->incoming_values()) {
if (auto *OpPhi = dyn_cast<PHINode>(V)) {
if (!PhiNodes.count(OpPhi)) {
- if (Visited.count(OpPhi))
+ if (!Visited.insert(OpPhi).second)
return false;
PhiNodes.insert(OpPhi);
- Visited.insert(OpPhi);
Worklist.push_back(OpPhi);
}
} else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
if (!OpLoad->isSimple())
return false;
- if (!Defs.count(OpLoad)) {
- Defs.insert(OpLoad);
+ if (Defs.insert(OpLoad).second)
Worklist.push_back(OpLoad);
- }
} else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
- if (!Defs.count(OpEx)) {
- Defs.insert(OpEx);
+ if (Defs.insert(OpEx).second)
Worklist.push_back(OpEx);
- }
} else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
if (!ConvertTy)
ConvertTy = OpBC->getOperand(0)->getType();
if (OpBC->getOperand(0)->getType() != ConvertTy)
return false;
- if (!Defs.count(OpBC)) {
- Defs.insert(OpBC);
+ if (Defs.insert(OpBC).second) {
Worklist.push_back(OpBC);
AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
!isa<ExtractElementInst>(OpBC->getOperand(0));
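
Same single-probe idiom as the DenseMap change earlier: set-style insert()
returns a {iterator, inserted} pair, so the separate count()/insert() double
lookup becomes one. Sketch:

    #include "llvm/ADT/SmallPtrSet.h"

    template <typename T>
    bool enqueueOnce(llvm::SmallPtrSet<T *, 8> &Seen, T *V) {
      // true only the first time V is seen; callers push work (or bail, as
      // the Visited check above does) based on this flag.
      return Seen.insert(V).second;
    }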
@@ -6127,7 +6138,7 @@ bool CodeGenPrepare::optimizePhiTypes(Function &F) {
// Remove any old phi's that have been converted.
for (auto *I : DeletedInstrs) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
@@ -6979,12 +6990,12 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
return Changed;
}
-bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
Value *Cond = SI->getCondition();
Type *OldType = Cond->getType();
LLVMContext &Context = Cond->getContext();
EVT OldVT = TLI->getValueType(*DL, OldType);
- MVT RegType = TLI->getRegisterType(Context, OldVT);
+ MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
unsigned RegWidth = RegType.getSizeInBits();
if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
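
getPreferredSwitchConditionType is a TargetLowering hook (signature as used
here) that lets a target pick a different legal width for switch comparisons
than the generic register type:

    // Presumed shape of the hook (TargetLowering member; the baseline is
    // assumed to fall back to getRegisterType, so targets that do not
    // override it keep the old behavior):
    //   virtual MVT getPreferredSwitchConditionType(LLVMContext &Context,
    //                                               EVT ConditionVT) const;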
@@ -7019,7 +7030,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
ExtInst->setDebugLoc(SI->getDebugLoc());
SI->setCondition(ExtInst);
for (auto Case : SI->cases()) {
- APInt NarrowConst = Case.getCaseValue()->getValue();
+ const APInt &NarrowConst = Case.getCaseValue()->getValue();
APInt WideConst = (ExtType == Instruction::ZExt) ?
NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
Case.setValue(ConstantInt::get(Context, WideConst));
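
Binding the case constant by const reference matters because
ConstantInt::getValue() returns a reference and copying an APInt can
heap-allocate for widths beyond 64 bits. A sketch of the widening it feeds
(register width assumed):

    #include "llvm/IR/Constants.h"
    using namespace llvm;

    APInt widenCaseValue(const ConstantInt *C, unsigned RegWidth,
                         bool IsZExt) {
      const APInt &Narrow = C->getValue(); // a reference, no APInt copy
      return IsZExt ? Narrow.zext(RegWidth) : Narrow.sext(RegWidth);
    }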
@@ -7028,6 +7039,89 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
return true;
}
+bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
+ // The SCCP optimization tends to produce code like this:
+ // switch(x) { case 42: phi(42, ...) }
+ // Materializing the constant for the phi-argument needs instructions; So we
+ // change the code to:
+ // switch(x) { case 42: phi(x, ...) }
+
+ Value *Condition = SI->getCondition();
+ // Avoid endless loop in degenerate case.
+ if (isa<ConstantInt>(*Condition))
+ return false;
+
+ bool Changed = false;
+ BasicBlock *SwitchBB = SI->getParent();
+ Type *ConditionType = Condition->getType();
+
+ for (const SwitchInst::CaseHandle &Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseBB = Case.getCaseSuccessor();
+ // Set to true if we previously checked that `CaseBB` is only reached by
+ // a single case from this switch.
+ bool CheckedForSinglePred = false;
+ for (PHINode &PHI : CaseBB->phis()) {
+ Type *PHIType = PHI.getType();
+ // If ZExt is free then we can also catch patterns like this:
+ // switch((i32)x) { case 42: phi((i64)42, ...); }
+ // and replace `(i64)42` with `zext i32 %x to i64`.
+ bool TryZExt =
+ PHIType->isIntegerTy() &&
+ PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
+ TLI->isZExtFree(ConditionType, PHIType);
+ if (PHIType == ConditionType || TryZExt) {
+ // Set to true to skip this case because of multiple preds.
+ bool SkipCase = false;
+ Value *Replacement = nullptr;
+ for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
+ Value *PHIValue = PHI.getIncomingValue(I);
+ if (PHIValue != CaseValue) {
+ if (!TryZExt)
+ continue;
+ ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
+ if (!PHIValueInt ||
+ PHIValueInt->getValue() !=
+ CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
+ continue;
+ }
+ if (PHI.getIncomingBlock(I) != SwitchBB)
+ continue;
+ // We cannot optimize if there are multiple case labels jumping to
+ // this block. This check may get expensive when there are many
+ // case labels so we test for it last.
+ if (!CheckedForSinglePred) {
+ CheckedForSinglePred = true;
+ if (SI->findCaseDest(CaseBB) == nullptr) {
+ SkipCase = true;
+ break;
+ }
+ }
+
+ if (Replacement == nullptr) {
+ if (PHIValue == CaseValue) {
+ Replacement = Condition;
+ } else {
+ IRBuilder<> Builder(SI);
+ Replacement = Builder.CreateZExt(Condition, PHIType);
+ }
+ }
+ PHI.setIncomingValue(I, Replacement);
+ Changed = true;
+ }
+ if (SkipCase)
+ break;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+ bool Changed = optimizeSwitchType(SI);
+ Changed |= optimizeSwitchPhiConstants(SI);
+ return Changed;
+}
namespace {
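
A worked example of the new transform (hypothetical IR, shown in comments,
including the free-zext variant and the bail-out conditions):

    //   switch i32 %x, label %other [ i32 42, label %bb ]
    // bb:                              ; reached by exactly one case
    //   %p = phi i64 [ 42, %switchbb ], [ ... ] ; i64 42 == zext(i32 42)
    //
    // When zext i32 -> i64 is free on the target, the incoming constant is
    // rewritten to reuse the condition:
    //   %x.ext = zext i32 %x to i64
    //   %p = phi i64 [ %x.ext, %switchbb ], [ ... ]
    // so %bb no longer materializes 42. The transform bails out when the
    // condition is itself a constant (possible endless rewrite) or when
    // findCaseDest(bb) is null, i.e. several cases target the same block.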
@@ -7777,7 +7871,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+ if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
LargeOffsetGEPMap.erase(P);
P->replaceAllUsesWith(V);
P->eraseFromParent();