author     Dimitry Andric <dim@FreeBSD.org>   2023-09-02 21:17:18 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2023-12-08 17:34:50 +0000
commit     06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e (patch)
tree       62f873df87c7c675557a179e0c4c83fe9f3087bc /contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
parent     cf037972ea8863e2bab7461d77345367d2c1e054 (diff)
parent     7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp  438
1 file changed, 261 insertions(+), 177 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index dd431cc6f4f5..b00df0b6c6cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -82,7 +83,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -257,13 +257,17 @@ static cl::opt<bool>
"CodeGenPrepare."));
static cl::opt<bool>
- OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(false),
+ OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
cl::desc("Enable converting phi types in CodeGenPrepare"));
static cl::opt<unsigned>
HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
cl::desc("Least BB number of huge function."));
+static cl::opt<unsigned>
+ MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
+ cl::Hidden,
+ cl::desc("Max number of address users to look at"));
namespace {
enum ExtType {
@@ -294,16 +298,16 @@ class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM = nullptr;
- const TargetSubtargetInfo *SubtargetInfo;
+ const TargetSubtargetInfo *SubtargetInfo = nullptr;
const TargetLowering *TLI = nullptr;
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
const TargetTransformInfo *TTI = nullptr;
const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
- const TargetLibraryInfo *TLInfo;
- const LoopInfo *LI;
+ const TargetLibraryInfo *TLInfo = nullptr;
+ LoopInfo *LI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- ProfileSummaryInfo *PSI;
+ ProfileSummaryInfo *PSI = nullptr;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -373,6 +377,15 @@ public:
bool runOnFunction(Function &F) override;
+ void releaseMemory() override {
+ // Clear per function information.
+ InsertedInsts.clear();
+ PromotedInsts.clear();
+ FreshBBs.clear();
+ BPI.reset();
+ BFI.reset();
+ }
+
StringRef getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -413,7 +426,7 @@ private:
void removeAllAssertingVHReferences(Value *V);
bool eliminateAssumptions(Function &F);
- bool eliminateFallThrough(Function &F);
+ bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -494,10 +507,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
DL = &F.getParent()->getDataLayout();
bool EverMadeChange = false;
- // Clear per function information.
- InsertedInsts.clear();
- PromotedInsts.clear();
- FreshBBs.clear();
TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
@@ -574,11 +583,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Because the basic algorithm's complex is near O(N!).
IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
+ // Transformations above may invalidate dominator tree and/or loop info.
+ DT.reset();
+ LI->releaseMemory();
+ LI->analyze(getDT(F));
+
bool MadeChange = true;
bool FuncIterated = false;
while (MadeChange) {
MadeChange = false;
- DT.reset();
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
if (FuncIterated && !FreshBBs.contains(&BB))
@@ -587,6 +600,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
+ if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
+ DT.reset();
+
MadeChange |= Changed;
if (IsHugeFunc) {
// If the BB is updated, it may still has chance to be optimized.
@@ -602,9 +618,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
FreshBBs.insert(&BB);
else if (FuncIterated)
FreshBBs.erase(&BB);
-
- if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
- DT.reset();
} else {
// For small/normal functions, we restart BB iteration if the dominator
// tree of the Function was changed.
@@ -622,7 +635,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= optimizePhiTypes(F);
if (MadeChange)
- eliminateFallThrough(F);
+ eliminateFallThrough(F, DT.get());
+
+#ifndef NDEBUG
+ if (MadeChange && VerifyLoopInfo)
+ LI->verify(getDT(F));
+#endif
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
@@ -755,7 +773,7 @@ void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
-bool CodeGenPrepare::eliminateFallThrough(Function &F) {
+bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
// Use a temporary array to avoid iterator being invalidated when
@@ -777,13 +795,19 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
continue;
+ // Make an effort to skip unreachable blocks.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
Changed = true;
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
// Merge BB into SinglePred and delete it.
- MergeBlockIntoPredecessor(BB);
+ MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
+ /* MemDep */ nullptr,
+ /* PredecessorWithTwoSuccessors */ false, DT);
Preds.insert(SinglePred);
if (IsHugeFunc) {
@@ -1579,6 +1603,7 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
/// intrinsic. Return true if any changes were made.
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
ModifyDT &ModifiedDT) {
+ bool EdgeCase = false;
Value *A, *B;
BinaryOperator *Add;
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
@@ -1587,11 +1612,12 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
A = Add->getOperand(0);
B = Add->getOperand(1);
+ EdgeCase = true;
}
if (!TLI->shouldFormOverflowOp(ISD::UADDO,
TLI->getValueType(*DL, Add->getType()),
- Add->hasNUsesOrMore(2)))
+ Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
return false;
// We don't want to move around uses of condition values this late, so we
@@ -1660,7 +1686,7 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
if (!TLI->shouldFormOverflowOp(ISD::USUBO,
TLI->getValueType(*DL, Sub->getType()),
- Sub->hasNUsesOrMore(2)))
+ Sub->hasNUsesOrMore(1)))
return false;
if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
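A minimal source-level sketch of the pattern this hunk tunes (assuming a target whose shouldFormOverflowOp hook returns true): an add whose result is compared unsigned-less-than against one of its operands is the canonical unsigned-overflow check, and combineToUAddWithOverflow rewrites the add+icmp pair into a single llvm.uadd.with.overflow call.

// Not part of the patch; illustrative only.
// Compiles to add + icmp ult, which CodeGenPrepare can convert into
// llvm.uadd.with.overflow when the target profits from it.
bool add_overflows(unsigned a, unsigned b, unsigned &sum) {
  sum = a + b;
  return sum < a; // carry out of the addition
}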
@@ -1825,6 +1851,37 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
return true;
}
+/// Many architectures use the same instruction for both subtract and cmp. Try
+/// to swap cmp operands to match subtract operations to allow for CSE.
+static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
+ Value *Op0 = Cmp->getOperand(0);
+ Value *Op1 = Cmp->getOperand(1);
+ if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
+ isa<Constant>(Op1) || Op0 == Op1)
+ return false;
+
+ // If a subtract already has the same operands as a compare, swapping would be
+ // bad. If a subtract has the same operands as a compare but in reverse order,
+ // then swapping is good.
+ int GoodToSwap = 0;
+ unsigned NumInspected = 0;
+ for (const User *U : Op0->users()) {
+ // Avoid walking many users.
+ if (++NumInspected > 128)
+ return false;
+ if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
+ GoodToSwap++;
+ else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
+ GoodToSwap--;
+ }
+
+ if (GoodToSwap > 0) {
+ Cmp->swapOperands();
+ return true;
+ }
+ return false;
+}
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -1838,6 +1895,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (foldICmpWithDominatingICmp(Cmp, *TLI))
return true;
+ if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
+ return true;
+
return false;
}
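A rough source-level illustration of the CSE opportunity the new swapICmpOperandsToExposeCSEOpportunities helper targets (assuming an AArch64-like target where the compare is an alias of a flag-setting subtract): if an existing subtract computes Op1 - Op0, swapping the compare to the same operand order lets the backend cover compare and subtract with one instruction.

// Not part of the patch; illustrative only.
unsigned residual(unsigned a, unsigned b) {
  unsigned d = b - a;   // sub d, b, a
  if (a < b)            // icmp ult a, b; after the swap: icmp ugt b, a,
    return d;           // matching the subtract's operand order
  return 0;
}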
@@ -2129,6 +2189,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
///
/// If the transform is performed, return true and set ModifiedDT to true.
static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+ LoopInfo &LI,
const TargetLowering *TLI,
const DataLayout *DL, ModifyDT &ModifiedDT,
SmallSet<BasicBlock *, 32> &FreshBBs,
@@ -2168,6 +2229,13 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
if (IsHugeFunc)
FreshBBs.insert(EndBlock);
+ // Update the LoopInfo. The new blocks are in the same loop as the start
+ // block.
+ if (Loop *L = LI.getLoopFor(StartBlock)) {
+ L->addBasicBlockToLoop(CallBlock, LI);
+ L->addBasicBlockToLoop(EndBlock, LI);
+ }
+
// Set up a builder to create a compare, conditional branch, and PHI.
IRBuilder<> Builder(CountZeros->getContext());
Builder.SetInsertPoint(StartBlock->getTerminator());
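For context, a minimal sketch of what despeculateCountZeros produces (assuming a target that reports cttz/ctlz as expensive when the input may be zero): the unconditional count is guarded by a zero test, and with this change the two blocks created by the split are also registered in the loop of the original block so LoopInfo stays valid.

// Not part of the patch; illustrative only. __builtin_ctzll is a GCC/Clang
// builtin whose result is undefined for a zero input, so the guarded form
// below mirrors the branch CodeGenPrepare inserts.
unsigned trailing_zeros(unsigned long long x) {
  return x == 0 ? 64u : (unsigned)__builtin_ctzll(x);
}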
@@ -2279,7 +2347,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
if (!Arg->getType()->isPointerTy())
continue;
unsigned AS = Arg->getType()->getPointerAddressSpace();
- return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+ if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
+ return true;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
@@ -2341,7 +2410,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
- return despeculateCountZeros(II, TLI, DL, ModifiedDT, FreshBBs,
+ return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
IsHugeFunc);
case Intrinsic::fshl:
case Intrinsic::fshr:
@@ -2349,24 +2418,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
case Intrinsic::dbg_assign:
case Intrinsic::dbg_value:
return fixupDbgValue(II);
- case Intrinsic::vscale: {
- // If datalayout has no special restrictions on vector data layout,
- // replace `llvm.vscale` by an equivalent constant expression
- // to benefit from cheap constant propagation.
- Type *ScalableVectorTy =
- VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
- if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinValue() == 8) {
- auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
- auto *One = ConstantInt::getSigned(II->getType(), 1);
- auto *CGep =
- ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
- replaceAllUsesWith(II, ConstantExpr::getPtrToInt(CGep, II->getType()),
- FreshBBs, IsHugeFunc);
- II->eraseFromParent();
- return true;
- }
- break;
- }
case Intrinsic::masked_gather:
return optimizeGatherScatterInst(II, II->getArgOperand(0));
case Intrinsic::masked_scatter:
@@ -2442,6 +2493,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
if (!RetI)
return false;
+ assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
+
PHINode *PN = nullptr;
ExtractValueInst *EVI = nullptr;
BitCastInst *BCI = nullptr;
@@ -2687,7 +2740,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
if (InBounds)
OS << "inbounds ";
if (BaseGV) {
- OS << (NeedPlus ? " + " : "") << "GV:";
+ OS << "GV:";
BaseGV->printAsOperand(OS, /*PrintType=*/false);
NeedPlus = true;
}
@@ -3073,6 +3126,9 @@ class TypePromotionTransaction {
~InstructionRemover() override { delete Replacer; }
+ InstructionRemover &operator=(const InstructionRemover &other) = delete;
+ InstructionRemover(const InstructionRemover &other) = delete;
+
/// Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
@@ -3258,7 +3314,7 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
/// True if we are optimizing for size.
- bool OptSize;
+ bool OptSize = false;
ProfileSummaryInfo *PSI;
BlockFrequencyInfo *BFI;
@@ -3574,10 +3630,15 @@ private:
/// Original Address.
Value *Original;
+ /// Common value among addresses
+ Value *CommonValue = nullptr;
+
public:
AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
: SQ(_SQ), Original(OriginalValue) {}
+ ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
+
/// Get the combined AddrMode
const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
@@ -3662,13 +3723,21 @@ public:
if (!initializeMap(Map))
return false;
- Value *CommonValue = findCommon(Map);
+ CommonValue = findCommon(Map);
if (CommonValue)
AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
return CommonValue != nullptr;
}
private:
+ /// `CommonValue` may be a placeholder inserted by us.
+ /// If the placeholder is not used, we should remove this dead instruction.
+ void eraseCommonValueIfDead() {
+ if (CommonValue && CommonValue->getNumUses() == 0)
+ if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
+ CommonInst->eraseFromParent();
+ }
+
/// Initialize Map with anchor values. For address seen
/// we set the value of different field saw in this address.
/// At the same time we find a common type for different field we will
@@ -3866,17 +3935,17 @@ private:
SimplificationTracker &ST) {
while (!TraverseOrder.empty()) {
Value *Current = TraverseOrder.pop_back_val();
- assert(Map.find(Current) != Map.end() && "No node to fill!!!");
+ assert(Map.contains(Current) && "No node to fill!!!");
Value *V = Map[Current];
if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
// CurrentValue also must be Select.
auto *CurrentSelect = cast<SelectInst>(Current);
auto *TrueValue = CurrentSelect->getTrueValue();
- assert(Map.find(TrueValue) != Map.end() && "No True Value!");
+ assert(Map.contains(TrueValue) && "No True Value!");
Select->setTrueValue(ST.Get(Map[TrueValue]));
auto *FalseValue = CurrentSelect->getFalseValue();
- assert(Map.find(FalseValue) != Map.end() && "No False Value!");
+ assert(Map.contains(FalseValue) && "No False Value!");
Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
@@ -3884,7 +3953,7 @@ private:
// Fill the Phi node with values from predecessors.
for (auto *B : predecessors(PHI->getParent())) {
Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
- assert(Map.find(PV) != Map.end() && "No predecessor Value!");
+ assert(Map.contains(PV) && "No predecessor Value!");
PHI->addIncoming(ST.Get(Map[PV]), B);
}
}
@@ -3908,7 +3977,7 @@ private:
while (!Worklist.empty()) {
Value *Current = Worklist.pop_back_val();
// if it is already visited or it is an ending value then skip it.
- if (Map.find(Current) != Map.end())
+ if (Map.contains(Current))
continue;
TraverseOrder.push_back(Current);
@@ -4627,7 +4696,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
return false;
}
case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ // Check to see if we can merge in one operand, then the other. If so, we
+ // win.
ExtAddrMode BackupAddrMode = AddrMode;
unsigned OldSize = AddrModeInsts.size();
// Start a transaction at this point.
@@ -4637,9 +4707,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
+ // Try to match an integer constant second to increase its chance of ending
+ // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
+ int First = 0, Second = 1;
+ if (isa<ConstantInt>(AddrInst->getOperand(First))
+ && !isa<ConstantInt>(AddrInst->getOperand(Second)))
+ std::swap(First, Second);
AddrMode.InBounds = false;
- if (matchAddr(AddrInst->getOperand(1), Depth + 1) &&
- matchAddr(AddrInst->getOperand(0), Depth + 1))
+ if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(Second), Depth + 1))
return true;
// Restore the old addr mode info.
@@ -4647,9 +4723,10 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
AddrModeInsts.resize(OldSize);
TPT.rollback(LastKnownGood);
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (matchAddr(AddrInst->getOperand(0), Depth + 1) &&
- matchAddr(AddrInst->getOperand(1), Depth + 1))
+ // Otherwise this was over-aggressive. Try merging operands in the opposite
+ // order.
+ if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(First), Depth + 1))
return true;
// Otherwise we definitely can't merge the ADD in.
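A small sketch of why the constant operand is now matched second (the comment assumes x86-64 codegen): for an address like p + (i + 8), folding the 8 into the immediate displacement (AddrMode.BaseOffs) keeps the base register free for the non-constant part.

// Not part of the patch; illustrative only.
int load_off(const int *p, long i) {
  return p[i + 8];   // ideally  mov eax, dword ptr [rdi + 4*rsi + 32]
}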
@@ -4698,7 +4775,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantInt *CI =
dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
const APInt &CVal = CI->getValue();
- if (CVal.getMinSignedBits() <= 64) {
+ if (CVal.getSignificantBits() <= 64) {
ConstantOffset += CVal.getSExtValue() * TypeSize;
continue;
}
@@ -4718,36 +4795,35 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 ||
- TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
- // Check to see if we can fold the base pointer in too.
- if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
+ if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
if (!cast<GEPOperator>(AddrInst)->isInBounds())
AddrMode.InBounds = false;
return true;
- }
- } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
- TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
- ConstantOffset > 0) {
- // Record GEPs with non-zero offsets as candidates for splitting in the
- // event that the offset cannot fit into the r+i addressing mode.
- // Simple and common case that only one GEP is used in calculating the
- // address for the memory access.
- Value *Base = AddrInst->getOperand(0);
- auto *BaseI = dyn_cast<Instruction>(Base);
- auto *GEP = cast<GetElementPtrInst>(AddrInst);
- if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
- (BaseI && !isa<CastInst>(BaseI) &&
- !isa<GetElementPtrInst>(BaseI))) {
- // Make sure the parent block allows inserting non-PHI instructions
- // before the terminator.
- BasicBlock *Parent =
- BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
- if (!Parent->getTerminator()->isEHPad())
- LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
- }
}
AddrMode.BaseOffs -= ConstantOffset;
+
+ if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+ TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+ ConstantOffset > 0) {
+ // Record GEPs with non-zero offsets as candidates for splitting in
+ // the event that the offset cannot fit into the r+i addressing mode.
+ // Simple and common case that only one GEP is used in calculating the
+ // address for the memory access.
+ Value *Base = AddrInst->getOperand(0);
+ auto *BaseI = dyn_cast<Instruction>(Base);
+ auto *GEP = cast<GetElementPtrInst>(AddrInst);
+ if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+ (BaseI && !isa<CastInst>(BaseI) &&
+ !isa<GetElementPtrInst>(BaseI))) {
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
+ BasicBlock *Parent = BaseI ? BaseI->getParent()
+ : &GEP->getFunction()->getEntryBlock();
+ if (!Parent->getTerminator()->isEHPad())
+ LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+ }
+ }
+
return false;
}
@@ -4963,18 +5039,14 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
return true;
}
-// Max number of memory uses to look at before aborting the search to conserve
-// compile time.
-static constexpr int MaxMemoryUsesToScan = 20;
-
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
/// Add accessed addresses and types to MemoryUses.
static bool FindAllMemoryUses(
- Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, int SeenInsts = 0) {
+ BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -4987,33 +5059,33 @@ static bool FindAllMemoryUses(
for (Use &U : I->uses()) {
// Conservatively return true if we're seeing a large number or a deep chain
// of users. This avoids excessive compilation times in pathological cases.
- if (SeenInsts++ >= MaxMemoryUsesToScan)
+ if (SeenInsts++ >= MaxAddressUsersToScan)
return true;
Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
- MemoryUses.push_back({U.get(), LI->getType()});
+ MemoryUses.push_back({&U, LI->getType()});
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});
+ MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
continue;
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
+ MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
continue;
}
if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
+ MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
continue;
}
@@ -5045,6 +5117,17 @@ static bool FindAllMemoryUses(
return false;
}
+static bool FindAllMemoryUses(
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ unsigned SeenInsts = 0;
+ SmallPtrSet<Instruction *, 16> ConsideredInsts;
+ return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI, SeenInsts);
+}
+
+
/// Return true if Val is already known to be live at the use site that we're
/// folding it into. If so, there is no cost to include it in the addressing
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
@@ -5126,10 +5209,8 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
// we can remove the addressing mode and effectively trade one live register
// for another (at worst.) In this context, folding an addressing mode into
// the use is just a particularly nice way of sinking it.
- SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
- SmallPtrSet<Instruction *, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI,
- BFI))
+ SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
+ if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -5142,8 +5223,9 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
// growth since most architectures have some reasonable small and fast way to
// compute an effective address. (i.e LEA on x86)
SmallVector<Instruction *, 32> MatchedAddrModeInsts;
- for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
- Value *Address = Pair.first;
+ for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
+ Value *Address = Pair.first->get();
+ Instruction *UserI = cast<Instruction>(Pair.first->getUser());
Type *AddressAccessTy = Pair.second;
unsigned AS = Address->getType()->getPointerAddressSpace();
@@ -5156,7 +5238,7 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
- AddressAccessTy, AS, MemoryInst, Result,
+ AddressAccessTy, AS, UserI, Result,
InsertedInsts, PromotedInsts, TPT,
LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
@@ -5693,7 +5775,8 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// Create a scalar GEP if there are more than 2 operands.
if (Ops.size() != 2) {
// Replace the last index with 0.
- Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+ Ops[FinalIndex] =
+ Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
SourceTy = GetElementPtrInst::getIndexedType(
SourceTy, ArrayRef(Ops).drop_front());
@@ -6027,6 +6110,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t Offset = LargeOffsetGEP->second;
if (Offset != BaseOffset) {
TargetLowering::AddrMode AddrMode;
+ AddrMode.HasBaseReg = true;
AddrMode.BaseOffs = Offset - BaseOffset;
// The result type of the GEP might not be the type of the memory
// access.
@@ -6044,7 +6128,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
// Generate a new GEP to replace the current one.
LLVMContext &Ctx = GEP->getContext();
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
Type *I8PtrTy =
Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
Type *I8Ty = Type::getInt8Ty(Ctx);
@@ -6062,7 +6146,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
NewBaseInsertBB =
- SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
} else
NewBaseInsertPt = std::next(BaseI->getIterator());
@@ -6074,7 +6158,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
}
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
// Create a new base.
- Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+ Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
NewBaseGEP = OldBase;
if (NewBaseGEP->getType() != I8PtrTy)
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
@@ -6090,7 +6174,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
} else {
// Calculate the new offset for the new GEP.
- Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+ Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
if (GEP->getType() != I8PtrTy)
@@ -6872,9 +6956,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
return false;
TargetLowering::SelectSupportKind SelectKind;
- if (VectorCond)
- SelectKind = TargetLowering::VectorMaskSelect;
- else if (SI->getType()->isVectorTy())
+ if (SI->getType()->isVectorTy())
SelectKind = TargetLowering::ScalarCondVectorVal;
else
SelectKind = TargetLowering::ScalarValSelect;
@@ -6915,88 +6997,88 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// first branch will point directly to select.end, and the corresponding PHI
// predecessor block will be the start block.
- // First, we split the block containing the select into 2 blocks.
+ // Collect values that go on the true side and the values that go on the false
+ // side.
+ SmallVector<Instruction *> TrueInstrs, FalseInstrs;
+ for (SelectInst *SI : ASI) {
+ if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
+ TrueInstrs.push_back(cast<Instruction>(V));
+ if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
+ FalseInstrs.push_back(cast<Instruction>(V));
+ }
+
+ // Split the select block, according to how many (if any) values go on each
+ // side.
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
- BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- if (IsHugeFunc)
- FreshBBs.insert(EndBlock);
- BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
- // Delete the unconditional branch that was just created by the split.
- StartBlock->getTerminator()->eraseFromParent();
+ IRBuilder<> IB(SI);
+ auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
- // These are the new basic blocks for the conditional branch.
- // At least one will become an actual new basic block.
BasicBlock *TrueBlock = nullptr;
BasicBlock *FalseBlock = nullptr;
+ BasicBlock *EndBlock = nullptr;
BranchInst *TrueBranch = nullptr;
BranchInst *FalseBranch = nullptr;
-
- // Sink expensive instructions into the conditional blocks to avoid executing
- // them speculatively.
- for (SelectInst *SI : ASI) {
- if (sinkSelectOperand(TTI, SI->getTrueValue())) {
- if (TrueBlock == nullptr) {
- TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
- EndBlock->getParent(), EndBlock);
- TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
- if (IsHugeFunc)
- FreshBBs.insert(TrueBlock);
- TrueBranch->setDebugLoc(SI->getDebugLoc());
- }
- auto *TrueInst = cast<Instruction>(SI->getTrueValue());
- TrueInst->moveBefore(TrueBranch);
- }
- if (sinkSelectOperand(TTI, SI->getFalseValue())) {
- if (FalseBlock == nullptr) {
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
- EndBlock->getParent(), EndBlock);
- if (IsHugeFunc)
- FreshBBs.insert(FalseBlock);
- FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- FalseBranch->setDebugLoc(SI->getDebugLoc());
- }
- auto *FalseInst = cast<Instruction>(SI->getFalseValue());
- FalseInst->moveBefore(FalseBranch);
- }
+ if (TrueInstrs.size() == 0) {
+ FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
+ } else if (FalseInstrs.size() == 0) {
+ TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ TrueBlock = TrueBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ } else {
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
+ SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
+ nullptr, nullptr, LI);
+ TrueBranch = cast<BranchInst>(ThenTerm);
+ FalseBranch = cast<BranchInst>(ElseTerm);
+ TrueBlock = TrueBranch->getParent();
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ }
+
+ EndBlock->setName("select.end");
+ if (TrueBlock)
+ TrueBlock->setName("select.true.sink");
+ if (FalseBlock)
+ FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
+ : "select.false.sink");
+
+ if (IsHugeFunc) {
+ if (TrueBlock)
+ FreshBBs.insert(TrueBlock);
+ if (FalseBlock)
+ FreshBBs.insert(FalseBlock);
+ FreshBBs.insert(EndBlock);
}
- // If there was nothing to sink, then arbitrarily choose the 'false' side
- // for a new input value to the PHI.
- if (TrueBlock == FalseBlock) {
- assert(TrueBlock == nullptr &&
- "Unexpected basic block transform while optimizing select");
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
- EndBlock->getParent(), EndBlock);
- if (IsHugeFunc)
- FreshBBs.insert(FalseBlock);
- auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- FalseBranch->setDebugLoc(SI->getDebugLoc());
- }
+ static const unsigned MD[] = {
+ LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+ LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
+ StartBlock->getTerminator()->copyMetadata(*SI, MD);
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ for (Instruction *I : TrueInstrs)
+ I->moveBefore(TrueBranch);
+ for (Instruction *I : FalseInstrs)
+ I->moveBefore(FalseBranch);
- // Insert the real conditional branch based on the original condition.
// If we did not create a new block for one of the 'true' or 'false' paths
// of the condition, it means that side of the branch goes to the end block
// directly and the path originates from the start block from the point of
// view of the new PHI.
- BasicBlock *TT, *FT;
- if (TrueBlock == nullptr) {
- TT = EndBlock;
- FT = FalseBlock;
+ if (TrueBlock == nullptr)
TrueBlock = StartBlock;
- } else if (FalseBlock == nullptr) {
- TT = TrueBlock;
- FT = EndBlock;
+ else if (FalseBlock == nullptr)
FalseBlock = StartBlock;
- } else {
- TT = TrueBlock;
- FT = FalseBlock;
- }
- IRBuilder<> IB(SI);
- auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
- IB.CreateCondBr(CondFr, TT, FT, SI);
SmallPtrSet<const Instruction *, 2> INS;
INS.insert(ASI.begin(), ASI.end());
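A minimal example of the kind of select this rewritten code may turn into real control flow (assuming the division is expensive enough for sinkSelectOperand to fire): the expensive operand is sunk into a new select.true.sink block, so it only executes when the frozen condition takes that path.

// Not part of the patch; illustrative only.
int pick(bool c, int a, int b) {
  int d = a / b;     // unconditional sdiv feeding only the select
  return c ? d : 0;  // CGP may sink the sdiv behind the new branch
}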
@@ -7105,7 +7187,7 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
- // now. So we put the operands defs' BBs into FreshBBs to do optmization.
+ // now. So we put the operands defs' BBs into FreshBBs to do optimization.
for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
if (!OpDef)
@@ -7696,7 +7778,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
// whereas scalable vectors would have to be shifted by
// <2log(vscale) + number of bits> in order to store the
// low/high parts. Bailing out for now.
- if (isa<ScalableVectorType>(StoreType))
+ if (StoreType->isScalableTy())
return false;
if (!DL.typeSizeEqualsStoreSize(StoreType) ||
@@ -8051,8 +8133,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return true;
if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
- TLI->optimizeExtendOrTruncateConversion(I,
- LI->getLoopFor(I->getParent())))
+ TLI->optimizeExtendOrTruncateConversion(
+ I, LI->getLoopFor(I->getParent()), *TTI))
return true;
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -8064,7 +8146,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return SinkCast(CI);
} else {
if (TLI->optimizeExtendOrTruncateConversion(
- I, LI->getLoopFor(I->getParent())))
+ I, LI->getLoopFor(I->getParent()), *TTI))
return true;
bool MadeChange = optimizeExt(I);
@@ -8128,7 +8210,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
GEPI->getName(), GEPI);
NC->setDebugLoc(GEPI->getDebugLoc());
replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
- GEPI->eraseFromParent();
+ RecursivelyDeleteTriviallyDeadInstructions(
+ GEPI, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
++NumGEPsElim;
optimizeInst(NC, ModifiedDT);
return true;