author     Dimitry Andric <dim@FreeBSD.org>    2018-01-06 21:34:26 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2018-01-06 21:34:26 +0000
commit     d215fd3b74b90f5dc1964610926fcc2a20f959aa (patch)
tree       0c9f21e40eae033d6760008729f37d2103e2c654 /lib
parent     b8a2042aa938069e862750553db0e4d82d25822c (diff)
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 57
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 26
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 29
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 87
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 58
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 13
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 16
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 11
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 58
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 37
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 10
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp | 12
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 26
-rw-r--r--  lib/IR/BasicBlock.cpp | 3
-rw-r--r--  lib/IR/Verifier.cpp | 23
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 2
-rw-r--r--  lib/Passes/PassBuilder.cpp | 17
-rw-r--r--  lib/Support/CommandLine.cpp | 48
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 16
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 38
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 2
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 36
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 13
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 8
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 4
-rw-r--r--  lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 40
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 5
-rw-r--r--  lib/Target/AMDGPU/MIMGInstructions.td | 10
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 279
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h | 28
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 41
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 12
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 16
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 6
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h | 5
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 8
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h | 8
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 11
-rw-r--r--  lib/Target/Hexagon/HexagonPatterns.td | 44
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 9
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 2
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp | 3
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 5
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 4
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 12
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 5
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 35
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 86
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCMIPeephole.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCPreEmitPeephole.cpp | 2
-rw-r--r--  lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 3
-rw-r--r--  lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h | 4
-rw-r--r--  lib/Target/RISCV/RISCVISelLowering.cpp | 3
-rw-r--r--  lib/Target/RISCV/RISCVInstrInfoC.td | 4
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 5
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 4
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 6
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 2
-rw-r--r--  lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp | 4
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 10
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 10
-rw-r--r--  lib/Target/X86/X86FixupBWInsts.cpp | 153
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 341
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 2
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 128
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 14
-rw-r--r--  lib/Transforms/Coroutines/CoroSplit.cpp | 8
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 17
-rw-r--r--  lib/Transforms/Scalar/CallSiteSplitting.cpp | 11
-rw-r--r--  lib/Transforms/Scalar/GVNSink.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 21
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 54
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 17
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 27
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 14
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 34
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp | 10
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 32
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 11
-rw-r--r--  lib/Transforms/Utils/CallPromotionUtils.cpp | 22
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 12
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 16
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 20
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp | 50
-rw-r--r--  lib/Transforms/Utils/LoopUtils.cpp | 14
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 8
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 133
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 54
103 files changed, 1623 insertions(+), 1118 deletions(-)
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 93fb1143e505..f382a1f50188 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -826,7 +826,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
MaxRecurse))
return V;
-  // Mul distributes over Add.  Try some generic simplifications based on this.
+  // Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
Q, MaxRecurse))
return V;
@@ -3838,12 +3838,13 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
// Fold into undef if index is out of bounds.
if (auto *CI = dyn_cast<ConstantInt>(Idx)) {
uint64_t NumElements = cast<VectorType>(Vec->getType())->getNumElements();
-
if (CI->uge(NumElements))
return UndefValue::get(Vec->getType());
}
- // TODO: We should also fold if index is iteslf an undef.
+ // If index is undef, it might be out of bounds (see above case)
+ if (isa<UndefValue>(Idx))
+ return UndefValue::get(Vec->getType());
return nullptr;
}
@@ -3896,10 +3897,13 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
- if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
- if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
- if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
- return Elt;
+ if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
+ if (IdxC->getValue().uge(Vec->getType()->getVectorNumElements()))
+ // definitely out of bounds, thus undefined result
+ return UndefValue::get(Vec->getType()->getVectorElementType());
+ if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
+ return Elt;
+ }
// An undef extract index can be arbitrarily chosen to be an out-of-range
// index value, which would result in the instruction being undef.
@@ -4489,28 +4493,55 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
}
+ Value *IIOperand = *ArgBegin;
+ Value *X;
switch (IID) {
case Intrinsic::fabs: {
- if (SignBitMustBeZero(*ArgBegin, Q.TLI))
- return *ArgBegin;
+ if (SignBitMustBeZero(IIOperand, Q.TLI))
+ return IIOperand;
return nullptr;
}
case Intrinsic::bswap: {
- Value *IIOperand = *ArgBegin;
- Value *X = nullptr;
// bswap(bswap(x)) -> x
if (match(IIOperand, m_BSwap(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::bitreverse: {
- Value *IIOperand = *ArgBegin;
- Value *X = nullptr;
// bitreverse(bitreverse(x)) -> x
if (match(IIOperand, m_BitReverse(m_Value(X))))
return X;
return nullptr;
}
+ case Intrinsic::exp: {
+ // exp(log(x)) -> x
+ if (Q.CxtI->isFast() &&
+ match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X))))
+ return X;
+ return nullptr;
+ }
+ case Intrinsic::exp2: {
+ // exp2(log2(x)) -> x
+ if (Q.CxtI->isFast() &&
+ match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
+ return X;
+ return nullptr;
+ }
+ case Intrinsic::log: {
+ // log(exp(x)) -> x
+ if (Q.CxtI->isFast() &&
+ match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
+ return X;
+ return nullptr;
+ }
+ case Intrinsic::log2: {
+ // log2(exp2(x)) -> x
+ if (Q.CxtI->isFast() &&
+ match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) {
+ return X;
+ }
+ return nullptr;
+ }
default:
return nullptr;
}
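As a rough C++ analogue of the new intrinsic folds (a hypothetical sketch, not code from this patch): under fast-math semantics the exp/log round trip cancels, which is exactly the cancellation the Q.CxtI->isFast() checks guard against applying under strict FP rules.

#include <cmath>
// With fast-math enabled, a compiler applying the fold above may
// simplify this whole round trip to simply returning X.
double roundTrip(double X) {
  return std::exp(std::log(X));
}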
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index f34549ae52b4..10b5c74e378b 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2358,7 +2358,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
FoundMatch = true;
}
if (FoundMatch)
- return getAddExpr(Ops, Flags);
+ return getAddExpr(Ops, Flags, Depth + 1);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
@@ -6402,9 +6402,8 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
BasicBlock *Header = L->getHeader();
// Push all Loop-header PHIs onto the Worklist stack.
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- Worklist.push_back(PN);
+ for (PHINode &PN : Header->phis())
+ Worklist.push_back(&PN);
}
const ScalarEvolution::BackedgeTakenInfo &
@@ -7638,12 +7637,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
if (!Latch)
return nullptr;
- for (auto &I : *Header) {
- PHINode *PHI = dyn_cast<PHINode>(&I);
- if (!PHI) break;
- auto *StartCST = getOtherIncomingValue(PHI, Latch);
- if (!StartCST) continue;
- CurrentIterVals[PHI] = StartCST;
+ for (PHINode &PHI : Header->phis()) {
+ if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
+ CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return RetVal = nullptr;
@@ -7720,13 +7716,9 @@ const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Should follow from NumIncomingValues == 2!");
- for (auto &I : *Header) {
- PHINode *PHI = dyn_cast<PHINode>(&I);
- if (!PHI)
- break;
- auto *StartCST = getOtherIncomingValue(PHI, Latch);
- if (!StartCST) continue;
- CurrentIterVals[PHI] = StartCST;
+ for (PHINode &PHI : Header->phis()) {
+ if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
+ CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return getCouldNotCompute();
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 3ceda677ba61..53ce33bacbe9 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1154,16 +1154,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IVIncInsertLoop &&
SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
- for (auto &I : *L->getHeader()) {
- auto *PN = dyn_cast<PHINode>(&I);
- // Found first non-phi, the rest of instructions are also not Phis.
- if (!PN)
- break;
-
- if (!SE.isSCEVable(PN->getType()))
+ for (PHINode &PN : L->getHeader()->phis()) {
+ if (!SE.isSCEVable(PN.getType()))
continue;
- const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
+ const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
if (!PhiSCEV)
continue;
@@ -1175,16 +1170,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
continue;
Instruction *TempIncV =
- cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+ cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
// Check whether we can reuse this PHI node.
if (LSRMode) {
- if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
+ if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
continue;
if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
continue;
} else {
- if (!isNormalAddRecExprPHI(PN, TempIncV, L))
+ if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
continue;
}
@@ -1193,7 +1188,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IncV = TempIncV;
TruncTy = nullptr;
InvertStep = false;
- AddRecPhiMatch = PN;
+ AddRecPhiMatch = &PN;
break;
}
@@ -1203,7 +1198,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
// Record the phi node. But don't stop; we might find an exact match
// later.
- AddRecPhiMatch = PN;
+ AddRecPhiMatch = &PN;
IncV = TempIncV;
TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
}
@@ -1863,12 +1858,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
const TargetTransformInfo *TTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
- for (auto &I : *L->getHeader()) {
- if (auto *PN = dyn_cast<PHINode>(&I))
- Phis.push_back(PN);
- else
- break;
- }
+ for (PHINode &PN : L->getHeader()->phis())
+ Phis.push_back(&PN);
if (TTI)
std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index cd4cee631568..a0032f99ec20 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -2264,9 +2264,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
- unsigned ShAmtLimited = ShAmt->getZExtValue();
- if (ShAmtLimited >= TyBits)
+ if (ShAmt->uge(TyBits))
break; // Bad shift.
+ unsigned ShAmtLimited = ShAmt->getZExtValue();
Tmp += ShAmtLimited;
if (Tmp > TyBits) Tmp = TyBits;
}
@@ -2277,9 +2277,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (ShAmt->uge(TyBits) || // Bad shift.
+ ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
Tmp2 = ShAmt->getZExtValue();
- if (Tmp2 >= TyBits || // Bad shift.
- Tmp2 >= Tmp) break; // Shifted all sign bits out.
return Tmp - Tmp2;
}
break;
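The reordering in these two hunks matters because an APInt shift amount can be wider than 64 bits: getZExtValue() asserts when the value does not fit in a uint64_t, so the range check has to come first. A minimal sketch of the safe pattern (hypothetical helper, not code from the patch):

#include "llvm/ADT/APInt.h"
unsigned clampShiftAmount(const llvm::APInt &ShAmt, unsigned TyBits) {
  // uge() is safe on arbitrary-width APInts; only after this bound check
  // is it safe to extract the amount as a 64-bit integer.
  if (ShAmt.uge(TyBits))
    return TyBits; // over-wide shift: treat as shifting out every bit
  return unsigned(ShAmt.getZExtValue());
}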
@@ -4161,6 +4161,81 @@ static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
return {SPF_UNKNOWN, SPNB_NA, false};
}
+/// Recognize variations of:
+/// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
+static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal) {
+ // TODO: Allow FP min/max with nnan/nsz.
+ assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
+
+ Value *A, *B;
+ SelectPatternResult L = matchSelectPattern(TrueVal, A, B);
+ if (!SelectPatternResult::isMinOrMax(L.Flavor))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ Value *C, *D;
+ SelectPatternResult R = matchSelectPattern(FalseVal, C, D);
+ if (L.Flavor != R.Flavor)
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // Match the compare to the min/max operations of the select operands.
+ switch (L.Flavor) {
+ case SPF_SMIN:
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ std::swap(CmpLHS, CmpRHS);
+ }
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ break;
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ case SPF_SMAX:
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ std::swap(CmpLHS, CmpRHS);
+ }
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
+ break;
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ case SPF_UMIN:
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ std::swap(CmpLHS, CmpRHS);
+ }
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
+ break;
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ case SPF_UMAX:
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ std::swap(CmpLHS, CmpRHS);
+ }
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
+ break;
+ return {SPF_UNKNOWN, SPNB_NA, false};
+ default:
+ llvm_unreachable("Bad flavor while matching min/max");
+ }
+
+ // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
+ if (CmpLHS == A && CmpRHS == C && D == B)
+ return {L.Flavor, SPNB_NA, false};
+
+ // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
+ if (CmpLHS == A && CmpRHS == D && C == B)
+ return {L.Flavor, SPNB_NA, false};
+
+ // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
+ if (CmpLHS == B && CmpRHS == C && D == A)
+ return {L.Flavor, SPNB_NA, false};
+
+ // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
+ if (CmpLHS == B && CmpRHS == D && C == A)
+ return {L.Flavor, SPNB_NA, false};
+
+ return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
/// Match non-obvious integer minimum and maximum sequences.
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
Value *CmpLHS, Value *CmpRHS,
@@ -4174,6 +4249,10 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
return SPR;
+ SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
+ if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
+ return SPR;
+
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
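The identity the new matcher recognizes can be checked directly in plain C++ (a hypothetical demonstration, not code from the patch): whichever side of the comparison is taken, the select yields the minimum of all three values.

#include <algorithm>
// a < c ? min(a, b) : min(c, b) is always the smallest of a, b and c:
// if a < c the taken min(a, b) is <= a < c, otherwise min(c, b) <= c <= a,
// so either branch already covers the value the other branch would add.
int selectOfMins(int A, int B, int C) {
  return A < C ? std::min(A, B) : std::min(C, B);
}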
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index d6f55bba716f..9dc1ab4e6bb5 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -633,16 +633,10 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
if (DestBBPred == BB)
continue;
- bool HasAllSameValue = true;
- BasicBlock::const_iterator DestBBI = DestBB->begin();
- while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
- if (DestPN->getIncomingValueForBlock(BB) !=
- DestPN->getIncomingValueForBlock(DestBBPred)) {
- HasAllSameValue = false;
- break;
- }
- }
- if (HasAllSameValue)
+ if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
+ return DestPN.getIncomingValueForBlock(BB) ==
+ DestPN.getIncomingValueForBlock(DestBBPred);
+ }))
SameIncomingValueBBs.insert(DestBBPred);
}
@@ -672,9 +666,8 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
// the successor. If there is a more complex condition (e.g. preheaders),
// don't mess around with them.
- BasicBlock::const_iterator BBI = BB->begin();
- while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
- for (const User *U : PN->users()) {
+ for (const PHINode &PN : BB->phis()) {
+ for (const User *U : PN.users()) {
const Instruction *UI = cast<Instruction>(U);
if (UI->getParent() != DestBB || !isa<PHINode>(UI))
return false;
@@ -713,10 +706,9 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
if (BBPreds.count(Pred)) { // Common predecessor?
- BBI = DestBB->begin();
- while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
- const Value *V1 = PN->getIncomingValueForBlock(Pred);
- const Value *V2 = PN->getIncomingValueForBlock(BB);
+ for (const PHINode &PN : DestBB->phis()) {
+ const Value *V1 = PN.getIncomingValueForBlock(Pred);
+ const Value *V2 = PN.getIncomingValueForBlock(BB);
// If V2 is a phi node in BB, look up what the mapped value will be.
if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
@@ -759,11 +751,9 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
// to handle the new incoming edges it is about to have.
- PHINode *PN;
- for (BasicBlock::iterator BBI = DestBB->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ for (PHINode &PN : DestBB->phis()) {
// Remove the incoming value for BB, and remember it.
- Value *InVal = PN->removeIncomingValue(BB, false);
+ Value *InVal = PN.removeIncomingValue(BB, false);
// Two options: either the InVal is a phi node defined in BB or it is some
// value that dominates BB.
@@ -771,17 +761,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
if (InValPhi && InValPhi->getParent() == BB) {
// Add all of the input values of the input PHI as inputs of this phi.
for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
- PN->addIncoming(InValPhi->getIncomingValue(i),
- InValPhi->getIncomingBlock(i));
+ PN.addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
} else {
// Otherwise, add one instance of the dominating value for each edge that
// we will be adding.
if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
- PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+ PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
} else {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- PN->addIncoming(InVal, *PI);
+ PN.addIncoming(InVal, *PI);
}
}
}
@@ -6497,22 +6487,16 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
std::swap(TBB, FBB);
// Replace the old BB with the new BB.
- for (auto &I : *TBB) {
- PHINode *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- break;
+ for (PHINode &PN : TBB->phis()) {
int i;
- while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
- PN->setIncomingBlock(i, TmpBB);
+ while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
+ PN.setIncomingBlock(i, TmpBB);
}
// Add another incoming edge from the new BB.
- for (auto &I : *FBB) {
- PHINode *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- break;
- auto *Val = PN->getIncomingValueForBlock(&BB);
- PN->addIncoming(Val, TmpBB);
+ for (PHINode &PN : FBB->phis()) {
+ auto *Val = PN.getIncomingValueForBlock(&BB);
+ PN.addIncoming(Val, TmpBB);
}
// Update the branch weights (from SelectionDAGBuilder::
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 433f99b0113b..705d4ded5b56 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -815,7 +815,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (CI.isInlineAsm())
return translateInlineAsm(CI, MIRBuilder);
- if (!F || !F->isIntrinsic()) {
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ if (F && F->isIntrinsic()) {
+ ID = F->getIntrinsicID();
+ if (TII && ID == Intrinsic::not_intrinsic)
+ ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
+ }
+
+ if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
SmallVector<unsigned, 8> Args;
for (auto &Arg: CI.arg_operands())
@@ -827,10 +834,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
});
}
- Intrinsic::ID ID = F->getIntrinsicID();
- if (TII && ID == Intrinsic::not_intrinsic)
- ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
-
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a3b43c92a7fc..c7118201b753 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -813,7 +813,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
unsigned Zero = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildConstant(Zero, 0);
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
+
+ // For *signed* multiply, overflow is detected by checking:
+ // (hi != (lo >> bitwidth-1))
+ if (Opcode == TargetOpcode::G_SMULH) {
+ unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
+ unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
+ MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
+ .addDef(Shifted)
+ .addUse(Res)
+ .addUse(ShiftAmt);
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
+ } else {
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
+ }
MI.eraseFromParent();
return Legalized;
}
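A scalar C++ sketch of the signed rule this lowering encodes (hypothetical 32-bit example; two's-complement truncation and arithmetic right shift assumed, as guaranteed since C++20): the widened product overflows the narrow type exactly when the high half is not the sign-extension of the low half.

#include <cstdint>
bool signedMulOverflows(int32_t A, int32_t B) {
  int64_t Full = int64_t(A) * int64_t(B); // full 64-bit product
  int32_t Lo = int32_t(Full);             // low half (the multiply result)
  int32_t Hi = int32_t(Full >> 32);       // high half (what G_SMULH yields)
  return Hi != (Lo >> 31);                // hi != sign-extension of lo
}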
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 77a7aaa95732..4c6e21ab315a 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -136,8 +136,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
- Options.MCOptions);
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
@@ -151,8 +150,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
- Options.MCOptions);
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
if (!MCE || !MAB)
return true;
@@ -225,17 +223,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCRegisterInfo &MRI = *getMCRegisterInfo();
MCCodeEmitter *MCE =
getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
- Options.MCOptions);
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
if (!MCE || !MAB)
return true;
const Triple &T = getTargetTriple();
- const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out,
std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 34572f24c181..75e3d35169cf 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -242,8 +242,11 @@ public:
// We are storing a MachineOperand outside a MachineInstr.
locations.back().clearParent();
// Don't store def operands.
- if (locations.back().isReg())
+ if (locations.back().isReg()) {
+ if (locations.back().isDef())
+ locations.back().setIsDead(false);
locations.back().setIsUse();
+ }
return locations.size() - 1;
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3218dce8f575..81bff4d7eefa 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3850,7 +3850,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
return false;
}
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -13783,30 +13782,30 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
- // Deal with elidable overlapping chained stores.
- if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
- ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
- !ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
- BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST->getBasePtr(), DAG);
- BaseIndexOffset ST1BasePtr =
- BaseIndexOffset::match(ST1->getBasePtr(), DAG);
- unsigned STBytes = ST->getMemoryVT().getStoreSize();
- unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
- int64_t PtrDiff;
- // If this is a store who's preceeding store to a subset of the same
- // memory and no one other node is chained to that store we can
- // effectively drop the store. Do not remove stores to undef as they may
- // be used as data sinks.
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+ if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+ !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (ST1->getValue() == Value) {
+ // The store is dead, remove it.
+ return Chain;
+ }
- if (((ST->getBasePtr() == ST1->getBasePtr()) &&
- (ST->getValue() == ST1->getValue())) ||
- (STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
- (0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
+ // If this store's preceding store is to the same location and no
+ // other node is chained to that store, we can effectively drop the
+ // preceding store. Do not remove stores to undef as they may be used
+ // as data sinks.
+ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef()) {
+ // ST1 is fully overwritten and can be elided. Combine with its chain
+ // value.
CombineTo(ST1, ST1->getChain());
- return SDValue(N, 0);
+ return SDValue();
}
}
+ }
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
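A plain C++ picture of what the reworked combine catches (hypothetical example, not from the patch): two chained stores to the same address, where one of the two becomes redundant.

void overwrite(int *P, int V) {
  *P = 0; // fully overwritten below with no other use of this store:
          // the earlier store can be elided
  *P = V; // and if V happened to equal the value already stored, the
          // later store would itself be the dead/noop store
}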
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index d3c94b5f9e6b..3c856914053b 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -2051,11 +2051,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
- for (BasicBlock::const_iterator I = SuccBB->begin();
- const auto *PN = dyn_cast<PHINode>(I); ++I) {
-
+ for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
- if (PN->use_empty())
+ if (PN.use_empty())
continue;
// Only handle legal types. Two interesting things to note here. First,
@@ -2064,7 +2062,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// own moves. Second, this check is necessary because FastISel doesn't
// use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
- EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true);
+ EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
// Handle integer promotions, though, because they're common and easy.
if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
@@ -2073,11 +2071,11 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
}
}
- const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
// Set the DebugLoc for the copy. Prefer the location of the operand
// if there is one; use the location of the PHI otherwise.
- DbgLoc = PN->getDebugLoc();
+ DbgLoc = PN.getDebugLoc();
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index c7cdb49203b1..81347fa4bd46 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -257,20 +257,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
// appropriate.
- for (BasicBlock::const_iterator I = BB.begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (PN->use_empty()) continue;
+ for (const PHINode &PN : BB.phis()) {
+ if (PN.use_empty())
+ continue;
// Skip empty types
- if (PN->getType()->isEmptyTy())
+ if (PN.getType()->isEmptyTy())
continue;
- DebugLoc DL = PN->getDebugLoc();
- unsigned PHIReg = ValueMap[PN];
+ DebugLoc DL = PN.getDebugLoc();
+ unsigned PHIReg = ValueMap[&PN];
assert(PHIReg && "PHI node does not have an assigned virtual register!");
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs);
for (EVT VT : ValueVTs) {
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 7643790df350..6a141818bb6d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -139,14 +139,14 @@ class VectorLegalizer {
/// \brief Implements [SU]INT_TO_FP vector promotion.
///
- /// This is a [zs]ext of the input operand to the next size up.
+ /// This is a [zs]ext of the input operand to a larger integer type.
SDValue PromoteINT_TO_FP(SDValue Op);
/// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
///
- /// It is promoted to the next size up integer type. The result is then
+ /// It is promoted to a larger integer type. The result is then
/// truncated back to the original type.
- SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
+ SDValue PromoteFP_TO_INT(SDValue Op);
public:
VectorLegalizer(SelectionDAG& dag) :
@@ -431,7 +431,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
// Promote the operation by extending the operand.
- return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+ return PromoteFP_TO_INT(Op);
}
// There are currently two cases of vector promotion:
@@ -472,20 +472,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
- EVT VT = Op.getOperand(0).getValueType();
- assert(Op.getNode()->getNumValues() == 1 &&
- "Can't promote a vector with multiple results!");
-
- // Normal getTypeToPromoteTo() doesn't work here, as that will promote
- // by widening the vector w/ the same element width and twice the number
- // of elements. We want the other way around, the same number of elements,
- // each twice the width.
- //
- // Increase the bitwidth of the element to the next pow-of-two
- // (which is greater than 8 bits).
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
- EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
- assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
SDLoc dl(Op);
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
@@ -505,35 +496,28 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote, thus assuming that the
// promoted vector type has the same overall size.
-SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
- assert(Op.getNode()->getNumValues() == 1 &&
- "Can't promote a vector with multiple results!");
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
+ MVT VT = Op.getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
- EVT NewVT = VT;
- unsigned NewOpc;
- while (true) {
- NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
- assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
- if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
- NewOpc = ISD::FP_TO_SINT;
- break;
- }
- if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
- NewOpc = ISD::FP_TO_UINT;
- break;
- }
- }
+ unsigned NewOpc = Op->getOpcode();
+ // Change FP_TO_UINT to FP_TO_SINT if possible.
+ // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
+ if (NewOpc == ISD::FP_TO_UINT &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
SDLoc dl(Op);
- SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
+ SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
: ISD::AssertSext,
- dl, NewVT, Promoted,
+ dl, NVT, Promoted,
DAG.getValueType(VT.getScalarType()));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ce1c01b621f0..df1cbeb92740 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3374,11 +3374,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = N->getOperand(0);
- // If some legalization strategy other than widening is used on the operand,
- // we can't safely assume that just extending the low lanes is the correct
- // transformation.
- if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
- return WidenVecOp_Convert(N);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
InOp = GetWidenedVector(InOp);
assert(VT.getVectorNumElements() <
InOp.getValueType().getVectorNumElements() &&
@@ -3440,20 +3438,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
- // Since the result is legal and the input is illegal, it is unlikely that we
- // can fix the input to a legal type so unroll the convert into some scalar
- // code and create a nasty build vector.
+ // The result is legal and the input is illegal.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
- InOp = GetWidenedVector(InOp);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // See if a widened result type would be legal, if so widen the node.
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ InVT.getVectorNumElements());
+ if (TLI.isTypeLegal(WideVT)) {
+ SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
EVT InEltVT = InVT.getVectorElementType();
- unsigned Opcode = N->getOpcode();
+ // Unroll the convert into some scalar code and create a nasty build vector.
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
@@ -3506,8 +3515,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
- InOp = GetWidenedVector(InOp);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 71cb8cb78f6d..68bbd62e1321 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8940,17 +8940,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
- for (BasicBlock::const_iterator I = SuccBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
- if (PN->use_empty()) continue;
+ if (PN.use_empty())
+ continue;
// Skip empty types
- if (PN->getType()->isEmptyTy())
+ if (PN.getType()->isEmptyTy())
continue;
unsigned Reg;
- const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
@@ -8977,7 +8977,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index d13ccc263718..befd797e75b4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1445,13 +1445,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (AllPredsVisited) {
- for (BasicBlock::const_iterator I = LLVMBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I)
- FuncInfo->ComputePHILiveOutRegInfo(PN);
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->ComputePHILiveOutRegInfo(&PN);
} else {
- for (BasicBlock::const_iterator I = LLVMBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I)
- FuncInfo->InvalidatePHILiveOutRegInfo(PN);
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->InvalidatePHILiveOutRegInfo(&PN);
}
FuncInfo->VisitedBBs.insert(LLVMBB);
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 121bed5a79cb..c90a93d7e247 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -712,8 +712,11 @@ bool TargetPassConfig::addCoreISelPasses() {
// Ask the target for an isel.
// Enable GlobalISel if the target wants to, but allow that to be overridden.
+ // Explicitly enabling fast-isel should override implicitly enabled
+ // global-isel.
if (EnableGlobalISel == cl::BOU_TRUE ||
- (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) {
+ (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() &&
+ EnableFastISelOption != cl::BOU_TRUE)) {
if (addIRTranslator())
return true;
@@ -1133,7 +1136,12 @@ bool TargetPassConfig::isGlobalISelEnabled() const {
}
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
- return EnableGlobalISelAbort == 1;
+ if (EnableGlobalISelAbort.getNumOccurrences() > 0)
+ return EnableGlobalISelAbort == 1;
+
+ // When no abort behaviour is specified, we don't abort if the target says
+ // that GISel is enabled.
+ return !isGlobalISelEnabled();
}
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 7ad84734203d..0b16a113640d 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -838,17 +838,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
for (auto &BBMapping : Orig2Clone) {
BasicBlock *OldBlock = BBMapping.first;
BasicBlock *NewBlock = BBMapping.second;
- for (Instruction &OldI : *OldBlock) {
- auto *OldPN = dyn_cast<PHINode>(&OldI);
- if (!OldPN)
- break;
- UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
+ for (PHINode &OldPN : OldBlock->phis()) {
+ UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true);
}
- for (Instruction &NewI : *NewBlock) {
- auto *NewPN = dyn_cast<PHINode>(&NewI);
- if (!NewPN)
- break;
- UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
+ for (PHINode &NewPN : NewBlock->phis()) {
+ UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false);
}
}
@@ -858,17 +852,13 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
BasicBlock *OldBlock = BBMapping.first;
BasicBlock *NewBlock = BBMapping.second;
for (BasicBlock *SuccBB : successors(NewBlock)) {
- for (Instruction &SuccI : *SuccBB) {
- auto *SuccPN = dyn_cast<PHINode>(&SuccI);
- if (!SuccPN)
- break;
-
+ for (PHINode &SuccPN : SuccBB->phis()) {
// Ok, we have a PHI node. Figure out what the incoming value was for
// the OldBlock.
- int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
+ int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock);
if (OldBlockIdx == -1)
break;
- Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
+ Value *IV = SuccPN.getIncomingValue(OldBlockIdx);
// Remap the value if necessary.
if (auto *Inst = dyn_cast<Instruction>(IV)) {
@@ -877,7 +867,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
IV = I->second;
}
- SuccPN->addIncoming(IV, NewBlock);
+ SuccPN.addIncoming(IV, NewBlock);
}
}
}
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 22513924a96d..938c40182b92 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -264,7 +264,8 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const {
}
iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
- return make_range<phi_iterator>(dyn_cast<PHINode>(&front()), nullptr);
+ PHINode *P = empty() ? nullptr : dyn_cast<PHINode>(&*begin());
+ return make_range<phi_iterator>(P, nullptr);
}
/// This method is used to notify a BasicBlock that the
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 534104686d81..1754f7d45011 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -2210,24 +2210,23 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
std::sort(Preds.begin(), Preds.end());
- PHINode *PN;
- for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
+ for (const PHINode &PN : BB.phis()) {
// Ensure that PHI nodes have at least one entry!
- Assert(PN->getNumIncomingValues() != 0,
+ Assert(PN.getNumIncomingValues() != 0,
"PHI nodes must have at least one entry. If the block is dead, "
"the PHI should be removed!",
- PN);
- Assert(PN->getNumIncomingValues() == Preds.size(),
+ &PN);
+ Assert(PN.getNumIncomingValues() == Preds.size(),
"PHINode should have one entry for each predecessor of its "
"parent basic block!",
- PN);
+ &PN);
// Get and sort all incoming values in the PHI node...
Values.clear();
- Values.reserve(PN->getNumIncomingValues());
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Values.push_back(std::make_pair(PN->getIncomingBlock(i),
- PN->getIncomingValue(i)));
+ Values.reserve(PN.getNumIncomingValues());
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ Values.push_back(
+ std::make_pair(PN.getIncomingBlock(i), PN.getIncomingValue(i)));
std::sort(Values.begin(), Values.end());
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
@@ -2239,12 +2238,12 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
Values[i].second == Values[i - 1].second,
"PHI node has multiple entries for the same basic block with "
"different incoming values!",
- PN, Values[i].first, Values[i].second, Values[i - 1].second);
+ &PN, Values[i].first, Values[i].second, Values[i - 1].second);
// Check to make sure that the predecessors and PHI node entries are
// matched up.
Assert(Values[i].first == Preds[i],
- "PHI node entries do not match predecessors!", PN,
+ "PHI node entries do not match predecessors!", &PN,
Values[i].first, Preds[i]);
}
}
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 3e2150a451e0..c634df99a115 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -447,7 +447,7 @@ bool ELFAsmParser::parseMetadataSym(MCSymbolELF *&Associated) {
Lex();
StringRef Name;
if (getParser().parseIdentifier(Name))
- return true;
+ return TokError("invalid metadata symbol");
Associated = dyn_cast_or_null<MCSymbolELF>(getContext().lookupSymbol(Name));
if (!Associated || !Associated->isInSection())
return TokError("symbol is not in a section: " + Name);
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index cbae16a04ca6..21003c0be7e1 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -412,10 +412,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2)));
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging));
// Eliminate redundancies.
if (Level != O1) {
@@ -450,7 +450,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+ FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
@@ -510,7 +510,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(PGOInstrumentationGen());
FunctionPassManager FPM;
- FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
// Add the profile lowering pass.
@@ -730,7 +731,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
C(OptimizePM, Level);
// First rotate loops that may have been un-rotated by prior passes.
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
+ OptimizePM.addPass(
+ createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@@ -777,7 +779,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(LoopUnrollPass(Level));
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@@ -1533,7 +1535,8 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
DebugLogging))
return false;
// Add the nested pass manager with the appropriate adaptor.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
return true;
}
if (auto Count = parseRepeatPassName(Name)) {
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 4caf4a4fdce0..d95b791972c8 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -873,6 +873,45 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
NewArgv.push_back(nullptr);
}
+void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver,
+ SmallVectorImpl<const char *> &NewArgv,
+ bool MarkEOLs) {
+ for (const char *Cur = Source.begin(); Cur != Source.end();) {
+ SmallString<128> Line;
+ // Check for comment line.
+ if (isWhitespace(*Cur)) {
+ while (Cur != Source.end() && isWhitespace(*Cur))
+ ++Cur;
+ continue;
+ }
+ if (*Cur == '#') {
+ while (Cur != Source.end() && *Cur != '\n')
+ ++Cur;
+ continue;
+ }
+ // Find end of the current line.
+ const char *Start = Cur;
+ for (const char *End = Source.end(); Cur != End; ++Cur) {
+ if (*Cur == '\\') {
+ if (Cur + 1 != End) {
+ ++Cur;
+ if (*Cur == '\n' ||
+ (*Cur == '\r' && (Cur + 1 != End) && Cur[1] == '\n')) {
+ Line.append(Start, Cur - 1);
+ if (*Cur == '\r')
+ ++Cur;
+ Start = Cur + 1;
+ }
+ }
+ } else if (*Cur == '\n')
+ break;
+ }
+ // Tokenize line.
+ Line.append(Start, Cur);
+ cl::TokenizeGNUCommandLine(Line, Saver, NewArgv, MarkEOLs);
+ }
+}
+
// It is called byte order marker but the UTF-8 BOM is actually not affected
// by the host system's endianness.
static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
@@ -977,6 +1016,15 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
return AllExpanded;
}
+bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver,
+ SmallVectorImpl<const char *> &Argv) {
+ if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv,
+ /*MarkEOLs*/ false, /*RelativeNames*/ true))
+ return false;
+ return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv,
+ /*MarkEOLs*/ false, /*RelativeNames*/ true);
+}
+
/// ParseEnvironmentOptions - An alternative entry point to the
/// CommandLine library, which allows you to read the program's name
/// from the caller (as PROGNAME) and its command-line arguments from
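A usage sketch for the new cl::readConfigFile entry point (hypothetical caller; the file name, wrapper function, and error handling are assumptions): the saved strings must outlive Argv, so the saver is backed by a long-lived allocator.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StringSaver.h"

bool loadToolConfig(llvm::SmallVectorImpl<const char *> &Argv) {
  // The allocator owns the tokenized strings; keep it alive as long as
  // Argv is used (a function-local static is one simple way).
  static llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  // Strips '#' comment lines, honors backslash line continuations, then
  // tokenizes each line GNU-style and expands nested response files.
  return llvm::cl::readConfigFile("tool.cfg", Saver, Argv);
}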
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6f7b2b6fd5b5..41ed24c329ef 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -632,16 +632,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// AArch64 doesn't have direct vector->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
// i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
// -> v8f16 conversions.
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index 39e3e33b0d27..9023c3dd8c25 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -756,27 +756,31 @@ class ZPRRegOp <string Suffix, AsmOperandClass C,
//******************************************************************************
-// SVE predicate register class.
-def PPR : RegisterClass<"AArch64",
- [nxv16i1, nxv8i1, nxv4i1, nxv2i1],
- 16, (sequence "P%u", 0, 15)> {
+// SVE predicate register classes.
+class PPRClass<int lastreg> : RegisterClass<
+ "AArch64",
+ [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16,
+ (sequence "P%u", 0, lastreg)> {
let Size = 16;
}
-class PPRAsmOperand <string name, int Width>: AsmOperandClass {
+def PPR : PPRClass<15>;
+def PPR_3b : PPRClass<7>; // Restricted 3-bit SVE predicate register class.
+
+class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass {
let Name = "SVE" # name # "Reg";
let PredicateMethod = "isSVEVectorRegOfWidth<"
- # Width # ", AArch64::PPRRegClassID>";
+ # Width # ", " # "AArch64::" # RegClass # "RegClassID>";
let DiagnosticType = "InvalidSVE" # name # "Reg";
let RenderMethod = "addRegOperands";
let ParserMethod = "tryParseSVEPredicateVector";
}
-def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", -1>;
-def PPRAsmOp8 : PPRAsmOperand<"PredicateB", 8>;
-def PPRAsmOp16 : PPRAsmOperand<"PredicateH", 16>;
-def PPRAsmOp32 : PPRAsmOperand<"PredicateS", 32>;
-def PPRAsmOp64 : PPRAsmOperand<"PredicateD", 64>;
+def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", -1>;
+def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>;
+def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
+def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
+def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
@@ -784,6 +788,18 @@ def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
+def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", -1>;
+def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
+def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
+def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
+def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
+
+def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>;
+def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>;
+def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>;
+def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>;
+def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>;
+
//******************************************************************************
// SVE vector register class
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 0e6ad944c141..5d00dc58a5ab 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -136,7 +136,7 @@ static cl::opt<bool>
static cl::opt<int> EnableGlobalISelAtO(
"aarch64-enable-global-isel-at-O", cl::Hidden,
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
- cl::init(-1));
+ cl::init(0));
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 6e63783e5646..ac9ff51f69f1 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -819,6 +819,10 @@ public:
}
bool isReg() const override {
+ return Kind == k_Register;
+ }
+
+ bool isScalarReg() const {
return Kind == k_Register && Reg.Kind == RegKind::Scalar;
}
@@ -839,6 +843,7 @@ public:
RK = RegKind::SVEDataVector;
break;
case AArch64::PPRRegClassID:
+ case AArch64::PPR_3bRegClassID:
RK = RegKind::SVEPredicateVector;
break;
default:
@@ -3148,7 +3153,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
return true;
if (Operands.size() < 2 ||
- !static_cast<AArch64Operand &>(*Operands[1]).isReg())
+ !static_cast<AArch64Operand &>(*Operands[1]).isScalarReg())
return Error(Loc, "Only valid when first operand is register");
bool IsXReg =
@@ -3648,6 +3653,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
case Match_InvalidSVEPredicateSReg:
case Match_InvalidSVEPredicateDReg:
return Error(Loc, "invalid predicate register.");
+ case Match_InvalidSVEPredicate3bAnyReg:
+ case Match_InvalidSVEPredicate3bBReg:
+ case Match_InvalidSVEPredicate3bHReg:
+ case Match_InvalidSVEPredicate3bSReg:
+ case Match_InvalidSVEPredicate3bDReg:
+ return Error(Loc, "restricted predicate has range [0, 7].");
default:
llvm_unreachable("unexpected error code!");
}
@@ -3670,7 +3681,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumOperands == 4 && Tok == "lsl") {
AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
- if (Op2.isReg() && Op3.isImm()) {
+ if (Op2.isScalarReg() && Op3.isImm()) {
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
if (Op3CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3702,7 +3713,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand LSBOp = static_cast<AArch64Operand &>(*Operands[2]);
AArch64Operand WidthOp = static_cast<AArch64Operand &>(*Operands[3]);
- if (Op1.isReg() && LSBOp.isImm() && WidthOp.isImm()) {
+ if (Op1.isScalarReg() && LSBOp.isImm() && WidthOp.isImm()) {
const MCConstantExpr *LSBCE = dyn_cast<MCConstantExpr>(LSBOp.getImm());
const MCConstantExpr *WidthCE = dyn_cast<MCConstantExpr>(WidthOp.getImm());
@@ -3758,7 +3769,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ if (Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) {
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
@@ -3822,7 +3833,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ if (Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) {
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
@@ -3901,7 +3912,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
- if (Op.isReg()) {
+ if (Op.isScalarReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
Op.getStartLoc(), Op.getEndLoc(),
@@ -3911,13 +3922,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// FIXME: Likewise for sxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
- if (Op.isReg() &&
+ if (Op.isScalarReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
- if (Op.isReg()) {
+ if (Op.isScalarReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
Op.getStartLoc(),
@@ -3928,13 +3939,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// FIXME: Likewise for uxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
- if (Op.isReg() &&
+ if (Op.isScalarReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR32. Twiddle it here if necessary.
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
- if (Op.isReg()) {
+ if (Op.isScalarReg()) {
unsigned Reg = getWRegFromXReg(Op.getReg());
Operands[1] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
Op.getStartLoc(),
@@ -4077,6 +4088,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSVEPredicateHReg:
case Match_InvalidSVEPredicateSReg:
case Match_InvalidSVEPredicateDReg:
+ case Match_InvalidSVEPredicate3bAnyReg:
+ case Match_InvalidSVEPredicate3bBReg:
+ case Match_InvalidSVEPredicate3bHReg:
+ case Match_InvalidSVEPredicate3bSReg:
+ case Match_InvalidSVEPredicate3bDReg:
case Match_MSR:
case Match_MRS: {
if (ErrorInfo >= Operands.size())
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index ae278caeda69..30438a159fbc 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -91,6 +91,9 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decode);
+LLVM_ATTRIBUTE_UNUSED static DecodeStatus
+DecodePPR_3bRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address,
+ const void *Decode);
static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
uint64_t Address,
@@ -481,6 +484,16 @@ static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
+static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void* Decoder) {
+ if (RegNo > 7)
+ return Fail;
+
+ // Just reuse the PPR decode table
+ return DecodePPRRegisterClass(Inst, RegNo, Addr, Decoder);
+}
+
static const unsigned VectorDecoderTable[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 7b33b4b5b542..4d1d3fd57353 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -605,10 +605,10 @@ public:
}
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TheTriple,
- StringRef CPU,
const MCTargetOptions &Options) {
+ const Triple &TheTriple = STI.getTargetTriple();
if (TheTriple.isOSBinFormatMachO())
return new DarwinAArch64AsmBackend(T, TheTriple, MRI);
@@ -624,10 +624,10 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
}
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TheTriple,
- StringRef CPU,
const MCTargetOptions &Options) {
+ const Triple &TheTriple = STI.getTargetTriple();
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index b9e1673b9317..a5720e0e8b87 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -45,12 +45,12 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
MCAsmBackend *createAArch64beAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ebf656c549ec..2e3a453f9c75 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -815,6 +815,10 @@ public:
class AMDGPUAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
+ // Number of extra operands parsed after the first optional operand.
+ // This may be necessary to skip hardcoded mandatory operands.
+ static const unsigned MAX_OPR_LOOKAHEAD = 1;
+
unsigned ForcedEncodingSize = 0;
bool ForcedDPP = false;
bool ForcedSDWA = false;
@@ -1037,6 +1041,7 @@ private:
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
+ OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
OperandMatchResultTy parseExpTgt(OperandVector &Operands);
OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
@@ -3859,7 +3864,7 @@ AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
} else {
// Swizzle "offset" operand is optional.
// If it is omitted, try parsing other optional operands.
- return parseOptionalOperand(Operands);
+ return parseOptionalOpr(Operands);
}
}
@@ -4179,6 +4184,39 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
+ unsigned size = Operands.size();
+ assert(size > 0);
+
+ OperandMatchResultTy res = parseOptionalOpr(Operands);
+
+ // This is a hack to enable hardcoded mandatory operands which follow
+ // optional operands.
+ //
+ // The current design assumes that all operands after the first optional
+ // operand are also optional. However, the implementation of some
+ // instructions violates this rule (see e.g. flat/global atomics, which
+ // have hardcoded 'glc' operands).
+ //
+ // To alleviate this problem, we have to (implicitly) parse extra operands
+ // to make sure the autogenerated parser of custom operands never hits
+ // hardcoded mandatory operands.
+
+ if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
+
+ // We have parsed the first optional operand.
+ // Parse as many operands as necessary to skip all mandatory operands.
+
+ for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
+ if (res != MatchOperand_Success ||
+ getLexer().is(AsmToken::EndOfStatement)) break;
+ if (getLexer().is(AsmToken::Comma)) Parser.Lex();
+ res = parseOptionalOpr(Operands);
+ }
+ }
+
+ return res;
+}
+
+OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
OperandMatchResultTy res;
for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
// try to parse any optional operand here
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 778d4a7ba9d0..d700acc34bc9 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -198,9 +198,9 @@ public:
} // end anonymous namespace
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
// Use 64-bit ELF for amdgcn
- return new ELFAMDGPUAsmBackend(T, TT);
+ return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple());
}
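The same mechanical change recurs in every target below, so it is worth stating once: the asm-backend factories drop the explicit Triple and CPU parameters in favor of an MCSubtargetInfo, and a backend that still needs either value recovers it from the subtarget, as the AArch64 and Hexagon hunks show:

    // Old shape: createXAsmBackend(T, MRI, TT, CPU, Options)
    // New shape: createXAsmBackend(T, STI, MRI, Options), where needed:
    const Triple &TT = STI.getTargetTriple();
    StringRef CPU = STI.getCPU();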
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 0b3563303ad0..1173dfd437ca 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -45,8 +45,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 30a2df510386..651265fc54d5 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -71,9 +71,9 @@ class MIMG_Store_Helper <bits<7> op, string asm,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> {
let ssamp = 0;
- let mayLoad = 1; // TableGen requires this for matching with the intrinsics
+ let mayLoad = 0;
let mayStore = 1;
- let hasSideEffects = 1;
+ let hasSideEffects = 0;
let hasPostISelHook = 0;
let DisableWQM = 1;
}
@@ -103,10 +103,10 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
(ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
- > {
+ asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"> {
+ let mayLoad = 1;
let mayStore = 1;
- let hasSideEffects = 1;
+ let hasSideEffects = 1; // FIXME: Remove this
let hasPostISelHook = 0;
let DisableWQM = 1;
let Constraints = "$vdst = $vdata";
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 50ee88fa635a..415d8a512aa8 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -575,6 +575,221 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
+
+ // Image load.
+ case Intrinsic::amdgcn_image_load:
+ case Intrinsic::amdgcn_image_load_mip:
+
+ // Sample.
+ case Intrinsic::amdgcn_image_sample:
+ case Intrinsic::amdgcn_image_sample_cl:
+ case Intrinsic::amdgcn_image_sample_d:
+ case Intrinsic::amdgcn_image_sample_d_cl:
+ case Intrinsic::amdgcn_image_sample_l:
+ case Intrinsic::amdgcn_image_sample_b:
+ case Intrinsic::amdgcn_image_sample_b_cl:
+ case Intrinsic::amdgcn_image_sample_lz:
+ case Intrinsic::amdgcn_image_sample_cd:
+ case Intrinsic::amdgcn_image_sample_cd_cl:
+
+ // Sample with comparison.
+ case Intrinsic::amdgcn_image_sample_c:
+ case Intrinsic::amdgcn_image_sample_c_cl:
+ case Intrinsic::amdgcn_image_sample_c_d:
+ case Intrinsic::amdgcn_image_sample_c_d_cl:
+ case Intrinsic::amdgcn_image_sample_c_l:
+ case Intrinsic::amdgcn_image_sample_c_b:
+ case Intrinsic::amdgcn_image_sample_c_b_cl:
+ case Intrinsic::amdgcn_image_sample_c_lz:
+ case Intrinsic::amdgcn_image_sample_c_cd:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+ // Sample with offsets.
+ case Intrinsic::amdgcn_image_sample_o:
+ case Intrinsic::amdgcn_image_sample_cl_o:
+ case Intrinsic::amdgcn_image_sample_d_o:
+ case Intrinsic::amdgcn_image_sample_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_l_o:
+ case Intrinsic::amdgcn_image_sample_b_o:
+ case Intrinsic::amdgcn_image_sample_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_lz_o:
+ case Intrinsic::amdgcn_image_sample_cd_o:
+ case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+ // Sample with comparison and offsets.
+ case Intrinsic::amdgcn_image_sample_c_o:
+ case Intrinsic::amdgcn_image_sample_c_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_d_o:
+ case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_l_o:
+ case Intrinsic::amdgcn_image_sample_c_b_o:
+ case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_lz_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+ // Basic gather4
+ case Intrinsic::amdgcn_image_gather4:
+ case Intrinsic::amdgcn_image_gather4_cl:
+ case Intrinsic::amdgcn_image_gather4_l:
+ case Intrinsic::amdgcn_image_gather4_b:
+ case Intrinsic::amdgcn_image_gather4_b_cl:
+ case Intrinsic::amdgcn_image_gather4_lz:
+
+ // Gather4 with comparison
+ case Intrinsic::amdgcn_image_gather4_c:
+ case Intrinsic::amdgcn_image_gather4_c_cl:
+ case Intrinsic::amdgcn_image_gather4_c_l:
+ case Intrinsic::amdgcn_image_gather4_c_b:
+ case Intrinsic::amdgcn_image_gather4_c_b_cl:
+ case Intrinsic::amdgcn_image_gather4_c_lz:
+
+ // Gather4 with offsets
+ case Intrinsic::amdgcn_image_gather4_o:
+ case Intrinsic::amdgcn_image_gather4_cl_o:
+ case Intrinsic::amdgcn_image_gather4_l_o:
+ case Intrinsic::amdgcn_image_gather4_b_o:
+ case Intrinsic::amdgcn_image_gather4_b_cl_o:
+ case Intrinsic::amdgcn_image_gather4_lz_o:
+
+ // Gather4 with comparison and offsets
+ case Intrinsic::amdgcn_image_gather4_c_o:
+ case Intrinsic::amdgcn_image_gather4_c_cl_o:
+ case Intrinsic::amdgcn_image_gather4_c_l_o:
+ case Intrinsic::amdgcn_image_gather4_c_b_o:
+ case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
+ case Intrinsic::amdgcn_image_gather4_c_lz_o: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(1));
+ Info.align = 0;
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_store:
+ case Intrinsic::amdgcn_image_store_mip: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(2));
+ Info.flags = MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+ Info.align = 0;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_atomic_swap:
+ case Intrinsic::amdgcn_image_atomic_add:
+ case Intrinsic::amdgcn_image_atomic_sub:
+ case Intrinsic::amdgcn_image_atomic_smin:
+ case Intrinsic::amdgcn_image_atomic_umin:
+ case Intrinsic::amdgcn_image_atomic_smax:
+ case Intrinsic::amdgcn_image_atomic_umax:
+ case Intrinsic::amdgcn_image_atomic_and:
+ case Intrinsic::amdgcn_image_atomic_or:
+ case Intrinsic::amdgcn_image_atomic_xor:
+ case Intrinsic::amdgcn_image_atomic_inc:
+ case Intrinsic::amdgcn_image_atomic_dec: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(2));
+
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Info.flags |= MachineMemOperand::MOVolatile;
+ return true;
+ }
+ case Intrinsic::amdgcn_image_atomic_cmpswap: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = MFI->getImagePSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(3));
+
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+
+ // XXX - Should this be volatile without known ordering?
+ Info.flags |= MachineMemOperand::MOVolatile;
+ return true;
+ }
+ case Intrinsic::amdgcn_tbuffer_load:
+ case Intrinsic::amdgcn_buffer_load:
+ case Intrinsic::amdgcn_buffer_load_format: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = MFI->getBufferPSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(0));
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable;
+
+ // There is a constant offset component, but there are additional register
+ // offsets which could break AA if we set the offset to anything non-0.
+ return true;
+ }
+ case Intrinsic::amdgcn_tbuffer_store:
+ case Intrinsic::amdgcn_buffer_store:
+ case Intrinsic::amdgcn_buffer_store_format: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.ptrVal = MFI->getBufferPSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(1));
+ Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
+ Info.flags = MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable;
+ return true;
+ }
+ case Intrinsic::amdgcn_buffer_atomic_swap:
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_buffer_atomic_smin:
+ case Intrinsic::amdgcn_buffer_atomic_umin:
+ case Intrinsic::amdgcn_buffer_atomic_smax:
+ case Intrinsic::amdgcn_buffer_atomic_umax:
+ case Intrinsic::amdgcn_buffer_atomic_and:
+ case Intrinsic::amdgcn_buffer_atomic_or:
+ case Intrinsic::amdgcn_buffer_atomic_xor: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = MFI->getBufferPSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(1));
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
+ case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = MFI->getBufferPSV(
+ *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+ CI.getArgOperand(2));
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOVolatile;
+ return true;
+ }
default:
return false;
}
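A sketch of how the IntrinsicInfo fields filled in above are consumed (assumption: the standard SelectionDAG flow; this is not code from the patch). SelectionDAGBuilder turns them into the MachineMemOperand that the later hunks reuse through MemSDNode::getMemOperand():

    // Roughly what the builder does with Info (illustrative):
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo(Info.ptrVal), Info.flags,
        Info.memVT.getStoreSize(), Info.align);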
@@ -2946,24 +3161,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
if (TII->isMIMG(MI)) {
- if (!MI.memoperands_empty())
- return BB;
+ if (MI.memoperands_empty() && MI.mayLoadOrStore()) {
+ report_fatal_error("missing mem operand from MIMG instruction");
+ }
// Add a memoperand for mimg instructions so that they aren't assumed to
     // be ordered memory instructions.
- MachinePointerInfo PtrInfo(MFI->getImagePSV());
- MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable;
- if (MI.mayStore())
- Flags |= MachineMemOperand::MOStore;
-
- if (MI.mayLoad())
- Flags |= MachineMemOperand::MOLoad;
-
- if (Flags != MachineMemOperand::MODereferenceable) {
- auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0);
- MI.addMemOperand(*MF, MMO);
- }
-
return BB;
}
@@ -4257,7 +4460,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
- MachineFunction &MF = DAG.getMachineFunction();
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
@@ -4284,21 +4486,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // glc
Op.getOperand(6) // slc
};
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(MFI->getBufferPSV()),
- MachineMemOperand::MOLoad,
- VT.getStoreSize(), VT.getStoreSize());
-
- return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
+ auto *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand());
}
case Intrinsic::amdgcn_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
@@ -4312,14 +4511,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(10) // slc
};
- EVT VT = Op.getOperand(2).getValueType();
+ EVT VT = Op.getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad,
- VT.getStoreSize(), VT.getStoreSize());
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
+ Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
@@ -4339,14 +4534,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // offset
Op.getOperand(6) // slc
};
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOVolatile,
- VT.getStoreSize(), 4);
+ EVT VT = Op.getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
unsigned Opcode = 0;
switch (IntrID) {
@@ -4384,7 +4574,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
llvm_unreachable("unhandled atomic opcode");
}
- return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
@@ -4397,17 +4588,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // offset
Op.getOperand(7) // slc
};
- EVT VT = Op.getOperand(4).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOVolatile,
- VT.getStoreSize(), 4);
+ EVT VT = Op.getValueType();
+ auto *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
- Op->getVTList(), Ops, VT, MMO);
+ Op->getVTList(), Ops, VT, M->getMemOperand());
}
// Basic sample.
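The pattern repeated across the LowerINTRINSIC_W_CHAIN hunks above, isolated for clarity: because getTgtMemIntrinsic now attaches a memory operand carrying the per-resource pseudo source value, the lowering reuses that operand instead of fabricating a fresh MMO with an empty MachinePointerInfo:

    // The refactor in one shape (mirrors the hunks above):
    auto *M = cast<MemSDNode>(Op);
    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, VT,
                                   M->getMemOperand());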
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6013ebc81d9f..888d8f978aff 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -28,8 +28,6 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
- BufferPSV(*(MF.getSubtarget().getInstrInfo())),
- ImagePSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 5dde72910ee3..02e63f0258e6 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -34,12 +34,14 @@ namespace llvm {
class MachineFrameInfo;
class MachineFunction;
+class SIInstrInfo;
class TargetRegisterClass;
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
+ // TODO: Is the img rsrc useful?
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
- PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+ PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
@@ -135,8 +137,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Stack object indices for work item IDs.
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
- AMDGPUBufferPseudoSourceValue BufferPSV;
- AMDGPUImagePseudoSourceValue ImagePSV;
+ DenseMap<const Value *,
+ std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
+ DenseMap<const Value *,
+ std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
private:
unsigned LDSWaveSpillSize = 0;
@@ -629,12 +633,22 @@ public:
return LDSWaveSpillSize;
}
- const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
- return &BufferPSV;
+ const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
+ const Value *BufferRsrc) {
+ assert(BufferRsrc);
+ auto PSV = BufferPSVs.try_emplace(
+ BufferRsrc,
+ llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
+ return PSV.first->second.get();
}
- const AMDGPUImagePseudoSourceValue *getImagePSV() const {
- return &ImagePSV;
+ const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
+ const Value *ImgRsrc) {
+ assert(ImgRsrc);
+ auto PSV = ImagePSVs.try_emplace(
+ ImgRsrc,
+ llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
+ return PSV.first->second.get();
}
};
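A usage-level consequence of the DenseMap-backed getters (sketch; TII, RsrcA, and RsrcB are stand-in names): each distinct resource descriptor Value now gets exactly one PseudoSourceValue, so accesses through different descriptors are no longer forced to alias:

    const auto *P1 = MFI->getBufferPSV(TII, RsrcA);
    const auto *P2 = MFI->getBufferPSV(TII, RsrcA); // cached: same pointer
    const auto *P3 = MFI->getBufferPSV(TII, RsrcB); // distinct resource, distinct PSV
    assert(P1 == P2 && P1 != P3);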
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1cb9dd44f789..ff507ab7162f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -172,8 +172,8 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
}
unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
- bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2];
- bool HasV8MBaselineOps = STI->getFeatureBits()[ARM::HasV8MBaselineOps];
+ bool HasThumb2 = STI.getFeatureBits()[ARM::FeatureThumb2];
+ bool HasV8MBaselineOps = STI.getFeatureBits()[ARM::HasV8MBaselineOps];
switch (Op) {
default:
@@ -389,7 +389,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case FK_SecRel_4:
return Value;
case ARM::fixup_arm_movt_hi16:
- if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
+ if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_arm_movw_lo16: {
@@ -401,7 +401,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return Value;
}
case ARM::fixup_t2_movt_hi16:
- if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
+ if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16: {
@@ -591,7 +591,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case ARM::fixup_arm_thumb_cp:
// On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
// could have an error on our hands.
- if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ if (!STI.getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@@ -615,8 +615,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
- !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
+ if (!STI.getFeatureBits()[ARM::FeatureThumb2] &&
+ !STI.getFeatureBits()[ARM::HasV8MBaselineOps]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@@ -626,7 +626,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (!STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ if (!STI.getFeatureBits()[ARM::FeatureThumb2]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@@ -1154,51 +1154,52 @@ static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
}
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TheTriple, StringRef CPU,
const MCTargetOptions &Options,
bool isLittle) {
+ const Triple &TheTriple = STI.getTargetTriple();
switch (TheTriple.getObjectFormat()) {
default:
llvm_unreachable("unsupported object format");
case Triple::MachO: {
MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
- return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS);
+ return new ARMAsmBackendDarwin(T, STI, MRI, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return new ARMAsmBackendWinCOFF(T, TheTriple);
+ return new ARMAsmBackendWinCOFF(T, STI);
case Triple::ELF:
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- return new ARMAsmBackendELF(T, TheTriple, OSABI, isLittle);
+ return new ARMAsmBackendELF(T, STI, OSABI, isLittle);
}
}
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
+ return createARMAsmBackend(T, STI, MRI, Options, true);
}
MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
+ return createARMAsmBackend(T, STI, MRI, Options, false);
}
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
+ return createARMAsmBackend(T, STI, MRI, Options, true);
}
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
+ return createARMAsmBackend(T, STI, MRI, Options, false);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 02374966dafe..c8527e5cca20 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -19,22 +19,20 @@
namespace llvm {
class ARMAsmBackend : public MCAsmBackend {
- const MCSubtargetInfo *STI;
+ const MCSubtargetInfo &STI;
bool isThumbMode; // Currently emitting Thumb code.
bool IsLittleEndian; // Big or little endian.
public:
- ARMAsmBackend(const Target &T, const Triple &TT, bool IsLittle)
- : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
- isThumbMode(TT.getArchName().startswith("thumb")),
+ ARMAsmBackend(const Target &T, const MCSubtargetInfo &STI, bool IsLittle)
+ : MCAsmBackend(), STI(STI),
+ isThumbMode(STI.getTargetTriple().isThumb()),
IsLittleEndian(IsLittle) {}
- ~ARMAsmBackend() override { delete STI; }
-
unsigned getNumFixupKinds() const override {
return ARM::NumTargetFixupKinds;
}
- bool hasNOP() const { return STI->getFeatureBits()[ARM::HasV6T2Ops]; }
+ bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index f05e3a6f1160..19e3fdb72046 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -19,10 +19,10 @@ class ARMAsmBackendDarwin : public ARMAsmBackend {
const MCRegisterInfo &MRI;
public:
const MachO::CPUSubTypeARM Subtype;
- ARMAsmBackendDarwin(const Target &T, const Triple &TT,
+ ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
- : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
- }
+ : ARMAsmBackend(T, STI, /* IsLittleEndian */ true), MRI(MRI),
+ Subtype(st) {}
std::unique_ptr<MCObjectWriter>
createObjectWriter(raw_pwrite_stream &OS) const override {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index d0f5419a1b0f..361ea3040847 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -20,9 +20,9 @@ namespace {
class ARMAsmBackendELF : public ARMAsmBackend {
public:
uint8_t OSABI;
- ARMAsmBackendELF(const Target &T, const Triple &TT, uint8_t OSABI,
+ ARMAsmBackendELF(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI,
bool IsLittle)
- : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
+ : ARMAsmBackend(T, STI, IsLittle), OSABI(OSABI) {}
std::unique_ptr<MCObjectWriter>
createObjectWriter(raw_pwrite_stream &OS) const override {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 53b9c29446a3..0ac6d4270aac 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -17,8 +17,8 @@ using namespace llvm;
namespace {
class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
- ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple)
- : ARMAsmBackend(T, TheTriple, true) {}
+ ARMAsmBackendWinCOFF(const Target &T, const MCSubtargetInfo &STI)
+ : ARMAsmBackend(T, STI, true) {}
std::unique_ptr<MCObjectWriter>
createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 0fb97e5fee97..df9874c78d07 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -68,27 +68,27 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createARMAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options,
bool IsLittleEndian);
-MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
-MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
MCAsmBackend *createThumbLEAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
MCAsmBackend *createThumbBEAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
// Construct a PE/COFF machine code streamer which will generate a PE/COFF
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 2d9dd4f8f83f..2f5e9f02e53c 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -476,10 +476,10 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
}
}
-MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const llvm::MCTargetOptions &TO) {
- return new AVRAsmBackend(TT.getOS());
+ return new AVRAsmBackend(STI.getTargetTriple().getOS());
}
} // end of namespace llvm
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index 5615fd72e456..fcfd8cf82292 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -26,6 +26,7 @@ class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
+class MCSubtargetInfo;
class MCTargetOptions;
class StringRef;
class Target;
@@ -42,8 +43,8 @@ MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
/// Creates an assembly backend for AVR.
-MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const llvm::MCTargetOptions &TO);
/// Creates an ELF object writer for AVR.
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index e6ea92e08364..6593d9d018fd 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -104,15 +104,15 @@ BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
}
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
- const MCTargetOptions&) {
+ const MCTargetOptions &) {
return new BPFAsmBackend(/*IsLittleEndian=*/true);
}
MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
- const MCTargetOptions&) {
+ const MCTargetOptions &) {
return new BPFAsmBackend(/*IsLittleEndian=*/false);
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 6466042f6929..a6dac3abca02 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -45,11 +45,11 @@ MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
-MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter> createBPFELFObjectWriter(raw_pwrite_stream &OS,
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index fd602257934a..2646d0bcbf47 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1050,14 +1050,11 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
// Check if the exit values have types that are no wider than the type
// that we want to promote to.
unsigned DestBW = DestTy->getBitWidth();
- for (Instruction &In : *ExitB) {
- PHINode *P = dyn_cast<PHINode>(&In);
- if (!P)
- break;
- if (P->getNumIncomingValues() != 1)
+ for (PHINode &P : ExitB->phis()) {
+ if (P.getNumIncomingValues() != 1)
return false;
- assert(P->getIncomingBlock(0) == LoopB);
- IntegerType *T = dyn_cast<IntegerType>(P->getType());
+ assert(P.getIncomingBlock(0) == LoopB);
+ IntegerType *T = dyn_cast<IntegerType>(P.getType());
if (!T || T->getBitWidth() > DestBW)
return false;
}
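BasicBlock::phis() iterates exactly the block's leading PHI nodes, so the rewrite above is behavior-preserving. A minimal before/after sketch (visit is a hypothetical helper):

    // Before: manual scan that stops at the first non-PHI instruction.
    for (Instruction &In : *ExitB) {
      auto *P = dyn_cast<PHINode>(&In);
      if (!P)
        break;
      visit(*P);
    }
    // After: the same traversal, stated directly.
    for (PHINode &P : ExitB->phis())
      visit(P);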
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index cdc2085986a5..98229f4fa64a 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -2925,6 +2925,23 @@ let Predicates = [UseHVX] in {
def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
def vzero: PatFrag<(ops), (HexagonVZERO)>;
+def VSxtb: OutPatFrag<(ops node:$Vs),
+ (V6_vshuffvdd (HiVec (V6_vsb $Vs)),
+ (LoVec (V6_vsb $Vs)),
+ (A2_tfrsi -2))>;
+def VSxth: OutPatFrag<(ops node:$Vs),
+ (V6_vshuffvdd (HiVec (V6_vsh $Vs)),
+ (LoVec (V6_vsh $Vs)),
+ (A2_tfrsi -4))>;
+def VZxtb: OutPatFrag<(ops node:$Vs),
+ (V6_vshuffvdd (HiVec (V6_vzb $Vs)),
+ (LoVec (V6_vzb $Vs)),
+ (A2_tfrsi -2))>;
+def VZxth: OutPatFrag<(ops node:$Vs),
+ (V6_vshuffvdd (HiVec (V6_vzh $Vs)),
+ (LoVec (V6_vzh $Vs)),
+ (A2_tfrsi -4))>;
+
let Predicates = [UseHVX] in {
def: Pat<(VecI8 vzero), (V6_vd0)>;
def: Pat<(VecI16 vzero), (V6_vd0)>;
@@ -2970,25 +2987,18 @@ let Predicates = [UseHVX] in {
def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(VecPI16 (sext HVI8:$Vs)), (V6_vsb HvxVR:$Vs)>;
- def: Pat<(VecPI32 (sext HVI16:$Vs)), (V6_vsh HvxVR:$Vs)>;
- def: Pat<(VecPI16 (zext HVI8:$Vs)), (V6_vzb HvxVR:$Vs)>;
- def: Pat<(VecPI32 (zext HVI16:$Vs)), (V6_vzh HvxVR:$Vs)>;
-
- def: Pat<(sext_inreg HVI32:$Vs, v16i16),
- (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
- (HiVec (V6_vsh HvxVR:$Vs)))>;
- def: Pat<(sext_inreg HVI32:$Vs, v32i16),
- (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
- (HiVec (V6_vsh HvxVR:$Vs)))>;
+ def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
+ def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
+ def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>;
+ def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;
- def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (V6_vsb HvxVR:$Vs))>;
- def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (V6_vsh HvxVR:$Vs))>;
+ def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
+ def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
- (LoVec (V6_vsh (LoVec (V6_vsb HvxVR:$Vs))))>;
+ (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
- def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (V6_vzb HvxVR:$Vs))>;
- def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (V6_vzh HvxVR:$Vs))>;
+ def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
+ def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
- (LoVec (V6_vzh (LoVec (V6_vzb HvxVR:$Vs))))>;
+ (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index b3ab6763281c..fe54c19370b3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -765,11 +765,12 @@ public:
// MCAsmBackend
MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T,
- MCRegisterInfo const & /*MRI*/,
- const Triple &TT, StringRef CPU,
- const MCTargetOptions &Options) {
+ const MCSubtargetInfo &STI,
+ MCRegisterInfo const & /*MRI*/,
+ const MCTargetOptions &Options) {
+ const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- StringRef CPUString = Hexagon_MC::selectHexagonCPU(CPU);
+ StringRef CPUString = Hexagon_MC::selectHexagonCPU(STI.getCPU());
return new HexagonAsmBackend(T, TT, OSABI, CPUString);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 05d17c368dcc..71545a5c02c9 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -61,8 +61,8 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index c4935746f5ad..e3eaa4d30a90 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -165,9 +165,10 @@ LanaiAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
} // namespace
MCAsmBackend *llvm::createLanaiAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo & /*MRI*/,
- const Triple &TT, StringRef /*CPU*/,
const MCTargetOptions & /*Options*/) {
+ const Triple &TT = STI.getTargetTriple();
if (!TT.isOSBinFormatELF())
llvm_unreachable("OS not supported");
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
index 5bc84ad83870..ddb4e9b0d728 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
@@ -38,8 +38,8 @@ MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TheTriple, StringRef CPU,
+MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 1ad524c06969..acbc6d37e24b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -476,8 +476,9 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
}
MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- return new MipsAsmBackend(T, MRI, TT, CPU, Options.ABIName == "n32");
+ return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
+ Options.ABIName == "n32");
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index abbf08ed212f..5dab6c3e81d6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -45,8 +45,8 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createMipsAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createMipsAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 6448fd917560..79ca9cc6b800 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -3863,13 +3863,17 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'c': // register suitable for indirect jump
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::T9, &Mips::GPR32RegClass);
- assert(VT == MVT::i64 && "Unexpected type.");
- return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
- case 'l': // register suitable for indirect jump
+ if (VT == MVT::i64)
+ return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
+ case 'l': // use the `lo` register to store values
+ // that are no bigger than a word
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
- case 'x': // register suitable for indirect jump
+ case 'x': // use the concatenated `hi` and `lo` registers
+ // to store doubleword values
// Fixme: Not triggering the use of both hi and low
// This will generate an error message
return std::make_pair(0U, nullptr);
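What the 'c' constraint change means at the source level (hypothetical user code, not from the patch): i32 and i64 operands are placed in T9/T9_64, and any other operand type now produces a proper diagnostic instead of tripping the removed assert:

    // Hypothetical inline-asm user of the MIPS 'c' constraint:
    void tail_jump(void (*target)(void)) {
      __asm__ volatile("jr %0" : : "c"(target)); // target lands in $t9
    }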
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 2a1de244da92..728e7757fd28 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@@ -231,9 +232,10 @@ namespace {
} // end anonymous namespace
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
+ const Triple &TT = STI.getTargetTriple();
if (TT.isOSDarwin())
return new DarwinPPCAsmBackend(T);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 80a74c09a598..d47b9a6e452c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -29,6 +29,7 @@ class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
+class MCSubtargetInfo;
class MCTargetOptions;
class Target;
class Triple;
@@ -43,8 +44,8 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
/// Construct an PPC ELF object writer.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index cea59de3e8a9..f9de65fcb1df 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4397,13 +4397,18 @@ hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
CallingConv::ID CalleeCC) {
- // Tail or Sibling call optimization (TCO/SCO) needs callee and caller to
- // have the same calling convention.
- if (CallerCC != CalleeCC)
+ // Tail calls are possible with fastcc and ccc.
+ auto isTailCallableCC = [] (CallingConv::ID CC){
+ return CC == CallingConv::C || CC == CallingConv::Fast;
+ };
+ if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
return false;
- // Tail or Sibling calls can be done with fastcc/ccc.
- return (CallerCC == CallingConv::Fast || CallerCC == CallingConv::C);
+ // We can safely tail call both fastcc and ccc callees from a C calling
+ // convention caller. If the caller is fastcc, it may have less stack space
+ // than a non-fastcc caller with the same signature, so disable tail calls
+ // in that case.
+ return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
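For reference, the predicate above admits the following caller/callee matrix (eligibleForTCO is an illustrative standalone restatement; CallingConv IDs as in llvm/IR/CallingConv.h):

#include "llvm/IR/CallingConv.h"

static bool eligibleForTCO(llvm::CallingConv::ID CallerCC,
                           llvm::CallingConv::ID CalleeCC) {
  auto IsTailCallableCC = [](llvm::CallingConv::ID CC) {
    return CC == llvm::CallingConv::C || CC == llvm::CallingConv::Fast;
  };
  if (!IsTailCallableCC(CallerCC) || !IsTailCallableCC(CalleeCC))
    return false;
  return CallerCC == llvm::CallingConv::C || CallerCC == CalleeCC;
}
// eligibleForTCO(C, C)       -> true
// eligibleForTCO(C, Fast)    -> true
// eligibleForTCO(Fast, Fast) -> true
// eligibleForTCO(Fast, C)    -> false: the fastcc caller may have reserved
//                               less parameter stack than a ccc callee needs.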
bool
@@ -4434,10 +4439,28 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// A callee with any byval parameter is not supported, either.
// Note: This is a quick workaround, because in some cases, e.g.
// caller's stack size > callee's stack size, we are still able to apply
- // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
+ // sibling call optimization. For example, gcc is able to do SCO for caller1
+ // in the following example, but not for caller2.
+ // struct test {
+ // long int a;
+ // char ary[56];
+ // } gTest;
+ // __attribute__((noinline)) int callee(struct test v, struct test *b) {
+ // b->a = v.a;
+ // return 0;
+ // }
+ // void caller1(struct test a, struct test c, struct test *b) {
+ // callee(gTest, b); }
+ // void caller2(struct test *b) { callee(gTest, b); }
if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
return false;
+ // If callee and caller use different calling conventions, we cannot pass
+ // parameters on stack since offsets for the parameter area may be different.
+ if (Caller.getCallingConv() != CalleeCC &&
+ needStackSlotPassParameters(Subtarget, Outs))
+ return false;
+
// No TCO/SCO on indirect calls because the caller has to restore its TOC
if (!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee))
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index ffb5cc8757f2..fb16700a5e17 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2433,7 +2433,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
int64_t MB = MI.getOperand(3).getImm();
APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true);
InVal = InVal.rotl(SH);
- uint64_t Mask = (1LU << (63 - MB + 1)) - 1;
+ uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
@@ -2457,8 +2457,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
int64_t ME = MI.getOperand(4).getImm();
APInt InVal(32, SExtImm, true);
InVal = InVal.rotl(SH);
- // Set the bits ( MB + 32 ) to ( ME + 32 ).
- uint64_t Mask = ((1 << (32 - MB)) - 1) & ~((1 << (31 - ME)) - 1);
+ // Set the bits ( MB + 32 ) to ( ME + 32 ).
+ uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
@@ -2527,6 +2527,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ConstantOpNo = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
+ III.TruncateImmTo = 0;
switch (Opc) {
default: return false;
case PPC::ADD4:
@@ -2600,10 +2601,6 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::RLWNM8:
case PPC::RLWNMo:
case PPC::RLWNM8o:
- case PPC::RLDCL:
- case PPC::RLDCLo:
- case PPC::RLDCR:
- case PPC::RLDCRo:
case PPC::SLW:
case PPC::SLW8:
case PPC::SLWo:
@@ -2614,29 +2611,26 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::SRW8o:
case PPC::SRAW:
case PPC::SRAWo:
- case PPC::SLD:
- case PPC::SLDo:
- case PPC::SRD:
- case PPC::SRDo:
- case PPC::SRAD:
- case PPC::SRADo:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
// This isn't actually true, but the instructions ignore any of the
// upper bits, so any immediate loaded with an LI is acceptable.
+ // This does not apply to shift right algebraic because a value
+ // out of range will produce a -1/0.
III.ImmWidth = 16;
+ if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 ||
+ Opc == PPC::RLWNMo || Opc == PPC::RLWNM8o)
+ III.TruncateImmTo = 5;
+ else
+ III.TruncateImmTo = 6;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
- case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
- case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
- case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
- case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
@@ -2645,14 +2639,62 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
- case PPC::SRAW: III.ImmOpcode = PPC::SRAWI; break;
- case PPC::SRAWo: III.ImmOpcode = PPC::SRAWIo; break;
+ case PPC::SRAW:
+ III.ImmWidth = 5;
+ III.TruncateImmTo = 0;
+ III.ImmOpcode = PPC::SRAWI;
+ break;
+ case PPC::SRAWo:
+ III.ImmWidth = 5;
+ III.TruncateImmTo = 0;
+ III.ImmOpcode = PPC::SRAWIo;
+ break;
+ }
+ break;
+ case PPC::RLDCL:
+ case PPC::RLDCLo:
+ case PPC::RLDCR:
+ case PPC::RLDCRo:
+ case PPC::SLD:
+ case PPC::SLDo:
+ case PPC::SRD:
+ case PPC::SRDo:
+ case PPC::SRAD:
+ case PPC::SRADo:
+ III.SignedImm = false;
+ III.ZeroIsSpecialOrig = 0;
+ III.ZeroIsSpecialNew = 0;
+ III.IsCommutative = false;
+ // This isn't actually true, but the instructions ignore any of the
+ // upper bits, so any immediate loaded with an LI is acceptable.
+ // This does not apply to shift right algebraic because a value
+ // out of range will produce a -1/0.
+ III.ImmWidth = 16;
+ if (Opc == PPC::RLDCL || Opc == PPC::RLDCLo ||
+ Opc == PPC::RLDCR || Opc == PPC::RLDCRo)
+ III.TruncateImmTo = 6;
+ else
+ III.TruncateImmTo = 7;
+ switch(Opc) {
+ default: llvm_unreachable("Unknown opcode");
+ case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
+ case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
+ case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
+ case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
- case PPC::SRAD: III.ImmOpcode = PPC::SRADI; break;
- case PPC::SRADo: III.ImmOpcode = PPC::SRADIo; break;
+ case PPC::SRAD:
+ III.ImmWidth = 6;
+ III.TruncateImmTo = 0;
+ III.ImmOpcode = PPC::SRADI;
+ break;
+ case PPC::SRADo:
+ III.ImmWidth = 6;
+ III.TruncateImmTo = 0;
+ III.ImmOpcode = PPC::SRADIo;
+ break;
}
break;
// Loads and stores:
@@ -2866,6 +2908,8 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
+ if (III.TruncateImmTo)
+ Imm &= ((1 << III.TruncateImmTo) - 1);
if (III.SignedImm) {
APInt ActualValue(64, Imm, true);
if (!ActualValue.isSignedIntN(III.ImmWidth))
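The new TruncateImmTo step encodes a hardware fact: rotate/shift-by-register instructions only read the low bits of the amount register (5 bits for RLWNM, 6 for the 32-bit shifts and the 64-bit rotates, 7 for the 64-bit shifts), so a known-constant amount can be masked before folding, while the algebraic right shifts instead bound the immediate width, since an out-of-range sraw/srad yields 0/-1 rather than a masked shift. A standalone model (truncateImm is an illustrative name):

#include <cassert>
#include <cstdint>

static int64_t truncateImm(int64_t Imm, unsigned TruncateImmTo) {
  if (TruncateImmTo)
    Imm &= (1 << TruncateImmTo) - 1; // keep only the bits the hardware reads
  return Imm;
}

int main() {
  assert(truncateImm(37, 5) == 5);  // rotlw by 37 behaves as rotlw by 5
  assert(truncateImm(70, 6) == 6);  // rotld by 70 behaves as rotld by 6
  assert(truncateImm(12, 0) == 12); // 0 means "no truncation"
}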
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 4271c50127a1..8bfb8bc88097 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -97,6 +97,8 @@ struct ImmInstrInfo {
uint64_t ImmOpcode : 16;
// The size of the immediate.
uint64_t ImmWidth : 5;
+ // The immediate should be truncated to N bits.
+ uint64_t TruncateImmTo : 5;
};
// Information required to convert an instruction to just a materialized
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 474661aaaee8..a4c7a030389b 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -55,7 +55,7 @@ FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
"convert reg-reg instructions to reg-imm"));
static cl::opt<bool>
-ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(false),
+ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
cl::desc("Convert eligible reg+reg instructions to reg+imm"));
static cl::opt<bool>
diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 9501f0f89b81..d524c354ed35 100644
--- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -35,7 +35,7 @@ STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
static cl::opt<bool>
-RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(false),
+RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index b91467fe1455..6e06a4975e2a 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -230,9 +230,10 @@ RISCVAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
} // end anonymous namespace
MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
+ const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new RISCVAsmBackend(OSABI, TT.isArch64Bit());
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index bea2f8800fa6..ef58a6b8cbca 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -40,8 +40,8 @@ MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 7d32954936be..805ca7dd956e 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -580,7 +580,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
}
MachineFunction &MF = DAG.getMachineFunction();
- MVT XLenVT = Subtarget.getXLenVT();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (IsVarArg)
@@ -593,7 +592,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
+ assert(VA.getLocVT() == Subtarget.getXLenVT() && "Unhandled argument type");
SDValue ArgValue;
if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
diff --git a/lib/Target/RISCV/RISCVInstrInfoC.td b/lib/Target/RISCV/RISCVInstrInfoC.td
index 4ca52652086b..661d2a78eeef 100644
--- a/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -177,7 +177,7 @@ class CS_ALU<bits<2> funct2, string OpcodeStr, RegisterClass cls,
let Predicates = [HasStdExtC] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in
def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
(ins SP:$rs1, uimm10_lsb00nonzero:$imm),
"c.addi4spn", "$rd, $rs1, $imm"> {
@@ -260,7 +260,7 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
- DecoderNamespace = "RISCV32Only_" in
+ DecoderNamespace = "RISCV32Only_", Defs = [X1] in
def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset),
"c.jal", "$offset">,
Requires<[IsRV32]>;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index a38545ecf430..f2438ee43075 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/TargetRegistry.h"
@@ -301,8 +302,8 @@ namespace {
} // end anonymous namespace
MCAsmBackend *llvm::createSparcAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- return new ELFSparcAsmBackend(T, TT.getOS());
+ return new ELFSparcAsmBackend(T, STI.getTargetTriple().getOS());
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 563e6f4efbe6..8390198479ba 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -40,8 +40,8 @@ Target &getTheSparcelTarget();
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createSparcAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index e035c3b87a40..5cd4a7daf0fa 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
@@ -122,9 +123,10 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
}
MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ uint8_t OSABI =
+ MCELFObjectTargetWriter::getOSABI(STI.getTargetTriple().getOS());
return new SystemZMCAsmBackend(OSABI);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 99b157e37275..ed1b1b95b8f3 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -89,8 +89,8 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter> createSystemZObjectWriter(raw_pwrite_stream &OS,
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 18de4273d1d0..e7c8809de70e 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -69,10 +69,10 @@ static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
}
static MCAsmBackend *createAsmBackend(const Target & /*T*/,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo & /*MRI*/,
- const Triple &TT, StringRef /*CPU*/,
const MCTargetOptions & /*Options*/) {
- return createWebAssemblyAsmBackend(TT);
+ return createWebAssemblyAsmBackend(STI.getTargetTriple());
}
static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 239db2a74b24..34db5918926b 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -843,10 +843,11 @@ public:
} // end anonymous namespace
MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TheTriple,
- StringRef CPU,
const MCTargetOptions &Options) {
+ const Triple &TheTriple = STI.getTargetTriple();
+ StringRef CPU = STI.getCPU();
if (TheTriple.isOSBinFormatMachO())
return new DarwinX86_32AsmBackend(T, MRI, CPU);
@@ -862,10 +863,11 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
}
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TheTriple,
- StringRef CPU,
const MCTargetOptions &Options) {
+ const Triple &TheTriple = STI.getTargetTriple();
+ StringRef CPU = STI.getCPU();
if (TheTriple.isOSBinFormatMachO()) {
MachO::CPUSubTypeX86 CS =
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index c5859b600ad2..d758c0588cb1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -70,11 +70,13 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createX86_32AsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
-MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
+MCAsmBackend *createX86_64AsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
+ const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
/// Implements X86-only directives for assembly emission.
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index 01d10fe4cae4..855ea683a8af 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -166,49 +166,75 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-/// Check if register \p Reg is live after the \p MI.
-///
-/// \p LiveRegs should be in a state describing liveness information in
-/// that exact place as this function tries to precise analysis made
-/// by \p LiveRegs by exploiting the information about particular
-/// instruction \p MI. \p MI is expected to be one of the MOVs handled
-/// by the x86FixupBWInsts pass.
-/// Note: similar to LivePhysRegs::contains this would state that
-/// super-register is not used if only some part of it is used.
-///
-/// X86 backend does not have subregister liveness tracking enabled,
-/// so liveness information might be overly conservative. However, for
-/// some specific instructions (this pass only cares about MOVs) we can
-/// produce more precise results by analysing that MOV's operands.
-///
-/// Indeed, if super-register is not live before the mov it means that it
-/// was originally <read-undef> and so we are free to modify these
-/// undef upper bits. That may happen in case where the use is in another MBB
-/// and the vreg/physreg corresponding to the move has higher width than
-/// necessary (e.g. due to register coalescing with a "truncate" copy).
-/// So, it handles pattern like this:
-///
-/// %bb.2: derived from LLVM BB %if.then
-/// Live Ins: %rdi
-/// Predecessors according to CFG: %bb.0
-/// %ax = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax;
-/// mem:LD2[%p]
-/// No implicit %eax
-/// Successors according to CFG: %bb.3(?%)
+/// \brief Check if, after \p OrigMI, the only portion of the super register
+/// of \p OrigMI's destination register that is alive is the destination
+/// register itself.
///
-/// %bb.3: derived from LLVM BB %if.end
-/// Live Ins: %eax Only %ax is actually live
-/// Predecessors according to CFG: %bb.2 %bb.1
-/// %ax = KILL %ax, implicit killed %eax
-/// RET 0, %ax
-static bool isLive(const MachineInstr &MI,
- const LivePhysRegs &LiveRegs,
- const TargetRegisterInfo *TRI,
- unsigned Reg) {
- if (!LiveRegs.contains(Reg))
+/// If so, return that super register in \p SuperDestReg.
+bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
+ unsigned &SuperDestReg) const {
+ auto *TRI = &TII->getRegisterInfo();
+
+ unsigned OrigDestReg = OrigMI->getOperand(0).getReg();
+ SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32);
+
+ const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg);
+
+ // Make sure that the sub-register that this instruction has as its
+ // destination is the lowest order sub-register of the super-register.
+ // If it isn't, then the register isn't really dead even if the
+ // super-register is considered dead.
+ if (SubRegIdx == X86::sub_8bit_hi)
return false;
- unsigned Opc = MI.getOpcode(); (void)Opc;
+ // If neither the destination super-register nor any applicable subregisters
+ // are live after this instruction, then the super register is safe to use.
+ if (!LiveRegs.contains(SuperDestReg)) {
+ // If the original destination register was not the low 8-bit subregister
+ // then the super register check is sufficient.
+ if (SubRegIdx != X86::sub_8bit)
+ return true;
+ // If the original destination register was the low 8-bit subregister, we
+ // also need to check the 16-bit subregister and the high 8-bit
+ // subregister.
+ if (!LiveRegs.contains(getX86SubSuperRegister(OrigDestReg, 16)) &&
+ !LiveRegs.contains(getX86SubSuperRegister(SuperDestReg, 8,
+ /*High=*/true)))
+ return true;
+ // Otherwise, we have a little more checking to do.
+ }
+
+ // If we get here, the super-register destination (or some part of it) is
+ // marked as live after the original instruction.
+ //
+ // The X86 backend does not have subregister liveness tracking enabled,
+ // so liveness information might be overly conservative. Specifically, the
+ // super register might be marked as live because it is implicitly defined
+ // by the instruction we are examining.
+ //
+ // However, for some specific instructions (this pass only cares about MOVs)
+ // we can produce more precise results by analysing that MOV's operands.
+ //
+ // Indeed, if super-register is not live before the mov it means that it
+ // was originally <read-undef> and so we are free to modify these
+ // undef upper bits. That may happen in case where the use is in another MBB
+ // and the vreg/physreg corresponding to the move has higher width than
+ // necessary (e.g. due to register coalescing with a "truncate" copy).
+ // So, we would like to handle patterns like this:
+ //
+ // %bb.2: derived from LLVM BB %if.then
+ // Live Ins: %rdi
+ // Predecessors according to CFG: %bb.0
+ // %ax<def> = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax
+ // ; No implicit %eax
+ // Successors according to CFG: %bb.3(?%)
+ //
+ // %bb.3: derived from LLVM BB %if.end
+ // Live Ins: %eax Only %ax is actually live
+ // Predecessors according to CFG: %bb.2 %bb.1
+ // %ax = KILL %ax, implicit killed %eax
+ // RET 0, %ax
+ unsigned Opc = OrigMI->getOpcode(); (void)Opc;
// These are the opcodes currently handled by the pass; if anything else
// is added, we need to ensure that the new opcode has the same
// properties.
@@ -217,65 +243,28 @@ static bool isLive(const MachineInstr &MI,
"Unexpected opcode.");
bool IsDefined = false;
- for (auto &MO: MI.implicit_operands()) {
+ for (auto &MO: OrigMI->implicit_operands()) {
if (!MO.isReg())
continue;
assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!");
- for (MCSuperRegIterator Supers(Reg, TRI, true); Supers.isValid(); ++Supers) {
+ for (MCSuperRegIterator Supers(OrigDestReg, TRI, true); Supers.isValid();
+ ++Supers) {
if (*Supers == MO.getReg()) {
if (MO.isDef())
IsDefined = true;
else
- return true; // SuperReg Imp-used' -> live before the MI
+ return false; // SuperReg Imp-used' -> live before the MI
}
}
}
// Reg is not Imp-def'ed -> it's live both before/after the instruction.
if (!IsDefined)
- return true;
+ return false;
// Otherwise, the Reg is not live before the MI and the MOV can't
// make it really live, so it's in fact dead even after the MI.
- return false;
-}
-
-/// \brief Check if after \p OrigMI the only portion of super register
-/// of the destination register of \p OrigMI that is alive is that
-/// destination register.
-///
-/// If so, return that super register in \p SuperDestReg.
-bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
- unsigned &SuperDestReg) const {
- auto *TRI = &TII->getRegisterInfo();
-
- unsigned OrigDestReg = OrigMI->getOperand(0).getReg();
- SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32);
-
- const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg);
-
- // Make sure that the sub-register that this instruction has as its
- // destination is the lowest order sub-register of the super-register.
- // If it isn't, then the register isn't really dead even if the
- // super-register is considered dead.
- if (SubRegIdx == X86::sub_8bit_hi)
- return false;
-
- if (isLive(*OrigMI, LiveRegs, TRI, SuperDestReg))
- return false;
-
- if (SubRegIdx == X86::sub_8bit) {
- // In the case of byte registers, we also have to check that the upper
- // byte register is also dead. That is considered to be independent of
- // whether the super-register is dead.
- unsigned UpperByteReg =
- getX86SubSuperRegister(SuperDestReg, 8, /*High=*/true);
-
- if (isLive(*OrigMI, LiveRegs, TRI, UpperByteReg))
- return false;
- }
-
return true;
}
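The rewritten check leans on x86's aliasing register file: writing %ax leaves the rest of %eax/%rax intact, and %al/%ah overlap %ax independently, so all of EAX, AX and AH must be provably dead (or only implicitly defined by this very MOV) before an 8-bit destination can be widened. The relationships are walked with the existing getX86SubSuperRegister helper, e.g. (values illustrative):

unsigned Super  = getX86SubSuperRegister(X86::AL, 32);             // X86::EAX
unsigned Wide16 = getX86SubSuperRegister(X86::AL, 16);             // X86::AX
unsigned High8  = getX86SubSuperRegister(Super, 8, /*High=*/true); // X86::AH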
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9edd799779c7..a6f56877bd64 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -996,8 +996,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
@@ -1151,15 +1151,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i1, MVT::v16i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i1, MVT::v16i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i1, MVT::v4i32);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+ if (Subtarget.hasVLX()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+ }
+
// Extends of v16i1/v8i1 to 128-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom);
@@ -1186,9 +1197,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
- for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1,
- MVT::v16i1, MVT::v32i1, MVT::v64i1 })
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
+ for (auto VT : { MVT::v1i1, MVT::v8i1 })
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
@@ -1219,11 +1229,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::v16i16, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::v16i8, Promote);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
@@ -1428,6 +1438,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
+ for (auto VT : { MVT::v16i1, MVT::v32i1 })
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v32i1 masks to 256-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
@@ -1540,6 +1552,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
+ for (auto VT : { MVT::v2i1, MVT::v4i1 })
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v2i1/v4i1 masks to 128-bit vectors.
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
@@ -2140,6 +2154,10 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
const SDLoc &Dl, SelectionDAG &DAG) {
EVT ValVT = ValArg.getValueType();
+ if (ValVT == MVT::v1i1)
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
+ DAG.getIntPtrConstant(0, Dl));
+
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
// Two stage lowering might be required
@@ -4625,6 +4643,14 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
+bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
+ EVT BitcastVT) const {
+ if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
+ return false;
+
+ return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
+}
+
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const SelectionDAG &DAG) const {
// Do not merge to float value size (128 bytes) if no implicit
@@ -7471,7 +7497,7 @@ static bool isAddSub(const BuildVectorSDNode *BV,
}
/// Returns true if it is possible to fold MUL and an idiom that has already been
-/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
+/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the
/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
@@ -7708,6 +7734,10 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
case ISD::AND:
case ISD::XOR:
case ISD::OR:
+ // Don't do this if the buildvector is a splat - we'd replace one
+ // constant with an entire vector.
+ if (Op->getSplatValue())
+ return SDValue();
if (!TLI.isOperationLegalOrPromote(Opcode, VT))
return SDValue();
break;
@@ -11261,6 +11291,20 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
+ // Attempt to directly match PSHUFLW or PSHUFHW.
+ if (isUndefOrInRange(LoMask, 0, 4) &&
+ isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
+ return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
+ getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
+ }
+ if (isUndefOrInRange(HiMask, 4, 8) &&
+ isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
+ for (int i = 0; i != 4; ++i)
+ HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
+ return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
+ getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
+ }
+
SmallVector<int, 4> LoInputs;
copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
@@ -11280,13 +11324,11 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
- // If we are splatting two values from one half - one to each half, then
- // we can shuffle that half so each is splatted to a dword, then splat those
- // to their respective halves.
- auto SplatHalfs = [&](int LoInput, int HiInput, unsigned ShufWOp,
- int DOffset) {
- int PSHUFHalfMask[] = {LoInput % 4, LoInput % 4, HiInput % 4, HiInput % 4};
- int PSHUFDMask[] = {DOffset + 0, DOffset + 0, DOffset + 1, DOffset + 1};
+ // If we are shuffling values from one half - check how many different DWORD
+ // pairs we need to create. If only 1 or 2 then we can perform this as a
+ // PSHUFLW/PSHUFHW + PSHUFD instead of the PSHUFD+PSHUFLW+PSHUFHW chain below.
+ auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask,
+ ArrayRef<int> PSHUFDMask, unsigned ShufWOp) {
V = DAG.getNode(ShufWOp, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
V = DAG.getBitcast(PSHUFDVT, V);
@@ -11295,10 +11337,48 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
return DAG.getBitcast(VT, V);
};
- if (NumLToL == 1 && NumLToH == 1 && (NumHToL + NumHToH) == 0)
- return SplatHalfs(LToLInputs[0], LToHInputs[0], X86ISD::PSHUFLW, 0);
- if (NumHToL == 1 && NumHToH == 1 && (NumLToL + NumLToH) == 0)
- return SplatHalfs(HToLInputs[0], HToHInputs[0], X86ISD::PSHUFHW, 2);
+ if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
+ int PSHUFDMask[4] = { -1, -1, -1, -1 };
+ SmallVector<std::pair<int, int>, 4> DWordPairs;
+ int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);
+
+ // Collect the different DWORD pairs.
+ for (int DWord = 0; DWord != 4; ++DWord) {
+ int M0 = Mask[2 * DWord + 0];
+ int M1 = Mask[2 * DWord + 1];
+ M0 = (M0 >= 0 ? M0 % 4 : M0);
+ M1 = (M1 >= 0 ? M1 % 4 : M1);
+ if (M0 < 0 && M1 < 0)
+ continue;
+
+ bool Match = false;
+ for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
+ auto &DWordPair = DWordPairs[j];
+ if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) &&
+ (M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) {
+ DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
+ DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
+ PSHUFDMask[DWord] = DOffset + j;
+ Match = true;
+ break;
+ }
+ }
+ if (!Match) {
+ PSHUFDMask[DWord] = DOffset + DWordPairs.size();
+ DWordPairs.push_back(std::make_pair(M0, M1));
+ }
+ }
+
+ if (DWordPairs.size() <= 2) {
+ DWordPairs.resize(2, std::make_pair(-1, -1));
+ int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
+ DWordPairs[1].first, DWordPairs[1].second};
+ if ((NumHToL + NumHToH) == 0)
+ return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
+ if ((NumLToL + NumLToH) == 0)
+ return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
+ }
+ }
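A worked example of the pair-collection loop above, simplified to exact pair matching with no undef lanes (the real code merges partially-undef pairs via isUndefOrEqual): the low-half mask {0,1,0,1,2,3,2,3} produces two unique DWORD pairs, so it lowers to an identity PSHUFLW plus PSHUFD {0,0,1,1} instead of the longer three-shuffle chain.

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  int Mask[8] = {0, 1, 0, 1, 2, 3, 2, 3}; // every input from the low half
  int PSHUFDMask[4] = {-1, -1, -1, -1};
  std::vector<std::pair<int, int>> DWordPairs;
  for (int DWord = 0; DWord != 4; ++DWord) {
    std::pair<int, int> P(Mask[2 * DWord] % 4, Mask[2 * DWord + 1] % 4);
    bool Match = false;
    for (unsigned j = 0; j != DWordPairs.size(); ++j)
      if (DWordPairs[j] == P) { PSHUFDMask[DWord] = j; Match = true; break; }
    if (!Match) {
      PSHUFDMask[DWord] = (int)DWordPairs.size();
      DWordPairs.push_back(P);
    }
  }
  // Prints: pairs=2 pshufd={0,0,1,1} -> PSHUFLW {0,1,2,3} then PSHUFD.
  printf("pairs=%zu pshufd={%d,%d,%d,%d}\n", DWordPairs.size(),
         PSHUFDMask[0], PSHUFDMask[1], PSHUFDMask[2], PSHUFDMask[3]);
}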
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
@@ -15020,6 +15100,42 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
return insert1BitVector(Op, DAG, Subtarget);
}
+static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
+ "Only vXi1 extract_subvectors need custom lowering");
+
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0) // the operation is legal
+ return Op;
+
+ MVT VecVT = Vec.getSimpleValueType();
+ unsigned NumElems = VecVT.getVectorNumElements();
+
+ // Extend to natively supported kshift.
+ MVT WideVecVT = VecVT;
+ if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
+ WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
+ DAG.getUNDEF(WideVecVT), Vec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ // Shift to the LSB.
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
+ DAG.getIntPtrConstant(0, dl));
+}
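A scalar model of the lowering above, treating a kN mask register as an N-bit integer so that KSHIFTR is a plain logical right shift (extractV2I1 is an illustrative name); extracting the v2i1 subvector at index 2 of a v8i1 value becomes:

#include <cstdint>

static uint8_t extractV2I1(uint8_t K8, unsigned IdxVal) {
  uint16_t Wide = K8; // INSERT_SUBVECTOR into a wider undef mask if needed
  Wide >>= IdxVal;    // X86ISD::KSHIFTR by the element index
  return Wide & 0x3;  // EXTRACT_SUBVECTOR at index 0 keeps the low 2 lanes
}
// extractV2I1(0b10110100, 2) == 0b01, i.e. lanes 2 and 3 of the source.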
+
// Returns the appropriate wrapper opcode for a global reference.
unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
// References to absolute symbols are never PC-relative.
@@ -15545,19 +15661,13 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
- if (SrcVT.getVectorElementType() == MVT::i1) {
- if (SrcVT == MVT::v2i1) {
- // For v2i1, we need to widen to v4i1 first.
- assert(VT == MVT::v2f64 && "Unexpected type");
- Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
- DAG.getUNDEF(MVT::v2i1));
- return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
- DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
- }
-
- MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
- return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
+ if (SrcVT == MVT::v2i1) {
+ // For v2i1, we need to widen to v4i1 first.
+ assert(VT == MVT::v2f64 && "Unexpected type");
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
+ DAG.getUNDEF(MVT::v2i1));
+ return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
+ DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
}
return SDValue();
}
@@ -15894,19 +16004,13 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
- if (SrcVT.getVectorElementType() == MVT::i1) {
- if (SrcVT == MVT::v2i1) {
- // For v2i1, we need to widen to v4i1 first.
- assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
- N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
- DAG.getUNDEF(MVT::v2i1));
- return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
- }
-
- MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
- return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
+ if (SrcVT == MVT::v2i1) {
+ // For v2i1, we need to widen to v4i1 first.
+ assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
+ DAG.getUNDEF(MVT::v2i1));
+ return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
}
switch (SrcVT.SimpleTy) {
@@ -16418,13 +16522,16 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
if (InVT.getScalarSizeInBits() <= 16) {
if (Subtarget.hasBWI()) {
// legal, will go to VPMOVB2M, VPMOVW2M
- // Shift packed bytes not supported natively, bitcast to word
- MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
- SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
- DAG.getBitcast(ExtVT, In),
- DAG.getConstant(ShiftInx, DL, ExtVT));
- ShiftNode = DAG.getBitcast(InVT, ShiftNode);
- return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+ if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
+ // We need to shift to get the lsb into the sign position.
+ // Shifting packed bytes is not supported natively, so bitcast to words.
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
+ In = DAG.getNode(ISD::SHL, DL, ExtVT,
+ DAG.getBitcast(ExtVT, In),
+ DAG.getConstant(ShiftInx, DL, ExtVT));
+ In = DAG.getBitcast(InVT, In);
+ }
+ return DAG.getNode(X86ISD::CVT2MASK, DL, VT, In);
}
// Use TESTD/Q, extended vector to packed dword/qword.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@@ -16437,9 +16544,12 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
ShiftInx = InVT.getScalarSizeInBits() - 1;
}
- SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
- DAG.getConstant(ShiftInx, DL, InVT));
- return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
+ if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
+ // We need to shift to get the lsb into the sign position.
+ In = DAG.getNode(ISD::SHL, DL, InVT, In,
+ DAG.getConstant(ShiftInx, DL, InVT));
+ }
+ return DAG.getNode(X86ISD::TESTM, DL, VT, In, In);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
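Per-lane model of the new ComputeNumSignBits guard: truncation-to-mask keeps each lane's lsb, and the pre-shift is only needed when a lane's bits are not already all equal (a sign-bit count equal to the scalar width means every lane is 0 or -1, e.g. a setcc result). laneToMaskBit is an illustrative name:

#include <cstdint>

static bool laneToMaskBit(uint32_t Lane, bool AllSignBits) {
  if (!AllSignBits)
    Lane <<= 31;    // ISD::SHL by ScalarSizeInBits - 1 isolates the lsb
  return Lane != 0; // TESTM-style test; agrees with the lsb either way
}
// laneToMaskBit(1, false)          == true  (lsb shifted into place)
// laneToMaskBit(0xFFFFFFFFu, true) == true  (setcc lane, no shift needed)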
@@ -16572,9 +16682,29 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
- assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
SDValue Src = Op.getOperand(0);
SDLoc dl(Op);
+
+ if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) {
+ MVT ResVT = MVT::v4i32;
+ MVT TruncVT = MVT::v4i1;
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ if (!IsSigned && !Subtarget.hasVLX()) {
+ // Widen to 512-bits.
+ ResVT = MVT::v8i32;
+ TruncVT = MVT::v8i1;
+ Opc = ISD::FP_TO_UINT;
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
+ DAG.getUNDEF(MVT::v8f64),
+ Src, DAG.getIntPtrConstant(0, dl));
+ }
+ SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
@@ -18629,6 +18759,7 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
DAG.getUNDEF(ExtVT), Op, DAG.getIntPtrConstant(0, dl));
}
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Op);
+ Op = DAG.getBitcast(MVT::i8, Op);
return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
St->getMemOperand());
}
@@ -18645,12 +18776,12 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
DAG.getIntPtrConstant(16, dl));
Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Hi);
- SDValue BasePtrHi =
- DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(2, dl, BasePtr.getValueType()));
+ SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl);
SDValue StHi = DAG.getStore(St->getChain(), dl, Hi,
- BasePtrHi, St->getMemOperand());
+ BasePtrHi, St->getPointerInfo().getWithOffset(2),
+ MinAlign(St->getAlignment(), 2U),
+ St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi);
}
@@ -24545,6 +24676,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -29735,7 +29867,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
/// by this operation to try to flow through the rest of the combiner
/// the fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
- SDValue &Opnd0, SDValue &Opnd1,
+ SDValue &Opnd0, SDValue &Opnd1,
bool matchSubAdd = false) {
EVT VT = N->getValueType(0);
@@ -30309,9 +30441,35 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the setcc result is scalarized on subtargets that don't have legal
// vxi1 types.
- if (DCI.isBeforeLegalize())
+ if (DCI.isBeforeLegalize()) {
if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
return V;
+
+ // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
+ // type, widen both sides to avoid a trip through memory.
+ if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
+ Subtarget.hasVLX()) {
+ SDLoc dl(N);
+ N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
+ N0 = DAG.getBitcast(MVT::v8i1, N0);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
+ // type, widen both sides to avoid a trip through memory.
+ if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
+ Subtarget.hasVLX()) {
+ SDLoc dl(N);
+ unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
+ Ops[0] = N0;
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
+ N0 = DAG.getBitcast(MVT::i8, N0);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ }
+ }
+
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
@@ -30791,6 +30949,11 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();
+ // Handle extract(broadcast(scalar_value)); it doesn't matter what the index is.
+ if (X86ISD::VBROADCAST == Src.getOpcode() &&
+ Src.getOperand(0).getValueType() == VT)
+ return Src.getOperand(0);
+
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
@@ -36153,13 +36316,23 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
- // We're looking for an oversized integer equality comparison, but ignore a
- // comparison with zero because that gets special treatment in EmitTest().
+ // We're looking for an oversized integer equality comparison.
SDValue X = SetCC->getOperand(0);
SDValue Y = SetCC->getOperand(1);
EVT OpVT = X.getValueType();
unsigned OpSize = OpVT.getSizeInBits();
- if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
+ if (!OpVT.isScalarInteger() || OpSize < 128)
+ return SDValue();
+
+ // Ignore a comparison with zero because that gets special treatment in
+ // EmitTest(). But make an exception for the special case of a pair of
+ // logically-combined vector-sized operands compared to zero. This pattern may
+ // be generated by the memcmp expansion pass with oversized integer compares
+ // (see PR33325).
+ bool IsOrXorXorCCZero = isNullConstant(Y) && X.getOpcode() == ISD::OR &&
+ X.getOperand(0).getOpcode() == ISD::XOR &&
+ X.getOperand(1).getOpcode() == ISD::XOR;
+ if (isNullConstant(Y) && !IsOrXorXorCCZero)
return SDValue();
// Bail out if we know that this is not really just an oversized integer.
@@ -36174,15 +36347,29 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX2())) {
EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
- SDValue VecX = DAG.getBitcast(VecVT, X);
- SDValue VecY = DAG.getBitcast(VecVT, Y);
-
+ SDValue Cmp;
+ if (IsOrXorXorCCZero) {
+ // This is a bitwise-combined equality comparison of 2 pairs of vectors:
+ // setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
+ // Use 2 vector equality compares and 'and' the results before doing a
+ // MOVMSK.
+ SDValue A = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(0));
+ SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
+ SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
+ SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
+ SDValue Cmp1 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, A, B);
+ SDValue Cmp2 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, C, D);
+ Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2);
+ } else {
+ SDValue VecX = DAG.getBitcast(VecVT, X);
+ SDValue VecY = DAG.getBitcast(VecVT, Y);
+ Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
+ }
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
// setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
// setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
- SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
MVT::i32);
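Sketched with SSE2 intrinsics, the code this combine aims for on a 32-byte memcmp-for-equality (a model of the expected output, not of the combiner itself; equal32 is an illustrative name): two PCMPEQB results are ANDed so a single MOVMSK can verify that all byte lanes matched.

#include <emmintrin.h>

static bool equal32(const void *A, const void *B) {
  const __m128i *PA = (const __m128i *)A, *PB = (const __m128i *)B;
  __m128i Eq0 = _mm_cmpeq_epi8(_mm_loadu_si128(PA), _mm_loadu_si128(PB));
  __m128i Eq1 =
      _mm_cmpeq_epi8(_mm_loadu_si128(PA + 1), _mm_loadu_si128(PB + 1));
  __m128i Both = _mm_and_si128(Eq0, Eq1);   // AND before the single MOVMSK
  return _mm_movemask_epi8(Both) == 0xFFFF; // all 16 byte lanes matched
}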
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7708f577ba70..1fb7c7ed4e98 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1023,6 +1023,8 @@ namespace llvm {
return NumElem > 2;
}
+ bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
+
/// Intel processors have a unified instruction and data cache
const char * getClearCacheBuiltinName() const override {
return nullptr; // nothing to do, move along.
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index dcd84930741b..458f68072d6c 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2701,11 +2701,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
// Load/store kreg
let Predicates = [HasDQI] in {
- def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
- (KMOVBmk addr:$dst, VK8:$src)>;
- def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
- (KMOVBkm addr:$src)>;
-
def : Pat<(store VK4:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(store VK2:$src, addr:$dst),
@@ -2745,22 +2740,10 @@ let Predicates = [HasAVX512, NoDQI] in {
}
let Predicates = [HasAVX512] in {
- def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
- (KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(v1i1 (load addr:$src)),
- (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
- def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
- (KMOVWkm addr:$src)>;
-}
-let Predicates = [HasBWI] in {
- def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
- (KMOVDmk addr:$dst, VK32:$src)>;
- def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
- (KMOVDkm addr:$src)>;
- def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
- (KMOVQmk addr:$dst, VK64:$src)>;
- def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
- (KMOVQkm addr:$src)>;
+ (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;
+ def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
+ (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
let Predicates = [HasAVX512] in {
@@ -3087,66 +3070,6 @@ defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
-
-multiclass vextract_for_mask_to_mask<string InstrStr, X86KVectorVTInfo From,
- X86KVectorVTInfo To, Predicate prd> {
-let Predicates = [prd] in
- def :
- Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
- (To.KVT(COPY_TO_REGCLASS
- (!cast<Instruction>(InstrStr#"ri") From.KVT:$src,
- (i8 imm:$imm8)), To.KRC))>;
-}
-
-multiclass vextract_for_mask_to_mask_legal_w<X86KVectorVTInfo From,
- X86KVectorVTInfo To> {
-def :
- Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
- (To.KVT(COPY_TO_REGCLASS
- (KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16),
- (i8 imm:$imm8)), To.KRC))>;
-}
-
-defm : vextract_for_mask_to_mask_legal_w<v2i1_info, v1i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v1i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v1i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v2i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v2i1_info>;
-defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v4i1_info>;
-
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>;
-defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>;
-
-// Patterns for kmask shift
-multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
- def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
- (VT (COPY_TO_REGCLASS
- (KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
- (I8Imm $imm)),
- RC))>;
- def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
- (VT (COPY_TO_REGCLASS
- (KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
- (I8Imm $imm)),
- RC))>;
-}
-
-defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
-defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
-defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
@@ -3428,28 +3351,33 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
+multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
+ X86VectorVTInfo Wide> {
+ def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
+ Narrow.RC:$src1, Narrow.RC:$src0)),
+ (EXTRACT_SUBREG
+ (Wide.VT
+ (!cast<Instruction>(InstrStr#"rrk")
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
+ (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
+ Narrow.SubRegIdx)>;
+
+ def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
+ Narrow.RC:$src1, Narrow.ImmAllZerosV)),
+ (EXTRACT_SUBREG
+ (Wide.VT
+ (!cast<Instruction>(InstrStr#"rrkz")
+ (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
+ (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
+ Narrow.SubRegIdx)>;
+}
+
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
- (v8f32 VR256X:$src0))),
- (EXTRACT_SUBREG
- (v16f32
- (VMOVAPSZrrk
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
- (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
- sub_ymm)>;
-
-def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
- (v8i32 VR256X:$src0))),
- (EXTRACT_SUBREG
- (v16i32
- (VMOVDQA32Zrrk
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
- (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
- (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
- sub_ymm)>;
+ defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
+ defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
}
let Predicates = [HasAVX512] in {
@@ -4633,7 +4561,7 @@ multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
sub_xmm)>;
}
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512, NoVLX] in {
defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 039b4a248544..a481644efdd6 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -94,7 +94,8 @@ let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- OpndItins itins, bit Commutable = 0> {
+ OpndItins itins, bit Commutable = 0,
+ X86MemOperand OType = i64mem> {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -103,7 +104,7 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
+ (ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
@@ -524,13 +525,16 @@ defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
MMX_UNPCK_H_ITINS>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
- MMX_UNPCK_L_ITINS>;
+ MMX_UNPCK_L_ITINS,
+ 0, i32mem>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
- MMX_UNPCK_L_ITINS>;
+ MMX_UNPCK_L_ITINS,
+ 0, i32mem>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
int_x86_mmx_punpckldq,
- MMX_UNPCK_L_ITINS>;
+ MMX_UNPCK_L_ITINS,
+ 0, i32mem>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 8712ca4823c6..122f51a0d214 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -440,16 +440,14 @@ static void
scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
DenseMap<Value *, Value *> &ResolvedValues) {
auto *PrevBB = Prev->getParent();
- auto *I = &*NewBlock->begin();
- while (auto PN = dyn_cast<PHINode>(I)) {
- auto V = PN->getIncomingValueForBlock(PrevBB);
+ for (PHINode &PN : NewBlock->phis()) {
+ auto V = PN.getIncomingValueForBlock(PrevBB);
// See if we already resolved it.
auto VI = ResolvedValues.find(V);
if (VI != ResolvedValues.end())
V = VI->second;
// Remember the value.
- ResolvedValues[PN] = V;
- I = I->getNextNode();
+ ResolvedValues[&PN] = V;
}
}
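
Note: this is the first of many hunks in this commit that apply the same mechanical cleanup — the classic "walk instructions and dyn_cast until the first non-PHI" loop is replaced by BasicBlock::phis(), a range that yields exactly the block's leading PHI nodes by reference. A minimal before/after sketch of the idiom (hedged; redirectIncoming is an illustrative helper, not code from this patch):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Old idiom: iterate raw instructions until the first non-PHI.
    static void redirectIncomingOld(BasicBlock &BB, BasicBlock *OldPred,
                                    BasicBlock *NewPred) {
      for (BasicBlock::iterator I = BB.begin();
           PHINode *PN = dyn_cast<PHINode>(I); ++I) {
        int Idx = PN->getBasicBlockIndex(OldPred);
        if (Idx != -1)
          PN->setIncomingBlock(Idx, NewPred);
      }
    }

    // New idiom: phis() visits exactly the leading PHI nodes.
    static void redirectIncoming(BasicBlock &BB, BasicBlock *OldPred,
                                 BasicBlock *NewPred) {
      for (PHINode &PN : BB.phis()) {
        int Idx = PN.getBasicBlockIndex(OldPred);
        if (Idx != -1)
          PN.setIncomingBlock(Idx, NewPred);
      }
    }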
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 541dde6c47d2..38604830b885 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -728,6 +728,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // sqrt(a) * sqrt(b) -> sqrt(a * b)
+ if (AllowReassociate &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Opnd0 = nullptr;
+ Value *Opnd1 = nullptr;
+ if (match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(Opnd0))) &&
+ match(Op1, m_Intrinsic<Intrinsic::sqrt>(m_Value(Opnd1)))) {
+ BuilderTy::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(I.getFastMathFlags());
+ Value *FMulVal = Builder.CreateFMul(Opnd0, Opnd1);
+ Value *Sqrt = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::sqrt, I.getType());
+ Value *SqrtCall = Builder.CreateCall(Sqrt, FMulVal);
+ return replaceInstUsesWith(I, SqrtCall);
+ }
+ }
+
// Handle symmetric situation in a 2-iteration loop
Value *Opnd0 = Op0;
Value *Opnd1 = Op1;
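
Note on the fold added above: it rests on the identity

    \sqrt{a}\,\sqrt{b} = \sqrt{ab}, \qquad a \ge 0,\ b \ge 0,

which is not exact for IEEE floating point when an operand may be negative or NaN (each sqrt would return NaN while a*b can still be a valid positive product). That is why the transform is gated on AllowReassociate, copies the original instruction's fast-math flags onto the new fmul and sqrt call via the FastMathFlagGuard, and requires both sqrt results to have a single use, so the original calls become dead instead of being duplicated.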
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 207243231aad..caa73b2ff01c 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -265,15 +265,12 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
CallSite CS2(CallInst2);
// Handle PHIs used as arguments in the call-site.
- for (auto &PI : *TailBB) {
- PHINode *PN = dyn_cast<PHINode>(&PI);
- if (!PN)
- break;
+ for (PHINode &PN : TailBB->phis()) {
unsigned ArgNo = 0;
for (auto &CI : CS.args()) {
- if (&*CI == PN) {
- CS1.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock1));
- CS2.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock2));
+ if (&*CI == &PN) {
+ CS1.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock1));
+ CS2.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock2));
}
++ArgNo;
}
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index bf92e43c4715..5594c29bbd9f 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -592,12 +592,8 @@ private:
/// Create a ModelledPHI for each PHI in BB, adding to PHIs.
void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
SmallPtrSetImpl<Value *> &PHIContents) {
- for (auto &I : *BB) {
- auto *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- return;
-
- auto MPHI = ModelledPHI(PN);
+ for (PHINode &PN : BB->phis()) {
+ auto MPHI = ModelledPHI(&PN);
PHIs.insert(MPHI);
for (auto *V : MPHI.getValues())
PHIContents.insert(V);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 74d6014d3e3d..221fe57581ca 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -485,9 +485,8 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
BasicBlock *Header = L->getHeader();
SmallVector<WeakTrackingVH, 8> PHIs;
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PHIs.push_back(PN);
+ for (PHINode &PN : Header->phis())
+ PHIs.push_back(&PN);
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
@@ -724,13 +723,12 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
assert(LoopHeader && "Invalid loop");
for (auto *ExitBB : ExitBlocks) {
- BasicBlock::iterator BBI = ExitBB->begin();
// If there are no more PHI nodes in this exit block, then no more
// values defined inside the loop are used on this path.
- while (auto *PN = dyn_cast<PHINode>(BBI++)) {
- for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
- IncomingValIdx != E; ++IncomingValIdx) {
- auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);
+ for (PHINode &PN : ExitBB->phis()) {
+ for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
+ IncomingValIdx != E; ++IncomingValIdx) {
+ auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
// We currently only support loop exits from loop header. If the
// incoming block is not loop header, we need to recursively check
@@ -755,8 +753,7 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
if (!L->isLoopInvariant(Cond))
continue;
- auto *ExitVal =
- dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));
+ auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
// Only deal with PHIs.
if (!ExitVal)
@@ -771,8 +768,8 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
if (PreheaderIdx != -1) {
assert(ExitVal->getParent() == LoopHeader &&
"ExitVal must be in loop header");
- PN->setIncomingValue(IncomingValIdx,
- ExitVal->getIncomingValue(PreheaderIdx));
+ PN.setIncomingValue(IncomingValIdx,
+ ExitVal->getIncomingValue(PreheaderIdx));
}
}
}
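
Note: the first IndVarSimplify hunk keeps its collect-then-mutate shape even after the phis() cleanup — handles are gathered into WeakTrackingVH before any rewriting, because rewriting one PHI may delete others, and a weak handle nulls itself when its value dies. A generic sketch of that pattern (hedged; forEachPhiSafely is an illustrative name, not from the patch):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/ValueHandle.h"
    using namespace llvm;

    static void forEachPhiSafely(BasicBlock &BB,
                                 function_ref<void(PHINode &)> Rewrite) {
      // Collect first: Rewrite may erase or replace other PHIs, which would
      // invalidate a live iterator over the block.
      SmallVector<WeakTrackingVH, 8> PHIs;
      for (PHINode &PN : BB.phis())
        PHIs.push_back(&PN);
      for (WeakTrackingVH &VH : PHIs)
        // The handle is null here if an earlier Rewrite deleted the PHI.
        if (auto *PN = dyn_cast_or_null<PHINode>(&*VH))
          Rewrite(*PN);
    }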
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 5c4d55bfbb2b..cf98088111be 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1174,13 +1174,9 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
if (OriginalLoop.contains(SBB))
continue; // not an exit block
- for (Instruction &I : *SBB) {
- auto *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- break;
-
- Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
- PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
+ for (PHINode &PN : SBB->phis()) {
+ Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
+ PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
}
}
}
@@ -1327,16 +1323,12 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
// each of the PHI nodes in the loop header. This feeds into the initial
// value of the same PHI nodes if/when we continue execution.
- for (Instruction &I : *LS.Header) {
- auto *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- break;
-
- PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
+ for (PHINode &PN : LS.Header->phis()) {
+ PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy",
BranchToContinuation);
- NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader);
- NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch),
+ NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
+ NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch),
RRI.ExitSelector);
RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
}
@@ -1348,12 +1340,8 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// The latch exit now has a branch from `RRI.ExitSelector' instead of
// `LS.Latch'. The PHI nodes need to be updated to reflect that.
- for (Instruction &I : *LS.LatchExit) {
- if (PHINode *PN = dyn_cast<PHINode>(&I))
- replacePHIBlock(PN, LS.Latch, RRI.ExitSelector);
- else
- break;
- }
+ for (PHINode &PN : LS.LatchExit->phis())
+ replacePHIBlock(&PN, LS.Latch, RRI.ExitSelector);
return RRI;
}
@@ -1362,15 +1350,10 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
LoopStructure &LS, BasicBlock *ContinuationBlock,
const LoopConstrainer::RewrittenRangeInfo &RRI) const {
unsigned PHIIndex = 0;
- for (Instruction &I : *LS.Header) {
- auto *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- break;
-
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
- if (PN->getIncomingBlock(i) == ContinuationBlock)
- PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
- }
+ for (PHINode &PN : LS.Header->phis())
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
+ if (PN.getIncomingBlock(i) == ContinuationBlock)
+ PN.setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
LS.IndVarStart = RRI.IndVarEnd;
}
@@ -1381,14 +1364,9 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
BranchInst::Create(LS.Header, Preheader);
- for (Instruction &I : *LS.Header) {
- auto *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- break;
-
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
- replacePHIBlock(PN, OldPreheader, Preheader);
- }
+ for (PHINode &PN : LS.Header->phis())
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
+ replacePHIBlock(&PN, OldPreheader, Preheader);
return Preheader;
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 1476f7850cf0..141c9938bf8b 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1800,11 +1800,10 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
BasicBlock *OldPred,
BasicBlock *NewPred,
DenseMap<Instruction*, Value*> &ValueMap) {
- for (BasicBlock::iterator PNI = PHIBB->begin();
- PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
+ for (PHINode &PN : PHIBB->phis()) {
// Ok, we have a PHI node. Figure out what the incoming value was for the
// DestBlock.
- Value *IV = PN->getIncomingValueForBlock(OldPred);
+ Value *IV = PN.getIncomingValueForBlock(OldPred);
// Remap the value if necessary.
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
@@ -1813,7 +1812,7 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
IV = I->second;
}
- PN->addIncoming(IV, NewPred);
+ PN.addIncoming(IV, NewPred);
}
}
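
Note: the AddPHINodeEntriesForMappedBlock hunk is the same phis() cleanup, but it also shows the standard recipe for wiring a cloned predecessor into existing PHIs — take the value that flowed in from the original block and remap it through the clone table when that value itself was duplicated. In isolation (hedged sketch; addEntriesForClonedPred is an illustrative name):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void addEntriesForClonedPred(BasicBlock &Succ, BasicBlock *OldPred,
                                        BasicBlock *NewPred,
                                        DenseMap<Instruction *, Value *> &CloneMap) {
      for (PHINode &PN : Succ.phis()) {
        Value *IV = PN.getIncomingValueForBlock(OldPred);
        // If the incoming value was cloned along with OldPred, the new edge
        // must carry the clone, not the original.
        if (auto *Inst = dyn_cast<Instruction>(IV)) {
          auto It = CloneMap.find(Inst);
          if (It != CloneMap.end())
            IV = It->second;
        }
        PN.addIncoming(IV, NewPred);
      }
    }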
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 82604a8842bf..15cd1086f209 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -49,11 +49,10 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// must pass through a PHI in the exit block, meaning that this check is
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
- BasicBlock::iterator BI = ExitBlock->begin();
bool AllEntriesInvariant = true;
bool AllOutgoingValuesSame = true;
- while (PHINode *P = dyn_cast<PHINode>(BI)) {
- Value *incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
+ for (PHINode &P : ExitBlock->phis()) {
+ Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
@@ -61,7 +60,7 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// be used.
AllOutgoingValuesSame =
all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
- return incoming == P->getIncomingValueForBlock(BB);
+ return incoming == P.getIncomingValueForBlock(BB);
});
if (!AllOutgoingValuesSame)
@@ -72,8 +71,6 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
AllEntriesInvariant = false;
break;
}
-
- ++BI;
}
if (Changed)
@@ -162,11 +159,9 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
if (ExitBlock && isLoopNeverExecuted(L)) {
DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
// Set incoming value to undef for phi nodes in the exit block.
- BasicBlock::iterator BI = ExitBlock->begin();
- while (PHINode *P = dyn_cast<PHINode>(BI)) {
- for (unsigned i = 0; i < P->getNumIncomingValues(); i++)
- P->setIncomingValue(i, UndefValue::get(P->getType()));
- BI++;
+ for (PHINode &P : ExitBlock->phis()) {
+ std::fill(P.incoming_values().begin(), P.incoming_values().end(),
+ UndefValue::get(P.getType()));
}
deleteDeadLoop(L, &DT, &SE, &LI);
++NumDeleted;
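
Note: the second LoopDeletion hunk also swaps a manual index loop for a standard algorithm. PHINode::incoming_values() is a mutable operand range, so std::fill can overwrite every incoming value in one call. In isolation (hedged; setAllIncomingToUndef is an illustrative name):

    #include <algorithm>
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void setAllIncomingToUndef(BasicBlock &ExitBlock) {
      for (PHINode &P : ExitBlock.phis())
        // incoming_values() ranges over the PHI's value operands; assigning
        // through a Use rewrites the operand in place.
        std::fill(P.incoming_values().begin(), P.incoming_values().end(),
                  UndefValue::get(P.getType()));
    }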
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 953854c8b7b7..ff3e9eef16d9 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -857,12 +857,11 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
/// Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
+ for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
+ if (SE.isSCEVable(PN.getType()) &&
+ (SE.getEffectiveSCEVType(PN.getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
+ SE.getSCEV(&PN) == AR)
return true;
}
return false;
@@ -3013,15 +3012,14 @@ void LSRInstance::CollectChains() {
} // Continue walking down the instructions.
} // Continue walking down the domtree.
// Visit phi backedges to determine if the chain can generate the IV postinc.
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (!SE.isSCEVable(PN->getType()))
+ for (PHINode &PN : L->getHeader()->phis()) {
+ if (!SE.isSCEVable(PN.getType()))
continue;
Instruction *IncV =
- dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
if (IncV)
- ChainInstruction(PN, IncV, ChainUsersVec);
+ ChainInstruction(&PN, IncV, ChainUsersVec);
}
// Remove any unprofitable chains.
unsigned ChainIdx = 0;
@@ -3152,12 +3150,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
if (isa<PHINode>(Chain.tailUserInst())) {
- for (BasicBlock::iterator I = L->getHeader()->begin();
- PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
- if (!isCompatibleIVType(Phi, IVSrc))
+ for (PHINode &Phi : L->getHeader()->phis()) {
+ if (!isCompatibleIVType(&Phi, IVSrc))
continue;
Instruction *PostIncV = dyn_cast<Instruction>(
- Phi->getIncomingValueForBlock(L->getLoopLatch()));
+ Phi.getIncomingValueForBlock(L->getLoopLatch()));
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
continue;
Value *IVOper = IVSrc;
@@ -3168,7 +3165,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
}
- Phi->replaceUsesOfWith(PostIncV, IVOper);
+ Phi.replaceUsesOfWith(PostIncV, IVOper);
DeadInsts.emplace_back(PostIncV);
}
}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index bd468338a1d0..f2405d9b0c03 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1274,12 +1274,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// If the successor of the exit block had PHI nodes, add an entry for
// NewExit.
- for (BasicBlock::iterator I = ExitSucc->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
+ for (PHINode &PN : ExitSucc->phis()) {
+ Value *V = PN.getIncomingValueForBlock(ExitBlocks[i]);
ValueToValueMapTy::iterator It = VMap.find(V);
if (It != VMap.end()) V = It->second;
- PN->addIncoming(V, NewExit);
+ PN.addIncoming(V, NewExit);
}
if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
@@ -1496,10 +1495,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BranchInst::Create(Abort, OldSISucc,
ConstantInt::getTrue(Context), NewSISucc);
// Release the PHI operands for this edge.
- for (BasicBlock::iterator II = NewSISucc->begin();
- PHINode *PN = dyn_cast<PHINode>(II); ++II)
- PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
- UndefValue::get(PN->getType()));
+ for (PHINode &PN : NewSISucc->phis())
+ PN.setIncomingValue(PN.getBasicBlockIndex(Switch),
+ UndefValue::get(PN.getType()));
// Tell the domtree about the new block. We don't fully update the
// domtree here -- instead we force it to do a full recomputation
// after the pass is complete -- but we do need to inform it of
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 66608ec631f6..9dc550ceaeca 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -523,10 +523,8 @@ private:
DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
<< " -> " << Dest->getName() << '\n');
- PHINode *PN;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I)
- visitPHINode(*PN);
+ for (PHINode &PN : Dest->phis())
+ visitPHINode(PN);
}
}
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 3d0fca0bc3a5..aba732bc413f 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -271,19 +271,14 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
BasicBlock &OldExitingBB,
BasicBlock &OldPH) {
- for (Instruction &I : UnswitchedBB) {
- auto *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- // No more PHIs to check.
- break;
-
+ for (PHINode &PN : UnswitchedBB.phis()) {
// When the loop exit is directly unswitched we just need to update the
// incoming basic block. We loop to handle weird cases with repeated
// incoming blocks, but expect to typically only have one operand here.
- for (auto i : seq<int>(0, PN->getNumOperands())) {
- assert(PN->getIncomingBlock(i) == &OldExitingBB &&
+ for (auto i : seq<int>(0, PN.getNumOperands())) {
+ assert(PN.getIncomingBlock(i) == &OldExitingBB &&
"Found incoming block different from unique predecessor!");
- PN->setIncomingBlock(i, &OldPH);
+ PN.setIncomingBlock(i, &OldPH);
}
}
}
@@ -302,14 +297,9 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
assert(&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!");
Instruction *InsertPt = &*UnswitchedBB.begin();
- for (Instruction &I : ExitBB) {
- auto *PN = dyn_cast<PHINode>(&I);
- if (!PN)
- // No more PHIs to check.
- break;
-
- auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2,
- PN->getName() + ".split", InsertPt);
+ for (PHINode &PN : ExitBB.phis()) {
+ auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
+ PN.getName() + ".split", InsertPt);
// Walk backwards over the old PHI node's inputs to minimize the cost of
// removing each one. We have to do this weird loop manually so that we
@@ -320,18 +310,18 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
// allowed us to create a single entry for a predecessor block without
// having separate entries for each "edge" even though these edges are
// required to produce identical results.
- for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
- if (PN->getIncomingBlock(i) != &OldExitingBB)
+ for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
+ if (PN.getIncomingBlock(i) != &OldExitingBB)
continue;
- Value *Incoming = PN->removeIncomingValue(i);
+ Value *Incoming = PN.removeIncomingValue(i);
NewPN->addIncoming(Incoming, &OldPH);
}
// Now replace the old PHI with the new one and wire the old one in as an
// input to the new one.
- PN->replaceAllUsesWith(NewPN);
- NewPN->addIncoming(PN, &ExitBB);
+ PN.replaceAllUsesWith(NewPN);
+ NewPN->addIncoming(&PN, &ExitBB);
}
}
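
Note: the rewritePHINodesForExitAndUnswitchedBlocks hunk above preserves a detail worth calling out. PHINode::removeIncomingValue compacts the operand list, so the loop walks indices backwards; every index not yet visited stays valid, and each removal shifts fewer entries. In isolation (hedged sketch; movePredEntries is an illustrative name):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void movePredEntries(PHINode &PN, PHINode &NewPN,
                                BasicBlock &OldExitingBB, BasicBlock &OldPH) {
      // Reverse walk: removing entry i shifts the entries above i down.
      for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
        if (PN.getIncomingBlock(i) != &OldExitingBB)
          continue;
        NewPN.addIncoming(PN.removeIncomingValue(i), &OldPH);
      }
    }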
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 2972e1cff9a4..b8fb80b6cc26 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -544,10 +544,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
/// them in DeletedPhis
void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
PhiMap &Map = DeletedPhis[To];
- for (Instruction &I : *To) {
- if (!isa<PHINode>(I))
- break;
- PHINode &Phi = cast<PHINode>(I);
+ for (PHINode &Phi : To->phis()) {
while (Phi.getBasicBlockIndex(From) != -1) {
Value *Deleted = Phi.removeIncomingValue(From, false);
Map[&Phi].push_back(std::make_pair(From, Deleted));
@@ -557,10 +554,7 @@ void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
/// \brief Add a dummy PHI value as soon as we know the new predecessor
void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
- for (Instruction &I : *To) {
- if (!isa<PHINode>(I))
- break;
- PHINode &Phi = cast<PHINode>(I);
+ for (PHINode &Phi : To->phis()) {
Value *Undef = UndefValue::get(Phi.getType());
Phi.addIncoming(Undef, From);
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 606bd8baccaa..8f59913e14bb 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -94,9 +94,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
// Recursively deleting a PHI may cause multiple PHIs to be deleted
// or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.
SmallVector<WeakTrackingVH, 8> PHIs;
- for (BasicBlock::iterator I = BB->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PHIs.push_back(PN);
+ for (PHINode &PN : BB->phis())
+ PHIs.push_back(&PN);
bool Changed = false;
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
@@ -134,24 +133,17 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,
if (!OnlySucc) return false;
// Can't merge if there is PHI loop.
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- for (Value *IncValue : PN->incoming_values())
- if (IncValue == PN)
- return false;
- } else
- break;
- }
+ for (PHINode &PN : BB->phis())
+ for (Value *IncValue : PN.incoming_values())
+ if (IncValue == &PN)
+ return false;
// Begin by getting rid of unneeded PHIs.
SmallVector<Value *, 4> IncomingValues;
if (isa<PHINode>(BB->front())) {
- for (auto &I : *BB)
- if (PHINode *PN = dyn_cast<PHINode>(&I)) {
- if (PN->getIncomingValue(0) != PN)
- IncomingValues.push_back(PN->getIncomingValue(0));
- } else
- break;
+ for (PHINode &PN : BB->phis())
+ if (PN.getIncomingValue(0) != &PN)
+ IncomingValues.push_back(PN.getIncomingValue(0));
FoldSingleEntryPHINodes(BB, MemDep);
}
@@ -331,6 +323,12 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
for (BasicBlock *Pred : Preds) {
+ // Preds that are not reachable from entry should not be used to identify
+ // whether OldBB is a loop entry or whether the split makes a new loop
+ // header. Unreachable blocks are not within any loop, so using them would
+ // incorrectly mark SplitMakesNewLoopHeader as true and make NewBB the
+ // header of some loop, which breaks LI.
+ if (!DT->isReachableFromEntry(Pred))
+ continue;
// If we need to preserve LCSSA, determine if any of the preds is a loop
// exit.
if (PreserveLCSSA)
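
Note on the UpdateAnalysisInformation fix above: LoopInfo only describes blocks reachable from the function entry, so a predecessor sitting in dead code must not influence whether OldBB looks like a loop entry or whether the split would create a new loop header; otherwise NewBB can be adopted as the header of a loop it is not actually part of. The added guard generalizes to (hedged sketch; anyReachablePred is an illustrative name):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    static bool anyReachablePred(ArrayRef<BasicBlock *> Preds,
                                 const DominatorTree &DT) {
      // Unreachable blocks live outside every loop, so only reachable
      // predecessors may drive loop-header decisions.
      return any_of(Preds, [&](BasicBlock *Pred) {
        return DT.isReachableFromEntry(Pred);
      });
    }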
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 3653c307619b..464d1a34f518 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -106,10 +106,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");
// For each PHI in the destination block.
- for (BasicBlock::iterator I = DestBB->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- unsigned Idx = PN->getBasicBlockIndex(SplitBB);
- Value *V = PN->getIncomingValue(Idx);
+ for (PHINode &PN : DestBB->phis()) {
+ unsigned Idx = PN.getBasicBlockIndex(SplitBB);
+ Value *V = PN.getIncomingValue(Idx);
// If the input is a PHI which already satisfies LCSSA, don't create
// a new one.
@@ -119,13 +118,13 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,
// Otherwise a new PHI is needed. Create one and populate it.
PHINode *NewPN = PHINode::Create(
- PN->getType(), Preds.size(), "split",
+ PN.getType(), Preds.size(), "split",
SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());
for (unsigned i = 0, e = Preds.size(); i != e; ++i)
NewPN->addIncoming(V, Preds[i]);
// Update the original PHI.
- PN->setIncomingValue(Idx, NewPN);
+ PN.setIncomingValue(Idx, NewPN);
}
}
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
index 8825f77555e7..5dc6068d4a0b 100644
--- a/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -47,14 +47,11 @@ using namespace llvm;
///
static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
BasicBlock *MergeBlock) {
- for (auto &I : *Invoke->getNormalDest()) {
- auto *Phi = dyn_cast<PHINode>(&I);
- if (!Phi)
- break;
- int Idx = Phi->getBasicBlockIndex(OrigBlock);
+ for (PHINode &Phi : Invoke->getNormalDest()->phis()) {
+ int Idx = Phi.getBasicBlockIndex(OrigBlock);
if (Idx == -1)
continue;
- Phi->setIncomingBlock(Idx, MergeBlock);
+ Phi.setIncomingBlock(Idx, MergeBlock);
}
}
@@ -82,16 +79,13 @@ static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
BasicBlock *ThenBlock,
BasicBlock *ElseBlock) {
- for (auto &I : *Invoke->getUnwindDest()) {
- auto *Phi = dyn_cast<PHINode>(&I);
- if (!Phi)
- break;
- int Idx = Phi->getBasicBlockIndex(OrigBlock);
+ for (PHINode &Phi : Invoke->getUnwindDest()->phis()) {
+ int Idx = Phi.getBasicBlockIndex(OrigBlock);
if (Idx == -1)
continue;
- auto *V = Phi->getIncomingValue(Idx);
- Phi->setIncomingBlock(Idx, ThenBlock);
- Phi->addIncoming(V, ElseBlock);
+ auto *V = Phi.getIncomingValue(Idx);
+ Phi.setIncomingBlock(Idx, ThenBlock);
+ Phi.addIncoming(V, ElseBlock);
}
}
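
Note: fixupPHINodeForUnwindDest above illustrates how to split one PHI edge in two when an invoke is versioned into then/else blocks — the original entry is retargeted to one new predecessor and a second entry carrying the same value is added for the other. In isolation (hedged sketch; splitPhiEntry is an illustrative name):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void splitPhiEntry(PHINode &Phi, BasicBlock *OrigBlock,
                              BasicBlock *ThenBlock, BasicBlock *ElseBlock) {
      int Idx = Phi.getBasicBlockIndex(OrigBlock);
      if (Idx == -1)
        return;
      // Both new predecessors feed the same value the original edge carried.
      Value *V = Phi.getIncomingValue(Idx);
      Phi.setIncomingBlock(Idx, ThenBlock);
      Phi.addIncoming(V, ElseBlock);
    }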
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 3b19ba1b50f2..16af2c7b808b 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -493,17 +493,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
+ for (const PHINode &PN : BI.phis()) {
// PHI nodes may have been remapped to non-PHI nodes by the caller or
// during the cloning process.
- if (const PHINode *PN = dyn_cast<PHINode>(I)) {
- if (isa<PHINode>(VMap[PN]))
- PHIToResolve.push_back(PN);
- else
- break;
- } else {
+ if (isa<PHINode>(VMap[&PN]))
+ PHIToResolve.push_back(&PN);
+ else
break;
- }
}
// Finally, remap the terminator instructions, as those can't be remapped
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a1961eecb391..acccf7abf808 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -105,21 +105,17 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
IRBuilder<> Builder(T);
// Branch - See if we are conditional jumping on constant
- if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+ if (auto *BI = dyn_cast<BranchInst>(T)) {
if (BI->isUnconditional()) return false; // Can't optimize uncond branch
BasicBlock *Dest1 = BI->getSuccessor(0);
BasicBlock *Dest2 = BI->getSuccessor(1);
- if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
// Are we branching on constant?
// YES. Change to unconditional branch...
BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
- //cerr << "Function: " << T->getParent()->getParent()
- // << "\nRemoving branch from " << T->getParent()
- // << "\n\nTo: " << OldDest << endl;
-
// Let the basic block know that we are letting go of it. Based on this,
// it will adjust its PHI nodes.
OldDest->removePredecessor(BB);
@@ -150,10 +146,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
return false;
}
- if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ if (auto *SI = dyn_cast<SwitchInst>(T)) {
// If we are switching on a constant, we can convert the switch to an
// unconditional branch.
- ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+ auto *CI = dyn_cast<ConstantInt>(SI->getCondition());
BasicBlock *DefaultDest = SI->getDefaultDest();
BasicBlock *TheOnlyDest = DefaultDest;
@@ -280,9 +276,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
return false;
}
- if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) {
+ if (auto *IBI = dyn_cast<IndirectBrInst>(T)) {
// indirectbr blockaddress(@F, @BB) -> br label @BB
- if (BlockAddress *BA =
+ if (auto *BA =
dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {
BasicBlock *TheOnlyDest = BA->getBasicBlock();
// Insert the new branch.
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index dc98a39adcc5..92dfb1c7204d 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -258,11 +258,8 @@ static bool isEpilogProfitable(Loop *L) {
BasicBlock *PreHeader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
assert(PreHeader && Header);
- for (Instruction &BBI : *Header) {
- PHINode *PN = dyn_cast<PHINode>(&BBI);
- if (!PN)
- break;
- if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
+ for (const PHINode &PN : Header->phis()) {
+ if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
return true;
}
return false;
@@ -611,13 +608,12 @@ LoopUnrollResult llvm::UnrollLoop(
for (BasicBlock *Succ : successors(*BB)) {
if (L->contains(Succ))
continue;
- for (BasicBlock::iterator BBI = Succ->begin();
- PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
- Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ for (PHINode &PHI : Succ->phis()) {
+ Value *Incoming = PHI.getIncomingValueForBlock(*BB);
ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
if (It != LastValueMap.end())
Incoming = It->second;
- phi->addIncoming(Incoming, New);
+ PHI.addIncoming(Incoming, New);
}
}
// Keep track of new headers and latches as we create them, so that
@@ -721,10 +717,8 @@ LoopUnrollResult llvm::UnrollLoop(
for (BasicBlock *Succ: successors(BB)) {
if (Succ == Headers[i])
continue;
- for (BasicBlock::iterator BBI = Succ->begin();
- PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
- Phi->removeIncomingValue(BB, false);
- }
+ for (PHINode &Phi : Succ->phis())
+ Phi.removeIncomingValue(BB, false);
}
}
// Replace the conditional branch with an unconditional one.
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index e00541d3c812..f79f423ce019 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -80,25 +80,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// The new PHI node value is added as an operand of a PHI node in either
// the loop header or the loop exit block.
for (BasicBlock *Succ : successors(Latch)) {
- for (Instruction &BBI : *Succ) {
- PHINode *PN = dyn_cast<PHINode>(&BBI);
- // Exit when we passed all PHI nodes.
- if (!PN)
- break;
+ for (PHINode &PN : Succ->phis()) {
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
- PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
- if (L->contains(PN)) {
- NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
+ if (L->contains(&PN)) {
+ NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
PreHeader);
} else {
- NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+ NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
}
- Value *V = PN->getIncomingValueForBlock(Latch);
+ Value *V = PN.getIncomingValueForBlock(Latch);
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (L->contains(I)) {
V = VMap.lookup(I);
@@ -111,10 +107,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Update the existing PHI node operand with the value from the
// new PHI node. How this is done depends on if the existing
// PHI node is in the original loop block, or the exit block.
- if (L->contains(PN)) {
- PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
+ if (L->contains(&PN)) {
+ PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);
} else {
- PN->addIncoming(NewPN, PrologExit);
+ PN.addIncoming(NewPN, PrologExit);
}
}
}
@@ -191,11 +187,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Exit (EpilogPN)
// Update PHI nodes at NewExit and Exit.
- for (Instruction &BBI : *NewExit) {
- PHINode *PN = dyn_cast<PHINode>(&BBI);
- // Exit when we passed all PHI nodes.
- if (!PN)
- break;
+ for (PHINode &PN : NewExit->phis()) {
// PN should be used in another PHI located in the Exit block, as
// Exit was split by SplitBlockPredecessors into Exit and NewExit.
// Basically it should look like:
@@ -207,14 +199,14 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
//
// There is an EpilogPreHeader incoming block instead of NewExit, as
// NewExit was split one more time to get EpilogPreHeader.
- assert(PN->hasOneUse() && "The phi should have 1 use");
- PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
+ assert(PN.hasOneUse() && "The phi should have 1 use");
+ PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
// Add incoming PreHeader from branch around the Loop
- PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
+ PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
- Value *V = PN->getIncomingValueForBlock(Latch);
+ Value *V = PN.getIncomingValueForBlock(Latch);
Instruction *I = dyn_cast<Instruction>(V);
if (I && L->contains(I))
// If value comes from an instruction in the loop add VMap value.
@@ -242,23 +234,19 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Skip this as we already updated phis in exit blocks.
if (!L->contains(Succ))
continue;
- for (Instruction &BBI : *Succ) {
- PHINode *PN = dyn_cast<PHINode>(&BBI);
- // Exit when we passed all PHI nodes.
- if (!PN)
- break;
+ for (PHINode &PN : Succ->phis()) {
// Add new PHI nodes to the loop exit block and update epilog
// PHIs with the new PHI values.
- PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
+ PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
NewExit->getFirstNonPHI());
// Adding a value to the new PHI node from the unrolling loop preheader.
- NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
+ NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
// Adding a value to the new PHI node from the unrolling loop latch.
- NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
+ NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
// Update the existing PHI node operand with the value from the new PHI
// node. Corresponding instruction in epilog loop should be PHI.
- PHINode *VPN = cast<PHINode>(VMap[&BBI]);
+ PHINode *VPN = cast<PHINode>(VMap[&PN]);
VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
}
}
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index fe106e33bca1..a5a305ef582b 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -1321,13 +1321,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// Rewrite phis in the exit block to get their inputs from the Preheader
// instead of the exiting block.
- BasicBlock::iterator BI = ExitBlock->begin();
- while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ for (PHINode &P : ExitBlock->phis()) {
// Set the zero'th element of Phi to be from the preheader and remove all
// other incoming values. Given the loop has dedicated exits, all other
// incoming values must be from the exiting blocks.
int PredIndex = 0;
- P->setIncomingBlock(PredIndex, Preheader);
+ P.setIncomingBlock(PredIndex, Preheader);
// Removes all incoming values from all other exiting blocks (including
// duplicate values from an exiting block).
// Nuke all entries except the zero'th entry which is the preheader entry.
@@ -1335,13 +1334,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// below, to keep the indices valid for deletion (removeIncomingValues
// updates getNumIncomingValues and shifts all values down into the operand
// being deleted).
- for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
- P->removeIncomingValue(e - i, false);
+ for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
+ P.removeIncomingValue(e - i, false);
- assert((P->getNumIncomingValues() == 1 &&
- P->getIncomingBlock(PredIndex) == Preheader) &&
+ assert((P.getNumIncomingValues() == 1 &&
+ P.getIncomingBlock(PredIndex) == Preheader) &&
"Should have exactly one value and that's from the preheader!");
- ++BI;
}
// Disconnect the loop body by branching directly to its exit.
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index e4b20b0faa15..b2231d68a301 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -147,11 +147,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
if (isa<PHINode>(BB->begin())) {
SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(),
PredValues.end());
- PHINode *SomePHI;
- for (BasicBlock::iterator It = BB->begin();
- (SomePHI = dyn_cast<PHINode>(It)); ++It) {
- if (IsEquivalentPHI(SomePHI, ValueMapping))
- return SomePHI;
+ for (PHINode &SomePHI : BB->phis()) {
+ if (IsEquivalentPHI(&SomePHI, ValueMapping))
+ return &SomePHI;
}
}
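
Note: the SSAUpdater hunk sits inside GetValueInMiddleOfBlock, which scans the block's existing PHIs for one whose incoming entries already match the predecessor-to-value mapping and reuses it rather than inserting a duplicate. A hedged sketch of that equivalence test (phiMatches is an illustrative name; the real check is IsEquivalentPHI):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool phiMatches(const PHINode &PN,
                           const SmallDenseMap<BasicBlock *, Value *, 8> &M) {
      // A PHI is reusable only if every incoming edge already carries exactly
      // the value the updater would insert for that predecessor.
      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
        auto It = M.find(PN.getIncomingBlock(i));
        if (It == M.end() || It->second != PN.getIncomingValue(i))
          return false;
      }
      return true;
    }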
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index e7358dbcb624..7c195788e416 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -283,12 +283,8 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
/// of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
- if (!isa<PHINode>(Succ->begin()))
- return; // Quick exit if nothing to do
-
- PHINode *PN;
- for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
- PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
+ for (PHINode &PN : Succ->phis())
+ PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
}
/// Compute an abstract "cost" of speculating the given instruction,
@@ -1228,11 +1224,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
Instruction *I1, Instruction *I2) {
for (BasicBlock *Succ : successors(BB1)) {
- PHINode *PN;
- for (BasicBlock::iterator BBI = Succ->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
- Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ for (const PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
return false;
}
@@ -1282,6 +1276,17 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
if (isa<TerminatorInst>(I1))
goto HoistTerminator;
+ // If we're going to hoist a call, make sure that the two instructions we're
+ // commoning/hoisting are both marked with musttail, or neither of them is
+ // marked as such. Otherwise, we might end up in a situation where we hoist
+ // from a block where the terminator is a `ret` to a block where the terminator
+ // is a `br`, and `musttail` calls expect to be followed by a return.
+ auto *C1 = dyn_cast<CallInst>(I1);
+ auto *C2 = dyn_cast<CallInst>(I2);
+ if (C1 && C2)
+ if (C1->isMustTailCall() != C2->isMustTailCall())
+ return Changed;
+
if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
return Changed;
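
Note on the musttail guard just added: LLVM IR requires a musttail call to be immediately followed by a ret (optionally through a bitcast), so hoisting one call of a musttail/non-musttail pair into a block that ends in a plain branch would produce invalid IR. The check reduces to (hedged restatement; hoistWouldBreakMustTail is an illustrative name):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool hoistWouldBreakMustTail(Instruction *I1, Instruction *I2) {
      auto *C1 = dyn_cast<CallInst>(I1);
      auto *C2 = dyn_cast<CallInst>(I2);
      // Hoisting is only safe when both candidates agree on musttail-ness.
      return C1 && C2 && C1->isMustTailCall() != C2->isMustTailCall();
    }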
@@ -1332,18 +1337,16 @@ HoistTerminator:
return Changed;
for (BasicBlock *Succ : successors(BB1)) {
- PHINode *PN;
- for (BasicBlock::iterator BBI = Succ->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
- Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ for (PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
if (BB1V == BB2V)
continue;
// Check for passingValueIsAlwaysUndefined here because we would rather
// eliminate undefined control flow than convert it to a select.
- if (passingValueIsAlwaysUndefined(BB1V, PN) ||
- passingValueIsAlwaysUndefined(BB2V, PN))
+ if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
+ passingValueIsAlwaysUndefined(BB2V, &PN))
return Changed;
if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
@@ -1369,11 +1372,9 @@ HoistTerminator:
// nodes, so we insert select instruction to compute the final result.
std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
for (BasicBlock *Succ : successors(BB1)) {
- PHINode *PN;
- for (BasicBlock::iterator BBI = Succ->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
- Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ for (PHINode &PN : Succ->phis()) {
+ Value *BB1V = PN.getIncomingValueForBlock(BB1);
+ Value *BB2V = PN.getIncomingValueForBlock(BB2);
if (BB1V == BB2V)
continue;
@@ -1386,9 +1387,9 @@ HoistTerminator:
BB1V->getName() + "." + BB2V->getName(), BI));
// Make the PHI node use the select for all incoming values for BB1/BB2
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
- PN->setIncomingValue(i, SI);
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
+ PN.setIncomingValue(i, SI);
}
}
@@ -1999,10 +2000,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Check that the PHI nodes can be converted to selects.
bool HaveRewritablePHIs = false;
- for (BasicBlock::iterator I = EndBB->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- Value *OrigV = PN->getIncomingValueForBlock(BB);
- Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+ for (PHINode &PN : EndBB->phis()) {
+ Value *OrigV = PN.getIncomingValueForBlock(BB);
+ Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
// FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
// Skip PHIs which are trivial.
@@ -2010,8 +2010,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
continue;
// Don't convert to selects if we could remove undefined behavior instead.
- if (passingValueIsAlwaysUndefined(OrigV, PN) ||
- passingValueIsAlwaysUndefined(ThenV, PN))
+ if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
+ passingValueIsAlwaysUndefined(ThenV, &PN))
return false;
HaveRewritablePHIs = true;
@@ -2072,12 +2072,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
// Insert selects and rewrite the PHI operands.
IRBuilder<NoFolder> Builder(BI);
- for (BasicBlock::iterator I = EndBB->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- unsigned OrigI = PN->getBasicBlockIndex(BB);
- unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
- Value *OrigV = PN->getIncomingValue(OrigI);
- Value *ThenV = PN->getIncomingValue(ThenI);
+ for (PHINode &PN : EndBB->phis()) {
+ unsigned OrigI = PN.getBasicBlockIndex(BB);
+ unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN.getIncomingValue(OrigI);
+ Value *ThenV = PN.getIncomingValue(ThenI);
// Skip PHIs which are trivial.
if (OrigV == ThenV)
@@ -2091,8 +2090,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
std::swap(TrueV, FalseV);
Value *V = Builder.CreateSelect(
BrCond, TrueV, FalseV, "spec.select", BI);
- PN->setIncomingValue(OrigI, V);
- PN->setIncomingValue(ThenI, V);
+ PN.setIncomingValue(OrigI, V);
+ PN.setIncomingValue(ThenI, V);
}
// Remove speculated dbg intrinsics.
@@ -3335,17 +3334,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// it. If it has PHIs though, the PHIs may have different
// entries for BB and PBI's BB. If so, insert a select to make
// them agree.
- PHINode *PN;
- for (BasicBlock::iterator II = CommonDest->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- Value *BIV = PN->getIncomingValueForBlock(BB);
- unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
- Value *PBIV = PN->getIncomingValue(PBBIdx);
+ for (PHINode &PN : CommonDest->phis()) {
+ Value *BIV = PN.getIncomingValueForBlock(BB);
+ unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN.getIncomingValue(PBBIdx);
if (BIV != PBIV) {
// Insert a select in PBI to pick the right value.
SelectInst *NV = cast<SelectInst>(
Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
- PN->setIncomingValue(PBBIdx, NV);
+ PN.setIncomingValue(PBBIdx, NV);
// Although the select has the same condition as PBI, the original branch
// weights for PBI do not apply to the new select because the select's
// 'logical' edges are incoming edges of the phi that is eliminated, not
@@ -4451,17 +4448,16 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
BasicBlock *Succ = Branch->getSuccessor(0);
- BasicBlock::iterator I = Succ->begin();
- while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
- int Idx = PHI->getBasicBlockIndex(BB);
+ for (PHINode &PHI : Succ->phis()) {
+ int Idx = PHI.getBasicBlockIndex(BB);
assert(Idx >= 0 && "PHI has no entry for predecessor?");
- Value *InValue = PHI->getIncomingValue(Idx);
+ Value *InValue = PHI.getIncomingValue(Idx);
if (InValue != CaseValue)
continue;
*PhiIndex = Idx;
- return PHI;
+ return &PHI;
}
return nullptr;
@@ -4491,19 +4487,16 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
// -->
// %r = phi i32 ... [ %x, %switchbb ] ...
- for (Instruction &InstInCaseDest : *CaseDest) {
- auto *Phi = dyn_cast<PHINode>(&InstInCaseDest);
- if (!Phi) break;
-
+ for (PHINode &Phi : CaseDest->phis()) {
// This only works if there is exactly 1 incoming edge from the switch to
// a phi. If there is >1, that means multiple cases of the switch map to 1
// value in the phi, and that phi value is not the switch condition. Thus,
// this transform would not make sense (the phi would be invalid because
// a phi can't have different incoming values from the same block).
- int SwitchBBIdx = Phi->getBasicBlockIndex(SwitchBlock);
- if (Phi->getIncomingValue(SwitchBBIdx) == CaseValue &&
- count(Phi->blocks(), SwitchBlock) == 1) {
- Phi->setIncomingValue(SwitchBBIdx, SI->getCondition());
+ int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
+ if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
+ count(Phi.blocks(), SwitchBlock) == 1) {
+ Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
Changed = true;
}
}
@@ -4656,14 +4649,13 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
return false;
// Get the values for this case from phi nodes in the destination block.
- BasicBlock::iterator I = (*CommonDest)->begin();
- while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
- int Idx = PHI->getBasicBlockIndex(Pred);
+ for (PHINode &PHI : (*CommonDest)->phis()) {
+ int Idx = PHI.getBasicBlockIndex(Pred);
if (Idx == -1)
continue;
Constant *ConstVal =
- LookupConstant(PHI->getIncomingValue(Idx), ConstantPool);
+ LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
if (!ConstVal)
return false;
@@ -4671,7 +4663,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
if (!ValidLookupTableConstant(ConstVal, TTI))
return false;
- Res.push_back(std::make_pair(PHI, ConstVal));
+ Res.push_back(std::make_pair(&PHI, ConstVal));
}
return Res.size() > 0;
@@ -5946,14 +5938,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
- for (BasicBlock::iterator i = BB->begin();
- PHINode *PHI = dyn_cast<PHINode>(i); ++i)
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) {
- TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator();
+ for (PHINode &PHI : BB->phis())
+ for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
+ if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
+ TerminatorInst *T = PHI.getIncomingBlock(i)->getTerminator();
IRBuilder<> Builder(T);
if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
- BB->removePredecessor(PHI->getIncomingBlock(i));
+ BB->removePredecessor(PHI.getIncomingBlock(i));
// Turn unconditional branches into unreachables and remove the dead
// destination from conditional branches.
if (BI->isUnconditional())
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 52f32cda2609..6ef54385c452 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4164,15 +4164,12 @@ void InnerLoopVectorizer::fixCrossIterationPHIs() {
// the currently empty PHI nodes. At this point every instruction in the
// original loop is widened to a vector form so we can use them to construct
// the incoming edges.
- for (Instruction &I : *OrigLoop->getHeader()) {
- PHINode *Phi = dyn_cast<PHINode>(&I);
- if (!Phi)
- break;
+ for (PHINode &Phi : OrigLoop->getHeader()->phis()) {
// Handle first-order recurrences and reductions that need to be fixed.
- if (Legal->isFirstOrderRecurrence(Phi))
- fixFirstOrderRecurrence(Phi);
- else if (Legal->isReductionVariable(Phi))
- fixReduction(Phi);
+ if (Legal->isFirstOrderRecurrence(&Phi))
+ fixFirstOrderRecurrence(&Phi);
+ else if (Legal->isReductionVariable(&Phi))
+ fixReduction(&Phi);
}
}
@@ -4337,12 +4334,9 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// vector recurrence we extracted in the middle block. Since the loop is in
// LCSSA form, we just need to find the phi node for the original scalar
// recurrence in the exit block, and then add an edge for the middle block.
- for (auto &I : *LoopExitBlock) {
- auto *LCSSAPhi = dyn_cast<PHINode>(&I);
- if (!LCSSAPhi)
- break;
- if (LCSSAPhi->getIncomingValue(0) == Phi) {
- LCSSAPhi->addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
+ if (LCSSAPhi.getIncomingValue(0) == Phi) {
+ LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
break;
}
}
@@ -4499,21 +4493,15 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// inside and outside of the scalar remainder loop.
// We know that the loop is in LCSSA form. We need to update the
// PHI nodes in the exit blocks.
- for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
- LEE = LoopExitBlock->end();
- LEI != LEE; ++LEI) {
- PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi)
- break;
-
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
// All PHINodes need to have a single entry edge, or two if
// we already fixed them.
- assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
+ assert(LCSSAPhi.getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
// We found a reduction value exit-PHI. Update it with the
// incoming bypass edge.
- if (LCSSAPhi->getIncomingValue(0) == LoopExitInst)
- LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
+ if (LCSSAPhi.getIncomingValue(0) == LoopExitInst)
+ LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
} // end of the LCSSA phi scan.
// Fix the scalar loop reduction variable with the incoming reduction sum
@@ -4528,14 +4516,11 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
}
void InnerLoopVectorizer::fixLCSSAPHIs() {
- for (Instruction &LEI : *LoopExitBlock) {
- auto *LCSSAPhi = dyn_cast<PHINode>(&LEI);
- if (!LCSSAPhi)
- break;
- if (LCSSAPhi->getNumIncomingValues() == 1) {
- assert(OrigLoop->isLoopInvariant(LCSSAPhi->getIncomingValue(0)) &&
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
+ if (LCSSAPhi.getNumIncomingValues() == 1) {
+ assert(OrigLoop->isLoopInvariant(LCSSAPhi.getIncomingValue(0)) &&
"Incoming value isn't loop invariant");
- LCSSAPhi->addIncoming(LCSSAPhi->getIncomingValue(0), LoopMiddleBlock);
+ LCSSAPhi.addIncoming(LCSSAPhi.getIncomingValue(0), LoopMiddleBlock);
}
}
}
@@ -4981,11 +4966,8 @@ void InnerLoopVectorizer::updateAnalysis() {
/// Phi nodes with constant expressions that can trap are not safe to if
/// convert.
static bool canIfConvertPHINodes(BasicBlock *BB) {
- for (Instruction &I : *BB) {
- auto *Phi = dyn_cast<PHINode>(&I);
- if (!Phi)
- return true;
- for (Value *V : Phi->incoming_values())
+ for (PHINode &Phi : BB->phis()) {
+ for (Value *V : Phi.incoming_values())
if (auto *C = dyn_cast<Constant>(V))
if (C->canTrap())
return false;