author    Dimitry Andric <dim@FreeBSD.org>    2018-01-24 20:23:48 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2018-01-24 20:23:48 +0000
commit    a096e0bdf6cfa020569afca490d8e4c9ac8ebb01 (patch)
tree      39ef21ba905e021d44b9a5fb47336d4a864da27e /lib
parent    d215fd3b74b90f5dc1964610926fcc2a20f959aa (diff)
Diffstat (limited to 'lib')
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 7
-rw-r--r--  lib/CodeGen/GlobalMerge.cpp | 3
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 41
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 116
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 21
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 15
-rw-r--r--  lib/Linker/IRMover.cpp | 7
-rw-r--r--  lib/MC/MCCodeView.cpp | 69
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.cpp | 34
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 20
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 43
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 12
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 7
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 62
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/GVNHoist.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp | 108
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 9
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 61
22 files changed, 396 insertions(+), 261 deletions(-)
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 9dc1ab4e6bb5..26ca8d4ee88c 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -2700,8 +2700,13 @@ public:
// we still need to collect it due to original value is different.
// And later we will need all original values as anchors during
// finding the common Phi node.
+ // We must also reject the case where the base offsets differ and the
+ // scale register is not null: we cannot handle it, because the merged
+ // difference of the offsets would have to be used as the ScaleReg.
if (DifferentField != ExtAddrMode::MultipleFields &&
- DifferentField != ExtAddrMode::ScaleField) {
+ DifferentField != ExtAddrMode::ScaleField &&
+ (DifferentField != ExtAddrMode::BaseOffsField ||
+ !NewAddrMode.ScaledReg)) {
AddrModes.emplace_back(NewAddrMode);
return true;
}
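
The case rejected above can be reproduced with a small standalone model (hypothetical names, not the CodeGenPrepare types): two address modes that differ only in their base offset can normally be merged by turning the offset difference into a scaled-register component, but when a ScaledReg is already present that slot is occupied, so the combination has to be refused.

// Minimal standalone sketch of the rejected case; all names are
// hypothetical and stand in for the CodeGenPrepare structures.
#include <cassert>
#include <cstdint>
#include <optional>

struct AddrMode {
  int64_t BaseOffs = 0;
  bool HasScaledReg = false; // stands in for a non-null ScaledReg
};

// Returns the merged mode, or nullopt for the combination the patch rejects.
std::optional<AddrMode> tryMerge(const AddrMode &A, const AddrMode &B) {
  if (A.BaseOffs != B.BaseOffs && (A.HasScaledReg || B.HasScaledReg))
    return std::nullopt; // no free slot to hold the offset difference
  return A;
}

int main() {
  assert(!tryMerge({8, true}, {16, true})); // differing offsets + scale reg
  assert(tryMerge({8, false}, {8, true}));  // identical offsets merge fine
}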
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 8b9545da914e..3888226fa059 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -577,7 +577,8 @@ bool GlobalMerge::doInitialization(Module &M) {
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
if (GV.isDeclaration() || GV.isThreadLocal() ||
- GV.hasSection() || GV.hasImplicitSection())
+ GV.hasSection() || GV.hasImplicitSection() ||
+ GV.hasDLLExportStorageClass())
continue;
// It's not safe to merge globals that may be preempted
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 45078081987a..11acbe687a31 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -719,15 +719,14 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
CurSrcPair = Pair;
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
!DisableAdvCopyOpt, TII);
- ValueTrackerResult Res;
- bool ShouldRewrite = false;
- do {
- // Follow the chain of copies until we reach the top of the use-def chain
- // or find a more suitable source.
- Res = ValTracker.getNextSource();
+ // Follow the chain of copies until we find a more suitable source, reach
+ // a PHI, or have to abort.
+ while (true) {
+ ValueTrackerResult Res = ValTracker.getNextSource();
+ // Abort at the end of a chain (without finding a suitable source).
if (!Res.isValid())
- break;
+ return false;
// Insert the Def -> Use entry for the recently found source.
ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
@@ -763,24 +762,19 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
return false;
+ // Keep following the chain if the value isn't any better yet.
const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
- ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
- CurSrcPair.SubReg);
- } while (!ShouldRewrite);
-
- // Continue looking for new sources...
- if (Res.isValid())
- continue;
+ if (!TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, CurSrcPair.SubReg))
+ continue;
- // Do not continue searching for a new source if the there's at least
- // one use-def which cannot be rewritten.
- if (!ShouldRewrite)
- return false;
- }
+ // We currently cannot deal with subreg operands on PHI instructions
+ // (see insertPHI()).
+ if (PHICount > 0 && CurSrcPair.SubReg != 0)
+ continue;
- if (PHICount >= RewritePHILimit) {
- DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
- return false;
+ // We found a suitable source, and are done with this chain.
+ break;
+ }
}
// If we did not find a more suitable source, there is nothing to optimize.
@@ -799,6 +793,9 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
+ // NewRC is only correct if no subregisters are involved. findNextSource()
+ // should have rejected those cases already.
+ assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
unsigned NewVR = MRI->createVirtualRegister(NewRC);
MachineBasicBlock *MBB = OrigPHI->getParent();
MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
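
The control flow of the rewritten loop is easier to see in isolation. Below is a minimal standalone analogue (hypothetical types; this is not the PeepholeOptimizer API) of "follow the copy chain; continue while the value is no better, return on a dead end, break on success":

// Standalone analogue of the restructured search loop above.
#include <cstdio>
#include <map>

// Each register maps to the register it was copied from.
struct CopySource { int DefReg; bool BetterThanOriginal; };

bool findBetterSource(const std::map<int, CopySource> &Copies, int Reg,
                      int &Found) {
  while (true) {
    auto It = Copies.find(Reg);
    if (It == Copies.end())
      return false; // end of the chain without finding a better source
    if (!It->second.BetterThanOriginal) {
      Reg = It->second.DefReg; // not better yet, keep following the chain
      continue;
    }
    Found = It->second.DefReg; // found a suitable source, done
    break;
  }
  return true;
}

int main() {
  // %1 is copied from %2, which is copied from %3 (a better source).
  std::map<int, CopySource> Copies = {{1, {2, false}}, {2, {3, true}}};
  int Found = 0;
  if (findBetterSource(Copies, 1, Found))
    std::printf("rewrite source to %%%d\n", Found);
}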
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 81bff4d7eefa..2c6b724c02df 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
EVT ExtVT;
if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
- // Only add this load if we can make it more narrow.
- if (ExtVT.bitsLT(Load->getMemoryVT()))
+
+ // ZEXTLOAD is already small enough.
+ if (Load->getExtensionType() == ISD::ZEXTLOAD &&
+ ExtVT.bitsGE(Load->getMemoryVT()))
+ continue;
+
+ // Use LE so that equal-sized loads are also converted to zext.
+ if (ExtVT.bitsLE(Load->getMemoryVT()))
Loads.insert(Load);
+
continue;
}
return false;
@@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
if (Loads.size() == 0)
return false;
+ DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
SDValue MaskOp = N->getOperand(1);
// If it exists, fixup the single node we allow in the tree that needs
// masking.
if (FixupNode) {
+ DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
FixupNode->getValueType(0),
SDValue(FixupNode, 0), MaskOp);
@@ -3914,14 +3923,21 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
// Narrow any constants that need it.
for (auto *LogicN : NodesWithConsts) {
- auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
- SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
- SDValue(C, 0), MaskOp);
- DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
+ SDValue Op0 = LogicN->getOperand(0);
+ SDValue Op1 = LogicN->getOperand(1);
+
+ if (isa<ConstantSDNode>(Op0))
+ std::swap(Op0, Op1);
+
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+ Op1, MaskOp);
+
+ DAG.UpdateNodeOperands(LogicN, Op0, And);
}
// Create narrow loads.
for (auto *Load : Loads) {
+ DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
@@ -5209,7 +5225,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
@@ -12928,7 +12944,7 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
EVT MemVT = St->getMemoryVT();
SDValue Val = peekThroughBitcast(St->getValue());
@@ -12949,7 +12965,7 @@ void DAGCombiner::getStoreMergeCandidates(
EVT LoadVT;
if (IsLoadSrc) {
auto *Ld = cast<LoadSDNode>(Val);
- LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ LBasePtr = BaseIndexOffset::match(Ld, DAG);
LoadVT = Ld->getMemoryVT();
// Load and store should be the same type.
if (MemVT != LoadVT)
@@ -12968,7 +12984,7 @@ void DAGCombiner::getStoreMergeCandidates(
return false;
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+ auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
@@ -12992,7 +13008,7 @@ void DAGCombiner::getStoreMergeCandidates(
Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
}
- Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+ Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
@@ -13365,7 +13381,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getMemoryVT() != MemVT)
break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
@@ -17432,44 +17448,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
// Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
int64_t PtrDiff;
- if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
- return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+ if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
+ if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+ return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
- // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
- // able to calculate their relative offset if at least one arises
- // from an alloca. However, these allocas cannot overlap and we
- // can infer there is no alias.
- if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
- if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- // If the base are the same frame index but the we couldn't find a
- // constant offset, (indices are different) be conservative.
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
- !MFI.isFixedObjectIndex(B->getIndex())))
- return false;
- }
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ // If the bases are the same frame index but we couldn't find a
+ // constant offset (the indices are different), be conservative.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex())))
+ return false;
+ }
- bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
- bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
- bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
- bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
- bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
- bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+ bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+ bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+ bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+ bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+ bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+ bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
- return false;
+ // If the base types are mismatched, or the indices are checkable, we
+ // can check that they do not alias.
+ if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+ (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+ (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+ return false;
+ }
- // If we know required SrcValue1 and SrcValue2 have relatively large alignment
- // compared to the size and offset of the access, we may be able to prove they
- // do not alias. This check is conservative for now to catch cases created by
- // splitting vector types.
+ // If we know required SrcValue1 and SrcValue2 have relatively large
+ // alignment compared to the size and offset of the access, we may be able
+ // to prove they do not alias. This check is conservative for now to catch
+ // cases created by splitting vector types.
int64_t SrcValOffset0 = Op0->getSrcValueOffset();
int64_t SrcValOffset1 = Op1->getSrcValueOffset();
unsigned OrigAlignment0 = Op0->getOriginalAlignment();
@@ -17479,8 +17497,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
- // There is no overlap between these relatively aligned accesses of similar
- // size. Return no alias.
+ // There is no overlap between these relatively aligned accesses of
+ // similar size. Return no alias.
if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
(OffAlign1 + NumBytes1) <= OffAlign0)
return false;
@@ -17643,7 +17661,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -17669,7 +17687,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
// Check that the base pointer is the same as the original one.
if (!BasePtr.equalBaseIndex(Ptr, DAG))
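
A recurring theme in the DAGCombiner changes above is that BaseIndexOffset::match() is now invoked on the memory node itself and may fail, so callers must tolerate an empty decomposition. The following is a simplified, self-contained sketch of the guarded alias check (hypothetical types, not the LLVM API):

// Simplified analogue of the guarded alias check: a decomposition may
// fail, and only when both succeed can we reason about range overlap.
#include <cassert>
#include <cstdint>
#include <optional>

struct Decomp { const void *Base; int64_t Offset; };

// "May alias" for two accesses of NumBytes0/NumBytes1 bytes.
bool mayAlias(std::optional<Decomp> A, std::optional<Decomp> B,
              int64_t NumBytes0, int64_t NumBytes1) {
  if (A && B && A->Base == B->Base) {
    int64_t PtrDiff = B->Offset - A->Offset;
    // No alias when [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1)
    // are disjoint.
    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
  }
  return true; // a failed match is answered conservatively
}

int main() {
  int Buf[8];
  Decomp A{Buf, 0}, B{Buf, 8};
  assert(!mayAlias(A, B, 8, 8));           // adjacent, disjoint
  assert(mayAlias(A, B, 12, 8));           // overlapping
  assert(mayAlias(std::nullopt, B, 4, 4)); // unknown -> conservative
}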
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bb1dc17b7a1b..b566c232cbc3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::ZERO_EXTEND:
LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
DAG.getValueType(AtomicType));
- RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
ExtRes = LHS;
break;
case ISD::ANY_EXTEND:
LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
- RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
break;
default:
llvm_unreachable("Invalid atomic op extension");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4c8b63d2f239..3ffc6fa9a059 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7947,11 +7947,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
if (VT.getSizeInBits() / 8 != Bytes)
return false;
- SDValue Loc = LD->getOperand(1);
- SDValue BaseLoc = Base->getOperand(1);
-
- auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
- auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+ auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+ auto LocDecomp = BaseIndexOffset::match(LD, *this);
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d5980919d03c..da1574f60524 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -21,6 +21,9 @@ using namespace llvm;
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
+ // Conservatively fail if a match failed.
+ if (!Base.getNode() || !Other.Base.getNode())
+ return false;
// Initial Offset difference.
Off = Other.Offset - Offset;
@@ -72,13 +75,29 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
+BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
+ const SelectionDAG &DAG) {
+ SDValue Ptr = N->getBasePtr();
+
// (((B + I*M) + c)) + c ...
SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
SDValue Index = SDValue();
int64_t Offset = 0;
bool IsIndexSignExt = false;
+ // Pre-inc/pre-dec offsets are components of the effective address (EA).
+ if (N->getAddressingMode() == ISD::PRE_INC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset += C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset -= C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ }
+
// Consume constant adds & ors with appropriate masking.
while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
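
The new pre-inc/pre-dec handling can be sketched in isolation, assuming simplified types (this is not the SelectionDAG API): a pre-incremented access reads from base+offset, a pre-decremented one from base-offset, and a non-constant offset makes the whole decomposition fail:

// Standalone sketch of folding a pre-inc/pre-dec offset into the
// address decomposition, with nullopt modelling a failed match.
#include <cassert>
#include <cstdint>
#include <optional>

enum class AddrMode { Unindexed, PreInc, PreDec };

std::optional<int64_t> effectiveOffset(AddrMode AM, int64_t BaseOffset,
                                       std::optional<int64_t> IndexOffset) {
  switch (AM) {
  case AddrMode::Unindexed:
    return BaseOffset;
  case AddrMode::PreInc:
    if (!IndexOffset)
      return std::nullopt; // unknown offset: give up, as the patch does
    return BaseOffset + *IndexOffset;
  case AddrMode::PreDec:
    if (!IndexOffset)
      return std::nullopt;
    return BaseOffset - *IndexOffset;
  }
  return std::nullopt;
}

int main() {
  assert(effectiveOffset(AddrMode::PreInc, 16, 4) == 20);
  assert(effectiveOffset(AddrMode::PreDec, 16, 4) == 12);
  assert(!effectiveOffset(AddrMode::PreInc, 16, std::nullopt));
}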
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 224ae1a3236a..b29a33ac1c14 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -132,9 +132,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
- // Darwin 10 and higher has an optimized __bzero.
- if (!TT.isMacOSX() || !TT.isMacOSXVersionLT(10, 6) || TT.isArch64Bit()) {
- setLibcallName(RTLIB::BZERO, TT.isAArch64() ? "bzero" : "__bzero");
+ // Some Darwin versions have an optimized __bzero/bzero function.
+ switch (TT.getArch()) {
+ case Triple::x86:
+ case Triple::x86_64:
+ if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
+ setLibcallName(RTLIB::BZERO, "__bzero");
+ break;
+ case Triple::aarch64:
+ setLibcallName(RTLIB::BZERO, "bzero");
+ break;
+ default:
+ break;
}
if (darwinHasSinCos(TT)) {
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index ee067a912e3c..f7170e714b9b 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -954,7 +954,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
NewGV->setLinkage(GlobalValue::InternalLinkage);
Constant *C = NewGV;
- if (DGV)
+ // Only create a bitcast if necessary. In particular, with
+ // DebugTypeODRUniquing we may reach metadata in the destination module
+ // containing a GV from the source module, in which case SGV will be
+ // the same as DGV and NewGV, and TypeMap.get() will assert since it
+ // assumes it is being invoked on a type in the source module.
+ if (DGV && NewGV != SGV)
C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
if (DGV && NewGV != DGV) {
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 82b81ccc24da..5fd5bde9f1eb 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -76,6 +76,14 @@ bool CodeViewContext::addFile(MCStreamer &OS, unsigned FileNumber,
return true;
}
+MCCVFunctionInfo *CodeViewContext::getCVFunctionInfo(unsigned FuncId) {
+ if (FuncId >= Functions.size())
+ return nullptr;
+ if (Functions[FuncId].isUnallocatedFunctionInfo())
+ return nullptr;
+ return &Functions[FuncId];
+}
+
bool CodeViewContext::recordFunctionId(unsigned FuncId) {
if (FuncId >= Functions.size())
Functions.resize(FuncId + 1);
@@ -247,6 +255,67 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS,
OS.EmitValueImpl(SRE, 4);
}
+void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
+ size_t Offset = MCCVLines.size();
+ auto I = MCCVLineStartStop.insert(
+ {LineEntry.getFunctionId(), {Offset, Offset + 1}});
+ if (!I.second)
+ I.first->second.second = Offset + 1;
+ MCCVLines.push_back(LineEntry);
+}
+
+std::vector<MCCVLineEntry>
+CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
+ std::vector<MCCVLineEntry> FilteredLines;
+ auto I = MCCVLineStartStop.find(FuncId);
+ if (I != MCCVLineStartStop.end()) {
+ MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
+ for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
+ ++Idx) {
+ unsigned LocationFuncId = MCCVLines[Idx].getFunctionId();
+ if (LocationFuncId == FuncId) {
+ // This was a .cv_loc directly for FuncId, so record it.
+ FilteredLines.push_back(MCCVLines[Idx]);
+ } else {
+ // Check if the current location is inlined in this function. If it is,
+ // synthesize a statement .cv_loc at the original inlined call site.
+ auto I = SiteInfo->InlinedAtMap.find(LocationFuncId);
+ if (I != SiteInfo->InlinedAtMap.end()) {
+ MCCVFunctionInfo::LineInfo &IA = I->second;
+ // Only add the location if it differs from the previous location.
+ // Large inlined calls will have many .cv_loc entries and we only need
+ // one line table entry in the parent function.
+ if (FilteredLines.empty() ||
+ FilteredLines.back().getFileNum() != IA.File ||
+ FilteredLines.back().getLine() != IA.Line ||
+ FilteredLines.back().getColumn() != IA.Col) {
+ FilteredLines.push_back(MCCVLineEntry(
+ MCCVLines[Idx].getLabel(),
+ MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false)));
+ }
+ }
+ }
+ }
+ }
+ return FilteredLines;
+}
+
+std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
+ auto I = MCCVLineStartStop.find(FuncId);
+ // Return an empty extent if there are no cv_locs for this function id.
+ if (I == MCCVLineStartStop.end())
+ return {~0ULL, 0};
+ return I->second;
+}
+
+ArrayRef<MCCVLineEntry> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
+ if (R <= L)
+ return None;
+ if (L >= MCCVLines.size())
+ return None;
+ return makeArrayRef(&MCCVLines[L], R - L);
+}
+
void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
unsigned FuncId,
const MCSymbol *FuncBegin,
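
The {~0ULL, 0} sentinel returned by getLineExtent() composes neatly with getLinesForExtent(): because R <= L, a missing function id falls out as an empty range without any special casing at the call site. A small standalone model of that convention (simplified containers, not the MC classes):

// Model of the empty-extent convention used above.
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using Extent = std::pair<size_t, size_t>;

std::map<unsigned, Extent> StartStop = {{7, {2, 5}}};
std::vector<int> Lines = {10, 11, 12, 13, 14, 15};

Extent getLineExtent(unsigned FuncId) {
  auto I = StartStop.find(FuncId);
  if (I == StartStop.end())
    return {SIZE_MAX, 0}; // the patch's {~0ULL, 0}: empty, since R <= L
  return I->second;
}

size_t linesForExtent(size_t L, size_t R) {
  if (R <= L || L >= Lines.size())
    return 0; // empty range, mirroring getLinesForExtent()
  return R - L;
}

int main() {
  Extent E = getLineExtent(7);
  assert(linesForExtent(E.first, E.second) == 3);
  Extent M = getLineExtent(99); // unknown function id
  assert(linesForExtent(M.first, M.second) == 0);
}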
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index c2d3ae31c624..b85b4e082996 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -868,6 +868,40 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
+ } else if (TM.getCodeModel() == CodeModel::Large) {
+ // Materialize the global using movz/movk instructions.
+ unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ auto InsertPt = std::next(I.getIterator());
+ auto MovZ =
+ BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
+ .addDef(MovZDstReg);
+ MovZ->addOperand(MF, I.getOperand(1));
+ MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
+ AArch64II::MO_NC);
+ MovZ->addOperand(MF, MachineOperand::CreateImm(0));
+ constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
+
+ auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
+ unsigned Offset, unsigned ForceDstReg) {
+ unsigned DstReg =
+ ForceDstReg ? ForceDstReg
+ : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
+ TII.get(AArch64::MOVKXi))
+ .addDef(DstReg)
+ .addReg(SrcReg);
+ MovI->addOperand(MF, MachineOperand::CreateGA(
+ GV, MovZ->getOperand(1).getOffset(), Flags));
+ MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
+ constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
+ return DstReg;
+ };
+ unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+ AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+ DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
+ BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+ I.eraseFromParent();
+ return true;
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
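
The MOVZ/MOVK sequence materializes a 64-bit address sixteen bits at a time: MOVZ writes the g0 chunk and zeroes everything else, and each MOVK patches in one further chunk. A plain C++ model of that arithmetic (no LLVM types; the g0..g3 names follow the MO_G0..MO_G3 flags above):

// Plain model of the movz/movk materialization.
#include <cassert>
#include <cstdint>

uint64_t movz(uint16_t Imm, unsigned Shift) {
  return uint64_t(Imm) << Shift; // write one chunk, zero the rest
}

uint64_t movk(uint64_t Reg, uint16_t Imm, unsigned Shift) {
  Reg &= ~(uint64_t(0xFFFF) << Shift); // clear just this 16-bit slice
  return Reg | (uint64_t(Imm) << Shift);
}

int main() {
  uint64_t Addr = 0x123456789ABCDEF0;
  uint64_t R = movz(Addr & 0xFFFF, 0);    // MOVZ Xd, #g0
  R = movk(R, (Addr >> 16) & 0xFFFF, 16); // MOVK Xd, #g1, lsl #16
  R = movk(R, (Addr >> 32) & 0xFFFF, 32); // MOVK Xd, #g2, lsl #32
  R = movk(R, (Addr >> 48) & 0xFFFF, 48); // MOVK Xd, #g3, lsl #48
  assert(R == Addr);
}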
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 740861851185..f08c50540656 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -821,7 +821,6 @@ namespace llvm {
MutableArrayRef<int> NewMask, unsigned Options = None);
OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
MutableArrayRef<int> NewMask);
- OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results);
OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results);
OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
@@ -1139,25 +1138,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
return concat(Out[0], Out[1], Results);
}
-OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) {
- DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
-
- int VecLen = SM.Mask.size();
- SmallVector<uint8_t,128> UsedBytes(VecLen);
- bool HasUnused = false;
- for (int I = 0; I != VecLen; ++I) {
- if (SM.Mask[I] != -1)
- UsedBytes[I] = 0xFF;
- else
- HasUnused = true;
- }
- if (!HasUnused)
- return Va;
- SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode));
- Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)});
- return OpRef::res(Results.top());
-}
-
OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f9de65fcb1df..f0e8b11a3d9c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+ // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
+ case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
@@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
+// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
+// compared to a value that is atomically loaded (atomic loads zero-extend).
+SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
+ "Expecting an atomic compare-and-swap here.");
+ SDLoc dl(Op);
+ auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
+ EVT MemVT = AtomicNode->getMemoryVT();
+ if (MemVT.getSizeInBits() >= 32)
+ return Op;
+
+ SDValue CmpOp = Op.getOperand(2);
+ // If this is already correctly zero-extended, leave it alone.
+ auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
+ if (DAG.MaskedValueIsZero(CmpOp, HighBits))
+ return Op;
+
+ // Clear the high bits of the compare operand.
+ unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
+ SDValue NewCmpOp =
+ DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
+ DAG.getConstant(MaskVal, dl, MVT::i32));
+
+ // Replace the existing compare operand with the properly zero-extended one.
+ SmallVector<SDValue, 4> Ops;
+ for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
+ Ops.push_back(AtomicNode->getOperand(i));
+ Ops[2] = NewCmpOp;
+ MachineMemOperand *MMO = AtomicNode->getMemOperand();
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
+ auto NodeTy =
+ (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
+ return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerREM(Op, DAG);
case ISD::BSWAP:
return LowerBSWAP(Op, DAG);
+ case ISD::ATOMIC_CMP_SWAP:
+ return LowerATOMIC_CMP_SWAP(Op, DAG);
}
}
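
To see why the zero-extension matters, consider a compare-and-swap on a byte holding 0x80. The atomic load zero-extends, but an i8 expected value may arrive sign-extended, in which case the comparison can never succeed. A standalone C++ illustration (plain integers, not SelectionDAG nodes):

// Worked example of the mismatch the PPC lowering guards against.
#include <cassert>
#include <cstdint>

int main() {
  int8_t Expected = -128;  // byte pattern 0x80
  uint32_t Loaded = 0x80;  // what a zero-extending atomic load yields

  uint32_t SExt = uint32_t(int32_t(Expected)); // 0xFFFFFF80: never matches
  assert(SExt != Loaded);

  uint32_t ZExt = uint32_t(Expected) & 0xFF;   // mask as the lowering does
  assert(ZExt == Loaded);                      // now compares correctly
}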
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b119e5b4a564..b3215a84829e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -430,6 +430,11 @@ namespace llvm {
/// The 4xf32 load used for v4i1 constants.
QVLFSb,
+ /// ATOMIC_CMP_SWAP - exactly the same as the target-independent nodes,
+ /// except that they ensure the compare input is zero-extended for
+ /// sub-word versions, because the atomic loads zero-extend.
+ ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
+
/// GPRC = TOC_ENTRY GA, TOC
/// Loads the entry for GA from the TOC, where the TOC base is given by
/// the last operand.
@@ -955,6 +960,7 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a932d05b24ee..43dcc4479cf0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -257,6 +257,13 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
+// PPC-specific atomic operations.
+def PPCatomicCmpSwap_8 :
+ SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def PPCatomicCmpSwap_16 :
+ SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
@@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in {
}
}
+def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
+ (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
+def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
+ (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
+
// Instructions to support atomic operations
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f1ce430f3323..f2ffba7d5418 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2375,6 +2375,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
.Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
Flags |= Prefix;
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ // We don't have a real instruction with the given prefix;
+ // let's use the prefix as the instruction.
+ // TODO: there could be several prefixes one after another.
+ Flags = X86::IP_NO_PREFIX;
+ break;
+ }
Name = Parser.getTok().getString();
Parser.Lex(); // eat the prefix
// Hack: we could have something like "rep # some comment" or
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a6f56877bd64..e7d9334abe14 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7893,8 +7893,14 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
- return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV,
- SDLoc(V), VT, IndicesVec, SrcVec);
+ if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) {
+ SrcVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
+ SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
+ }
+ if (VT == MVT::v16i8)
+ return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec);
+ return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec);
}
SDValue
@@ -18262,6 +18268,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
+ // For v64i1 without 64-bit support we need to split and rejoin.
+ if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+ assert(Subtarget.hasBWI() && "Expected BWI to be legal");
+ SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32);
+ SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32);
+ SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32);
+ SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32);
+ SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo);
+ SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+ }
+
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
SDValue Op1Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
@@ -28652,13 +28670,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
+ SDValue NewV1 = V1; // Save the operand in case an early exit happens.
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT) &&
+ NewV1, DL, DAG, Subtarget, Shuffle,
+ ShuffleSrcVT, ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- Res = DAG.getBitcast(ShuffleSrcVT, V1);
+ Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());
@@ -28680,33 +28699,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
+ SDValue NewV1 = V1; // Save the operands in case an early exit happens.
+ SDValue NewV2 = V2;
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- V1, V2, DL, DAG, Subtarget, Shuffle,
+ NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(ShuffleSrcVT, V1);
- DCI.AddToWorklist(V1.getNode());
- V2 = DAG.getBitcast(ShuffleSrcVT, V2);
- DCI.AddToWorklist(V2.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
+ NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
+ DCI.AddToWorklist(NewV1.getNode());
+ NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
+ DCI.AddToWorklist(NewV2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, V1, V2, DL, DAG,
- Subtarget, Shuffle, ShuffleVT,
- PermuteImm) &&
+ NewV1 = V1; // Save the operands in case an early exit happens.
+ NewV2 = V2;
+ if (matchBinaryPermuteVectorShuffle(
+ MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
+ NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
- V1 = DAG.getBitcast(ShuffleVT, V1);
- DCI.AddToWorklist(V1.getNode());
- V2 = DAG.getBitcast(ShuffleVT, V2);
- DCI.AddToWorklist(V2.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
+ NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
+ DCI.AddToWorklist(NewV1.getNode());
+ NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
+ DCI.AddToWorklist(NewV2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);
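
The v64i1 SELECT lowering above splits the operands into halves, selects each half, and concatenates the results. A scalar analogue over plain 64-bit integers (an illustration only, not the X86 lowering code):

// Scalar analogue of the v64i1 split-select.
#include <cassert>
#include <cstdint>

uint64_t select64(bool Cond, uint64_t A, uint64_t B) {
  uint32_t ALo = uint32_t(A), AHi = uint32_t(A >> 32);
  uint32_t BLo = uint32_t(B), BHi = uint32_t(B >> 32);
  uint32_t Lo = Cond ? ALo : BLo;   // select on the low half
  uint32_t Hi = Cond ? AHi : BHi;   // select on the high half
  return (uint64_t(Hi) << 32) | Lo; // concatenate the results
}

int main() {
  assert(select64(true, 0x1122334455667788, 0) == 0x1122334455667788);
  assert(select64(false, 0, 0xAABBCCDDEEFF0011) == 0xAABBCCDDEEFF0011);
}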
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 223eed3048db..967d67a84bc0 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -754,7 +754,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// type remains the same.
if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
MVT LegalVT = LT.second;
- if (LegalVT.getVectorElementType().getSizeInBits() ==
+ if (LegalVT.isVector() &&
+ LegalVT.getVectorElementType().getSizeInBits() ==
Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index c0cd1ea74a74..026fab5dbd3b 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -648,7 +648,7 @@ private:
// track in a CHI. In the PDom walk, there can be values in the
// stack which are not control dependent e.g., nested loop.
if (si != RenameStack.end() && si->second.size() &&
- DT->dominates(Pred, si->second.back()->getParent())) {
+ DT->properlyDominates(Pred, si->second.back()->getParent())) {
C.Dest = BB; // Assign the edge
C.I = si->second.pop_back_val(); // Assign the argument
DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName()
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index b8fb80b6cc26..525425bd0f0c 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
@@ -177,9 +176,8 @@ class StructurizeCFG : public RegionPass {
Region *ParentRegion;
DominatorTree *DT;
- LoopInfo *LI;
- SmallVector<RegionNode *, 8> Order;
+ std::deque<RegionNode *> Order;
BBSet Visited;
BBPhiMap DeletedPhis;
@@ -204,7 +202,7 @@ class StructurizeCFG : public RegionPass {
void gatherPredicates(RegionNode *N);
- void collectInfos();
+ void analyzeNode(RegionNode *N);
void insertConditions(bool Loops);
@@ -258,7 +256,6 @@ public:
AU.addRequired<DivergenceAnalysis>();
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@@ -292,55 +289,17 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
- ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
- SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
+ assert(Visited.empty());
+ assert(Predicates.empty());
+ assert(Loops.empty());
+ assert(LoopPreds.empty());
- // The reverse post-order traversal of the list gives us an ordering close
- // to what we want. The only problem with it is that sometimes backedges
- // for outer loops will be visited before backedges for inner loops.
- for (RegionNode *RN : RPOT) {
- BasicBlock *BB = RN->getEntry();
- Loop *Loop = LI->getLoopFor(BB);
- ++LoopBlocks[Loop];
+ // This must be RPO order for the back-edge detection to work.
+ for (RegionNode *RN : ReversePostOrderTraversal<Region*>(ParentRegion)) {
+ // FIXME: Is there a better order to use for structurization?
+ Order.push_back(RN);
+ analyzeNode(RN);
}
-
- unsigned CurrentLoopDepth = 0;
- Loop *CurrentLoop = nullptr;
- for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = (*I)->getEntry();
- unsigned LoopDepth = LI->getLoopDepth(BB);
-
- if (is_contained(Order, *I))
- continue;
-
- if (LoopDepth < CurrentLoopDepth) {
- // Make sure we have visited all blocks in this loop before moving back to
- // the outer loop.
-
- auto LoopI = I;
- while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
- LoopI++;
- BasicBlock *LoopBB = (*LoopI)->getEntry();
- if (LI->getLoopFor(LoopBB) == CurrentLoop) {
- --BlockCount;
- Order.push_back(*LoopI);
- }
- }
- }
-
- CurrentLoop = LI->getLoopFor(BB);
- if (CurrentLoop)
- LoopBlocks[CurrentLoop]--;
-
- CurrentLoopDepth = LoopDepth;
- Order.push_back(*I);
- }
-
- // This pass originally used a post-order traversal and then operated on
- // the list in reverse. Now that we are using a reverse post-order traversal
- // rather than re-working the whole pass to operate on the list in order,
- // we just reverse the list and continue to operate on it in reverse.
- std::reverse(Order.begin(), Order.end());
}
/// \brief Determine the end of the loops
@@ -466,32 +425,19 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
}
/// \brief Collect various loop and predicate infos
-void StructurizeCFG::collectInfos() {
- // Reset predicate
- Predicates.clear();
-
- // and loop infos
- Loops.clear();
- LoopPreds.clear();
+void StructurizeCFG::analyzeNode(RegionNode *RN) {
+ DEBUG(dbgs() << "Visiting: "
+ << (RN->isSubRegion() ? "SubRegion with entry: " : "")
+ << RN->getEntry()->getName() << '\n');
- // Reset the visited nodes
- Visited.clear();
-
- for (RegionNode *RN : reverse(Order)) {
- DEBUG(dbgs() << "Visiting: "
- << (RN->isSubRegion() ? "SubRegion with entry: " : "")
- << RN->getEntry()->getName() << " Loop Depth: "
- << LI->getLoopDepth(RN->getEntry()) << "\n");
-
- // Analyze all the conditions leading to a node
- gatherPredicates(RN);
+ // Analyze all the conditions leading to a node
+ gatherPredicates(RN);
- // Remember that we've seen this node
- Visited.insert(RN->getEntry());
+ // Remember that we've seen this node
+ Visited.insert(RN->getEntry());
- // Find the last back edges
- analyzeLoops(RN);
- }
+ // Find the last back edges
+ analyzeLoops(RN);
}
/// \brief Insert the missing branch conditions
@@ -664,7 +610,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
LLVMContext &Context = Func->getContext();
BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
- Order.back()->getEntry();
+ Order.front()->getEntry();
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
Func, Insert);
DT->addNewBlock(Flow, Dominator);
@@ -744,7 +690,8 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
/// Take one node from the order vector and wire it up
void StructurizeCFG::wireFlow(bool ExitUseAllowed,
BasicBlock *LoopEnd) {
- RegionNode *Node = Order.pop_back_val();
+ RegionNode *Node = Order.front();
+ Order.pop_front();
Visited.insert(Node->getEntry());
if (isPredictableTrue(Node)) {
@@ -768,7 +715,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
PrevNode = Node;
while (!Order.empty() && !Visited.count(LoopEnd) &&
- dominatesPredicates(Entry, Order.back())) {
+ dominatesPredicates(Entry, Order.front())) {
handleLoops(false, LoopEnd);
}
@@ -779,7 +726,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
void StructurizeCFG::handleLoops(bool ExitUseAllowed,
BasicBlock *LoopEnd) {
- RegionNode *Node = Order.back();
+ RegionNode *Node = Order.front();
BasicBlock *LoopStart = Node->getEntry();
if (!Loops.count(LoopStart)) {
@@ -924,10 +871,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
ParentRegion = R;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
orderNodes();
- collectInfos();
+
createFlow();
insertConditions(false);
insertConditions(true);
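
The container change is the heart of the StructurizeCFG rewrite: nodes are now consumed from the front of a deque in the reverse post-order in which they were produced, instead of reversing a vector and popping from the back. A tiny standalone sketch of the equivalence:

// Sketch of the SmallVector-to-deque change: same processing order,
// no reversal step needed.
#include <cassert>
#include <deque>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> RPO = {"entry", "loop", "exit"};

  std::deque<std::string> Order(RPO.begin(), RPO.end());
  std::vector<std::string> Visited;
  while (!Order.empty()) {
    Visited.push_back(Order.front()); // was: Order.back() after std::reverse
    Order.pop_front();                // was: Order.pop_back()
  }
  assert(Visited == RPO); // nodes are processed in RPO order either way
}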
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6ef54385c452..64f206ea92eb 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2630,9 +2630,12 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
Instruction *LastInduction = VecInd;
for (unsigned Part = 0; Part < UF; ++Part) {
VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
- recordVectorLoopValueForInductionCast(II, LastInduction, Part);
+
if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
+ else
+ recordVectorLoopValueForInductionCast(II, LastInduction, Part);
+
LastInduction = cast<Instruction>(addFastMathFlag(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
}
@@ -2754,15 +2757,17 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
// If we haven't yet vectorized the induction variable, splat the scalar
// induction variable, and build the necessary step vectors.
+ // TODO: Don't do it unless the vectorized IV is really required.
if (!VectorizedIV) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
for (unsigned Part = 0; Part < UF; ++Part) {
Value *EntryPart =
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
- recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
+ else
+ recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
}
}
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a7ccd3faec44..f301fc361abc 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1347,7 +1347,6 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " <<
Lane << " from " << *Scalar << ".\n");
ExternalUses.emplace_back(Scalar, nullptr, Lane);
- continue;
}
for (User *U : Scalar->users()) {
DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -4417,13 +4416,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
Value *VL[] = { A, B };
- return tryToVectorizeList(VL, R, None, true);
+ return tryToVectorizeList(VL, R, true);
}
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- ArrayRef<Value *> BuildVector,
- bool AllowReorder,
- bool NeedExtraction) {
+ bool AllowReorder) {
if (VL.size() < 2)
return false;
@@ -4517,12 +4514,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
<< "\n");
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
- ArrayRef<Value *> EmptyArray;
- ArrayRef<Value *> BuildVectorSlice;
- if (!BuildVector.empty())
- BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
- R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
+ R.buildTree(Ops);
// TODO: check if we can allow reordering for more cases.
if (AllowReorder && R.shouldReorder()) {
// Conceptually, there is nothing actually preventing us from trying to
@@ -4530,7 +4522,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// reductions. However, at this point, we only expect to get here when
// there are exactly two operations.
assert(Ops.size() == 2);
- assert(BuildVectorSlice.empty());
Value *ReorderedOps[] = {Ops[1], Ops[0]};
R.buildTree(ReorderedOps, None);
}
@@ -4550,31 +4541,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
- Value *VectorizedRoot = R.vectorizeTree();
-
- // Reconstruct the build vector by extracting the vectorized root. This
- // way we handle the case where some elements of the vector are
- // undefined.
- // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
- if (!BuildVectorSlice.empty()) {
- // The insert point is the last build vector instruction. The
- // vectorized root will precede it. This guarantees that we get an
- // instruction. The vectorized tree could have been constant folded.
- Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
- unsigned VecIdx = 0;
- for (auto &V : BuildVectorSlice) {
- IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
- ++BasicBlock::iterator(InsertAfter));
- Instruction *I = cast<Instruction>(V);
- assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
- Instruction *Extract =
- cast<Instruction>(Builder.CreateExtractElement(
- VectorizedRoot, Builder.getInt32(VecIdx++)));
- I->setOperand(1, Extract);
- I->moveAfter(Extract);
- InsertAfter = I;
- }
- }
+ R.vectorizeTree();
// Move to the next bundle.
I += VF - 1;
NextInst = I + 1;
@@ -5495,11 +5462,9 @@ private:
///
/// Returns true if it matches
static bool findBuildVector(InsertElementInst *LastInsertElem,
- SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V = nullptr;
do {
- BuildVector.push_back(LastInsertElem);
BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
V = LastInsertElem->getOperand(0);
if (isa<UndefValue>(V))
@@ -5508,7 +5473,6 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
if (!LastInsertElem || !LastInsertElem->hasOneUse())
return false;
} while (true);
- std::reverse(BuildVector.begin(), BuildVector.end());
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;
}
@@ -5517,11 +5481,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
///
/// \return true if it matches.
static bool findBuildAggregate(InsertValueInst *IV,
- SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V;
do {
- BuildVector.push_back(IV);
BuildVectorOpds.push_back(IV->getInsertedValueOperand());
V = IV->getAggregateOperand();
if (isa<UndefValue>(V))
@@ -5530,7 +5492,6 @@ static bool findBuildAggregate(InsertValueInst *IV,
if (!IV || !IV->hasOneUse())
return false;
} while (true);
- std::reverse(BuildVector.begin(), BuildVector.end());
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;
}
@@ -5706,27 +5667,25 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
if (!R.canMapToVector(IVI->getType(), DL))
return false;
- SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
- if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
+ if (!findBuildAggregate(IVI, BuildVectorOpds))
return false;
DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// Aggregate value is unlikely to be processed in vector register, we need to
// extract scalars into scalar registers, so NeedExtraction is set true.
- return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
+ return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R) {
- SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
- if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
+ if (!findBuildVector(IEI, BuildVectorOpds))
return false;
// Vectorize starting with the build vector operands ignoring the BuildVector
// instructions for the purpose of scheduling and user extraction.
- return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
+ return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
@@ -5804,8 +5763,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// is done when there are exactly two elements since tryToVectorizeList
// asserts that there are only two values when AllowReorder is true.
bool AllowReorder = NumElts == 2;
- if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
- None, AllowReorder)) {
+ if (NumElts > 1 &&
+ tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
// Success start over because instructions might have been changed.
HaveVectorizedPhiNodes = true;
Changed = true;