| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2018-01-24 20:23:48 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-01-24 20:23:48 +0000 |
| commit | a096e0bdf6cfa020569afca490d8e4c9ac8ebb01 (patch) | |
| tree | 39ef21ba905e021d44b9a5fb47336d4a864da27e /lib | |
| parent | d215fd3b74b90f5dc1964610926fcc2a20f959aa (diff) | |
Diffstat (limited to 'lib'): 22 files changed, 396 insertions, 261 deletions
```diff
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 9dc1ab4e6bb5..26ca8d4ee88c 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -2700,8 +2700,13 @@ public:
     // we still need to collect it due to original value is different.
     // And later we will need all original values as anchors during
     // finding the common Phi node.
+    // We also must reject the case when base offset is different and
+    // scale reg is not null, we cannot handle this case due to merge of
+    // different offsets will be used as ScaleReg.
     if (DifferentField != ExtAddrMode::MultipleFields &&
-        DifferentField != ExtAddrMode::ScaleField) {
+        DifferentField != ExtAddrMode::ScaleField &&
+        (DifferentField != ExtAddrMode::BaseOffsField ||
+         !NewAddrMode.ScaledReg)) {
       AddrModes.emplace_back(NewAddrMode);
       return true;
     }
```

```diff
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 8b9545da914e..3888226fa059 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -577,7 +577,8 @@ bool GlobalMerge::doInitialization(Module &M) {
   for (auto &GV : M.globals()) {
     // Merge is safe for "normal" internal or external globals only
     if (GV.isDeclaration() || GV.isThreadLocal() ||
-        GV.hasSection() || GV.hasImplicitSection())
+        GV.hasSection() || GV.hasImplicitSection() ||
+        GV.hasDLLExportStorageClass())
       continue;

     // It's not safe to merge globals that may be preempted
```
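Context for the CodeGenPrepare hunk above: the pass merges addressing modes that differ in exactly one component by PHI-ing that component. The sketch below is a hypothetical, simplified model (not the real `ExtAddrMode`) of why a differing base offset is only mergeable when no scaled index register is live.

```cpp
#include <cassert>

// Hypothetical, simplified model of an addressing mode:
//   EA = BaseReg + Scale * ScaledReg + Offset
struct AddrMode {
  const void *BaseReg = nullptr;
  const void *ScaledReg = nullptr; // null when no scaled index is used
  int Scale = 0;
  long Offset = 0;
};

// Merging {Base+4} and {Base+8} into {Base + phi(4, 8)} is representable.
// With a live ScaledReg, the PHI of the differing offsets would have to be
// folded into the scaled-index slot, which the merge cannot express, so
// that combination is rejected (mirroring the new condition in the hunk).
bool canMergeDifferingBaseOffsets(const AddrMode &A, const AddrMode &B) {
  assert(A.BaseReg == B.BaseReg && "modes are assumed to share a base");
  return A.ScaledReg == nullptr && B.ScaledReg == nullptr;
}
```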
```diff
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 45078081987a..11acbe687a31 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -719,15 +719,14 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
     CurSrcPair = Pair;
     ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
                             !DisableAdvCopyOpt, TII);
-    ValueTrackerResult Res;
-    bool ShouldRewrite = false;
-    do {
-      // Follow the chain of copies until we reach the top of the use-def chain
-      // or find a more suitable source.
-      Res = ValTracker.getNextSource();
+
+    // Follow the chain of copies until we find a more suitable source, a phi
+    // or have to abort.
+    while (true) {
+      ValueTrackerResult Res = ValTracker.getNextSource();
+      // Abort at the end of a chain (without finding a suitable source).
       if (!Res.isValid())
-        break;
+        return false;

       // Insert the Def -> Use entry for the recently found source.
       ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
@@ -763,24 +762,19 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
       if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
         return false;

+      // Keep following the chain if the value isn't any better yet.
       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
-      ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
-                                                CurSrcPair.SubReg);
-    } while (!ShouldRewrite);
-
-    // Continue looking for new sources...
-    if (Res.isValid())
-      continue;
+      if (!TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, CurSrcPair.SubReg))
+        continue;

-    // Do not continue searching for a new source if the there's at least
-    // one use-def which cannot be rewritten.
-    if (!ShouldRewrite)
-      return false;
-  }
+      // We currently cannot deal with subreg operands on PHI instructions
+      // (see insertPHI()).
+      if (PHICount > 0 && CurSrcPair.SubReg != 0)
+        continue;

-  if (PHICount >= RewritePHILimit) {
-    DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
-    return false;
+      // We found a suitable source, and are done with this chain.
+      break;
+    }
   }

   // If we did not find a more suitable source, there is nothing to optimize.
@@ -799,6 +793,9 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
   assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");

   const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
+  // NewRC is only correct if no subregisters are involved. findNextSource()
+  // should have rejected those cases already.
+  assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
   unsigned NewVR = MRI->createVirtualRegister(NewRC);
   MachineBasicBlock *MBB = OrigPHI->getParent();
   MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
```
```diff
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 81bff4d7eefa..2c6b724c02df 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
       EVT ExtVT;
       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
           isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
-        // Only add this load if we can make it more narrow.
-        if (ExtVT.bitsLT(Load->getMemoryVT()))
+
+        // ZEXTLOAD is already small enough.
+        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
+            ExtVT.bitsGE(Load->getMemoryVT()))
+          continue;
+
+        // Use LE to convert equal sized loads to zext.
+        if (ExtVT.bitsLE(Load->getMemoryVT()))
           Loads.insert(Load);
+
         continue;
       }
       return false;
@@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
   if (Loads.size() == 0)
     return false;

+  DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
   SDValue MaskOp = N->getOperand(1);

   // If it exists, fixup the single node we allow in the tree that needs
   // masking.
   if (FixupNode) {
+    DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
     SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                               FixupNode->getValueType(0),
                               SDValue(FixupNode, 0), MaskOp);
@@ -3914,14 +3923,21 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {

   // Narrow any constants that need it.
   for (auto *LogicN : NodesWithConsts) {
-    auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
-    SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
-                              SDValue(C, 0), MaskOp);
-    DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
+    SDValue Op0 = LogicN->getOperand(0);
+    SDValue Op1 = LogicN->getOperand(1);
+
+    if (isa<ConstantSDNode>(Op0))
+      std::swap(Op0, Op1);
+
+    SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+                              Op1, MaskOp);
+
+    DAG.UpdateNodeOperands(LogicN, Op0, And);
   }

   // Create narrow loads.
   for (auto *Load : Loads) {
+    DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
     SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                               SDValue(Load, 0), MaskOp);
     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
@@ -5209,7 +5225,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
       return SDValue();

     // Loads must share the same base address
-    BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
     int64_t ByteOffsetFromBase = 0;
     if (!Base)
       Base = Ptr;
@@ -12928,7 +12944,7 @@ void DAGCombiner::getStoreMergeCandidates(
     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer.
-  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   EVT MemVT = St->getMemoryVT();
   SDValue Val = peekThroughBitcast(St->getValue());
@@ -12949,7 +12965,7 @@ void DAGCombiner::getStoreMergeCandidates(
   EVT LoadVT;
   if (IsLoadSrc) {
     auto *Ld = cast<LoadSDNode>(Val);
-    LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+    LBasePtr = BaseIndexOffset::match(Ld, DAG);
     LoadVT = Ld->getMemoryVT();
     // Load and store should be the same type.
     if (MemVT != LoadVT)
@@ -12968,7 +12984,7 @@ void DAGCombiner::getStoreMergeCandidates(
         return false;
       // The Load's Base Ptr must also match
       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
-        auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
         if (LoadVT != OtherLd->getMemoryVT())
           return false;
         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
@@ -12992,7 +13008,7 @@ void DAGCombiner::getStoreMergeCandidates(
           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
         return false;
     }
-    Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+    Ptr = BaseIndexOffset::match(Other, DAG);
     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   };
@@ -13365,7 +13381,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
         if (Ld->getMemoryVT() != MemVT)
           break;

-        BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+        BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
        // If this is not the first ptr that we check.
        int64_t LdOffset = 0;
        if (LdBasePtr.getBase().getNode()) {
@@ -17432,44 +17448,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();

   // Check for BaseIndexOffset matching.
-  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
-  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
+  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
+  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
   int64_t PtrDiff;
-  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
-    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
+    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

-  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
-  // able to calculate their relative offset if at least one arises
-  // from an alloca. However, these allocas cannot overlap and we
-  // can infer there is no alias.
-  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
-    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
-      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
-      // If the base are the same frame index but the we couldn't find a
-      // constant offset, (indices are different) be conservative.
-      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
-                     !MFI.isFixedObjectIndex(B->getIndex())))
-        return false;
-    }
+    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+    // able to calculate their relative offset if at least one arises
+    // from an alloca. However, these allocas cannot overlap and we
+    // can infer there is no alias.
+    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+        // If the base are the same frame index but the we couldn't find a
+        // constant offset, (indices are different) be conservative.
+        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+                       !MFI.isFixedObjectIndex(B->getIndex())))
+          return false;
+      }

-  bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
-  bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
-  bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
-  bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
-  bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
-  bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());

-  // If of mismatched base types or checkable indices we can check
-  // they do not alias.
-  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
-       (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
-      (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
-    return false;
+    // If of mismatched base types or checkable indices we can check
+    // they do not alias.
+    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+      return false;
+  }

-  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
-  // compared to the size and offset of the access, we may be able to prove they
-  // do not alias. This check is conservative for now to catch cases created by
-  // splitting vector types.
+  // If we know required SrcValue1 and SrcValue2 have relatively large
+  // alignment compared to the size and offset of the access, we may be able
+  // to prove they do not alias. This check is conservative for now to catch
+  // cases created by splitting vector types.
   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
@@ -17479,8 +17497,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

-    // There is no overlap between these relatively aligned accesses of similar
-    // size. Return no alias.
+    // There is no overlap between these relatively aligned accesses of
+    // similar size. Return no alias.
     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
         (OffAlign1 + NumBytes1) <= OffAlign0)
       return false;
@@ -17643,7 +17661,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {

   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer.
-  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

   // We must have a base and an offset.
   if (!BasePtr.getBase().getNode())
@@ -17669,7 +17687,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
       break;

     // Find the base pointer and offset for this memory node.
-    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+    BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);

     // Check that the base pointer is the same as the original one.
     if (!BasePtr.equalBaseIndex(Ptr, DAG))
```
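The rewritten `isAlias()` above now only runs its BaseIndexOffset checks when both decompositions produced a base, but the core disjointness test is unchanged. It is plain interval arithmetic; a minimal standalone restatement:

```cpp
#include <cstdint>

// Accesses of NumBytes0/NumBytes1 bytes whose start addresses differ by the
// signed distance PtrDiff overlap unless one interval ends before the other
// starts. Mirrors `!((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0))`.
bool mayOverlap(int64_t PtrDiff, int64_t NumBytes0, int64_t NumBytes1) {
  bool firstEndsBeforeSecond = NumBytes0 <= PtrDiff;     // [0,N0) vs [PtrDiff,...)
  bool secondEndsBeforeFirst = PtrDiff + NumBytes1 <= 0; // [PtrDiff,PtrDiff+N1) vs [0,...)
  return !(firstEndsBeforeSecond || secondEndsBeforeFirst);
}

// Example: two 4-byte stores 4 bytes apart are disjoint, mayOverlap(4, 4, 4)
// == false; 2 bytes apart they clash, mayOverlap(2, 4, 4) == true.
```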
```diff
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bb1dc17b7a1b..b566c232cbc3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     case ISD::ZERO_EXTEND:
       LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
                         DAG.getValueType(AtomicType));
-      RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+      RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
       ExtRes = LHS;
       break;
     case ISD::ANY_EXTEND:
       LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
-      RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+      RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
       break;
     default:
       llvm_unreachable("Invalid atomic op extension");
```
```diff
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4c8b63d2f239..3ffc6fa9a059 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7947,11 +7947,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;

-  SDValue Loc = LD->getOperand(1);
-  SDValue BaseLoc = Base->getOperand(1);
-
-  auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
-  auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+  auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+  auto LocDecomp = BaseIndexOffset::match(LD, *this);

   int64_t Offset = 0;
   if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
```

```diff
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d5980919d03c..da1574f60524 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -21,6 +21,9 @@ using namespace llvm;
 bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
                                      const SelectionDAG &DAG, int64_t &Off) {
+  // Conservatively fail if we a match failed..
+  if (!Base.getNode() || !Other.Base.getNode())
+    return false;
   // Initial Offset difference.
   Off = Other.Offset - Offset;

@@ -72,13 +75,29 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
 }

 /// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
+BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
+                                       const SelectionDAG &DAG) {
+  SDValue Ptr = N->getBasePtr();
+
   // (((B + I*M) + c)) + c ...
   SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
   SDValue Index = SDValue();
   int64_t Offset = 0;
   bool IsIndexSignExt = false;

+  // pre-inc/pre-dec ops are components of EA.
+  if (N->getAddressingMode() == ISD::PRE_INC) {
+    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+      Offset += C->getSExtValue();
+    else // If unknown, give up now.
+      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+  } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+      Offset -= C->getSExtValue();
+    else // If unknown, give up now.
+      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+  }
+
   // Consume constant adds & ors with appropriate masking.
   while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
     if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
```
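The `BaseIndexOffset::match()` change above takes the whole memory node instead of just its base pointer so that pre-increment/pre-decrement addressing can fold the step into the constant offset before the usual peeling of ADD/OR nodes. A toy model of that decomposition (illustration only, not the LLVM API):

```cpp
#include <cstdint>
#include <optional>

// Toy decomposition result: Base + Offset (the real BaseIndexOffset also
// carries an index register; omitted for brevity).
struct Decomposed {
  const void *Base;
  int64_t Offset;
};

enum class AddressingMode { Unindexed, PreInc, PreDec };

// For a pre-indexed access the effective address is base +/- step, so a
// constant step is folded into Offset up front; a non-constant step makes
// the whole match fail, which equalBaseIndex() now treats conservatively.
std::optional<Decomposed> decompose(const void *Base, AddressingMode AM,
                                    std::optional<int64_t> Step) {
  Decomposed D{Base, 0};
  if (AM == AddressingMode::PreInc || AM == AddressingMode::PreDec) {
    if (!Step)
      return std::nullopt; // unknown step: give up, nothing to compare
    D.Offset += (AM == AddressingMode::PreInc) ? *Step : -*Step;
  }
  return D;
}
```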
```diff
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 224ae1a3236a..b29a33ac1c14 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -132,9 +132,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
     setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
     setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

-    // Darwin 10 and higher has an optimized __bzero.
-    if (!TT.isMacOSX() || !TT.isMacOSXVersionLT(10, 6) || TT.isArch64Bit()) {
-      setLibcallName(RTLIB::BZERO, TT.isAArch64() ? "bzero" : "__bzero");
+    // Some darwins have an optimized __bzero/bzero function.
+    switch (TT.getArch()) {
+    case Triple::x86:
+    case Triple::x86_64:
+      if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
+        setLibcallName(RTLIB::BZERO, "__bzero");
+      break;
+    case Triple::aarch64:
+      setLibcallName(RTLIB::BZERO, "bzero");
+      break;
+    default:
+      break;
     }

     if (darwinHasSinCos(TT)) {
```

```diff
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index ee067a912e3c..f7170e714b9b 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -954,7 +954,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
     NewGV->setLinkage(GlobalValue::InternalLinkage);

   Constant *C = NewGV;
-  if (DGV)
+  // Only create a bitcast if necessary. In particular, with
+  // DebugTypeODRUniquing we may reach metadata in the destination module
+  // containing a GV from the source module, in which case SGV will be
+  // the same as DGV and NewGV, and TypeMap.get() will assert since it
+  // assumes it is being invoked on a type in the source module.
+  if (DGV && NewGV != SGV)
     C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));

   if (DGV && NewGV != DGV) {
```
```diff
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 82b81ccc24da..5fd5bde9f1eb 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -76,6 +76,14 @@ bool CodeViewContext::addFile(MCStreamer &OS, unsigned FileNumber,
   return true;
 }

+MCCVFunctionInfo *CodeViewContext::getCVFunctionInfo(unsigned FuncId) {
+  if (FuncId >= Functions.size())
+    return nullptr;
+  if (Functions[FuncId].isUnallocatedFunctionInfo())
+    return nullptr;
+  return &Functions[FuncId];
+}
+
 bool CodeViewContext::recordFunctionId(unsigned FuncId) {
   if (FuncId >= Functions.size())
     Functions.resize(FuncId + 1);
@@ -247,6 +255,67 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS,
   OS.EmitValueImpl(SRE, 4);
 }

+void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
+  size_t Offset = MCCVLines.size();
+  auto I = MCCVLineStartStop.insert(
+      {LineEntry.getFunctionId(), {Offset, Offset + 1}});
+  if (!I.second)
+    I.first->second.second = Offset + 1;
+  MCCVLines.push_back(LineEntry);
+}
+
+std::vector<MCCVLineEntry>
+CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
+  std::vector<MCCVLineEntry> FilteredLines;
+  auto I = MCCVLineStartStop.find(FuncId);
+  if (I != MCCVLineStartStop.end()) {
+    MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
+    for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
+         ++Idx) {
+      unsigned LocationFuncId = MCCVLines[Idx].getFunctionId();
+      if (LocationFuncId == FuncId) {
+        // This was a .cv_loc directly for FuncId, so record it.
+        FilteredLines.push_back(MCCVLines[Idx]);
+      } else {
+        // Check if the current location is inlined in this function. If it is,
+        // synthesize a statement .cv_loc at the original inlined call site.
+        auto I = SiteInfo->InlinedAtMap.find(LocationFuncId);
+        if (I != SiteInfo->InlinedAtMap.end()) {
+          MCCVFunctionInfo::LineInfo &IA = I->second;
+          // Only add the location if it differs from the previous location.
+          // Large inlined calls will have many .cv_loc entries and we only need
+          // one line table entry in the parent function.
+          if (FilteredLines.empty() ||
+              FilteredLines.back().getFileNum() != IA.File ||
+              FilteredLines.back().getLine() != IA.Line ||
+              FilteredLines.back().getColumn() != IA.Col) {
+            FilteredLines.push_back(MCCVLineEntry(
+                MCCVLines[Idx].getLabel(),
+                MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false)));
+          }
+        }
+      }
+    }
+  }
+  return FilteredLines;
+}
+
+std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
+  auto I = MCCVLineStartStop.find(FuncId);
+  // Return an empty extent if there are no cv_locs for this function id.
+  if (I == MCCVLineStartStop.end())
+    return {~0ULL, 0};
+  return I->second;
+}
+
+ArrayRef<MCCVLineEntry> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
+  if (R <= L)
+    return None;
+  if (L >= MCCVLines.size())
+    return None;
+  return makeArrayRef(&MCCVLines[L], R - L);
+}
+
 void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
                                                unsigned FuncId,
                                                const MCSymbol *FuncBegin,
```
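The interesting part of `getFunctionLineEntries()` above is collapsing runs of `.cv_loc` records that resolve to the same inlined call site, so a heavily inlined callee contributes a single parent line-table entry. A minimal sketch of that dedup with hypothetical types:

```cpp
#include <vector>

// Hypothetical stand-in for an inlined-at location (file/line/column).
struct Loc {
  unsigned File, Line, Col;
  bool operator==(const Loc &O) const {
    return File == O.File && Line == O.Line && Col == O.Col;
  }
};

// Keep only the first of each consecutive run of identical call sites,
// mirroring the FilteredLines.back() comparison in the hunk above.
std::vector<Loc> collapseInlinedLocs(const std::vector<Loc> &Sites) {
  std::vector<Loc> Out;
  for (const Loc &L : Sites)
    if (Out.empty() || !(Out.back() == L))
      Out.push_back(L);
  return Out;
}
```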
```diff
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index c2d3ae31c624..b85b4e082996 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -868,6 +868,40 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     if (OpFlags & AArch64II::MO_GOT) {
       I.setDesc(TII.get(AArch64::LOADgot));
       I.getOperand(1).setTargetFlags(OpFlags);
+    } else if (TM.getCodeModel() == CodeModel::Large) {
+      // Materialize the global using movz/movk instructions.
+      unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+      auto InsertPt = std::next(I.getIterator());
+      auto MovZ =
+          BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
+              .addDef(MovZDstReg);
+      MovZ->addOperand(MF, I.getOperand(1));
+      MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
+                                         AArch64II::MO_NC);
+      MovZ->addOperand(MF, MachineOperand::CreateImm(0));
+      constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
+
+      auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
+                           unsigned Offset, unsigned ForceDstReg) {
+        unsigned DstReg =
+            ForceDstReg ? ForceDstReg
+                        : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+        auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
+                            TII.get(AArch64::MOVKXi))
+                        .addDef(DstReg)
+                        .addReg(SrcReg);
+        MovI->addOperand(MF, MachineOperand::CreateGA(
+                                 GV, MovZ->getOperand(1).getOffset(), Flags));
+        MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
+        constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
+        return DstReg;
+      };
+      unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+                                  AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+      DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
+      BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
+      I.eraseFromParent();
+      return true;
     } else {
       I.setDesc(TII.get(AArch64::MOVaddr));
       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
```
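The selected movz/movk sequence materializes a 64-bit absolute address sixteen bits at a time: `movz` sets bits [0,16) and clears the rest, then each `movk` inserts the G1/G2/G3 chunk while preserving the others. The bit manipulation, modeled in plain C++ (an illustration of the instruction semantics, not compiler code):

```cpp
#include <cstdint>

// movz: materialize a 16-bit immediate at the given shift, zeroing the rest.
uint64_t movz(uint16_t Imm, unsigned Shift) { return (uint64_t)Imm << Shift; }

// movk: insert a 16-bit immediate at the given shift, keeping other bits.
uint64_t movk(uint64_t Reg, uint16_t Imm, unsigned Shift) {
  uint64_t Mask = 0xFFFFull << Shift;
  return (Reg & ~Mask) | ((uint64_t)Imm << Shift);
}

// The four-instruction sequence built by the hunk above (G0..G3 chunks).
uint64_t materializeLargeCodeModelAddress(uint64_t Addr) {
  uint64_t R = movz(Addr & 0xFFFF, 0);       // MOVZXi, MO_G0 | MO_NC
  R = movk(R, (Addr >> 16) & 0xFFFF, 16);    // MOVKXi, MO_G1 | MO_NC
  R = movk(R, (Addr >> 32) & 0xFFFF, 32);    // MOVKXi, MO_G2 | MO_NC
  return movk(R, (Addr >> 48) & 0xFFFF, 48); // MOVKXi, MO_G3
}
```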
```diff
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 740861851185..f08c50540656 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -821,7 +821,6 @@ namespace llvm {
                 MutableArrayRef<int> NewMask, unsigned Options = None);
     OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
                 MutableArrayRef<int> NewMask);
-    OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results);
     OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
                 ResultStack &Results);
     OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
@@ -1139,25 +1138,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
   return concat(Out[0], Out[1], Results);
 }

-OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) {
-  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
-
-  int VecLen = SM.Mask.size();
-  SmallVector<uint8_t,128> UsedBytes(VecLen);
-  bool HasUnused = false;
-  for (int I = 0; I != VecLen; ++I) {
-    if (SM.Mask[I] != -1)
-      UsedBytes[I] = 0xFF;
-    else
-      HasUnused = true;
-  }
-  if (!HasUnused)
-    return Va;
-  SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode));
-  Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)});
-  return OpRef::res(Results.top());
-}
-
 OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
                          ResultStack &Results) {
   DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
```

```diff
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f9de65fcb1df..f0e8b11a3d9c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

+  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+
   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
   for (MVT VT : MVT::integer_valuetypes()) {
     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::Hi:              return "PPCISD::Hi";
   case PPCISD::Lo:              return "PPCISD::Lo";
   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
+  case PPCISD::ATOMIC_CMP_SWAP_8:  return "PPCISD::ATOMIC_CMP_SWAP_8";
+  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
@@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
   return Op;
 }

+// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
+// compared to a value that is atomically loaded (atomic loads zero-extend).
+SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
+         "Expecting an atomic compare-and-swap here.");
+  SDLoc dl(Op);
+  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
+  EVT MemVT = AtomicNode->getMemoryVT();
+  if (MemVT.getSizeInBits() >= 32)
+    return Op;
+
+  SDValue CmpOp = Op.getOperand(2);
+  // If this is already correctly zero-extended, leave it alone.
+  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
+  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
+    return Op;
+
+  // Clear the high bits of the compare operand.
+  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
+  SDValue NewCmpOp =
+    DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
+                DAG.getConstant(MaskVal, dl, MVT::i32));
+
+  // Replace the existing compare operand with the properly zero-extended one.
+  SmallVector<SDValue, 4> Ops;
+  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
+    Ops.push_back(AtomicNode->getOperand(i));
+  Ops[2] = NewCmpOp;
+  MachineMemOperand *MMO = AtomicNode->getMemOperand();
+  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
+  auto NodeTy =
+    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
+  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
+}
+
 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerREM(Op, DAG);
   case ISD::BSWAP:
     return LowerBSWAP(Op, DAG);
+  case ISD::ATOMIC_CMP_SWAP:
+    return LowerATOMIC_CMP_SWAP(Op, DAG);
   }
 }
```
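The comment in `LowerATOMIC_CMP_SWAP()` above states the whole bug: the atomic load side of a sub-word compare-and-swap zero-extends, so a sign-extended "expected" operand can never compare equal for values with the top bit set. A self-contained illustration of the masking fix:

```cpp
#include <cstdint>

// A sub-word atomic load zero-extends: the byte 0x80 becomes 0x00000080.
uint32_t atomicLoadZext8(uint8_t Mem) { return Mem; }

// If the expected value arrives sign-extended (e.g. int8_t -128 widened to
// 0xFFFFFF80), a raw 32-bit compare always fails. Masking it to the memory
// width -- the AND with (1 << bits) - 1 inserted by the patch -- repairs it.
bool cmpXchgCompare(uint8_t Mem, int8_t Expected) {
  uint32_t Loaded = atomicLoadZext8(Mem);
  uint32_t SignExt = (uint32_t)(int32_t)Expected; // wrong: 0xFFFFFF80
  uint32_t ZeroExt = SignExt & 0xFF;              // fixed: 0x00000080
  (void)SignExt; // Loaded == SignExt would be false even for Mem == 0x80
  return Loaded == ZeroExt;                       // true for Mem == 0x80
}
```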
```diff
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b119e5b4a564..b3215a84829e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -430,6 +430,11 @@ namespace llvm {
       /// The 4xf32 load used for v4i1 constants.
       QVLFSb,

+      /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
+      /// except they ensure that the compare input is zero-extended for
+      /// sub-word versions because the atomic loads zero-extend.
+      ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
+
       /// GPRC = TOC_ENTRY GA, TOC
       /// Loads the entry for GA from the TOC, where the TOC base is given by
       /// the last operand.
@@ -955,6 +960,7 @@ namespace llvm {
     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
```

```diff
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a932d05b24ee..43dcc4479cf0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -257,6 +257,13 @@ def PPCvcmp_o     : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
 def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
                            [SDNPHasChain, SDNPOptInGlue]>;

+// PPC-specific atomic operations.
+def PPCatomicCmpSwap_8 :
+  SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3,
+         [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def PPCatomicCmpSwap_16 :
+  SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3,
+         [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 def PPClbrx       : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
                            [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def PPCstbrx      : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
@@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in {
   }
 }

+def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
+          (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
+def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
+          (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
+
 // Instructions to support atomic operations
 let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
 def LBARX : XForm_1<31,  52, (outs gprc:$rD), (ins memrr:$src),
```

```diff
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f1ce430f3323..f2ffba7d5418 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2375,6 +2375,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
                 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
     Flags |= Prefix;
+    if (getLexer().is(AsmToken::EndOfStatement)) {
+      // We don't have real instr with the given prefix
+      // let's use the prefix as the instr.
+      // TODO: there could be several prefixes one after another
+      Flags = X86::IP_NO_PREFIX;
+      break;
+    }
     Name = Parser.getTok().getString();
     Parser.Lex(); // eat the prefix
     // Hack: we could have something like "rep # some comment" or
```
```diff
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a6f56877bd64..e7d9334abe14 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7893,8 +7893,14 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
     IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
                                  VT.getVectorNumElements());
   IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
-  return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV,
-                     SDLoc(V), VT, IndicesVec, SrcVec);
+  if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) {
+    SrcVec =
+        DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
+                    SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
+  }
+  if (VT == MVT::v16i8)
+    return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec);
+  return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec);
 }

 SDValue
@@ -18262,6 +18268,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
   }

+  // For v64i1 without 64-bit support we need to split and rejoin.
+  if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+    assert(Subtarget.hasBWI() && "Expected BWI to be legal");
+    SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32);
+    SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32);
+    SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32);
+    SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32);
+    SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo);
+    SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi);
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+  }
+
   if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
     SDValue Op1Scalar;
     if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
@@ -28652,13 +28670,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       }
     }

+    SDValue NewV1 = V1; // Save operand in case early exit happens.
     if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                                V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
-                                ShuffleVT) &&
+                                NewV1, DL, DAG, Subtarget, Shuffle,
+                                ShuffleSrcVT, ShuffleVT) &&
        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
      if (Depth == 1 && Root.getOpcode() == Shuffle)
        return SDValue(); // Nothing to do!
-      Res = DAG.getBitcast(ShuffleSrcVT, V1);
+      Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
      DCI.AddToWorklist(Res.getNode());
      Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
      DCI.AddToWorklist(Res.getNode());
@@ -28680,33 +28699,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
      }
    }

+    SDValue NewV1 = V1; // Save operands in case early exit happens.
+    SDValue NewV2 = V2;
     if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                                 V1, V2, DL, DAG, Subtarget, Shuffle,
+                                 NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
                                  ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
      if (Depth == 1 && Root.getOpcode() == Shuffle)
        return SDValue(); // Nothing to do!
-      V1 = DAG.getBitcast(ShuffleSrcVT, V1);
-      DCI.AddToWorklist(V1.getNode());
-      V2 = DAG.getBitcast(ShuffleSrcVT, V2);
-      DCI.AddToWorklist(V2.getNode());
-      Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
+      NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
+      DCI.AddToWorklist(NewV1.getNode());
+      NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
+      DCI.AddToWorklist(NewV2.getNode());
+      Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
      DCI.AddToWorklist(Res.getNode());
      return DAG.getBitcast(RootVT, Res);
    }

-    if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
-                                        AllowIntDomain, V1, V2, DL, DAG,
-                                        Subtarget, Shuffle, ShuffleVT,
-                                        PermuteImm) &&
+    NewV1 = V1; // Save operands in case early exit happens.
+    NewV2 = V2;
+    if (matchBinaryPermuteVectorShuffle(
+            MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
+            NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
      if (Depth == 1 && Root.getOpcode() == Shuffle)
        return SDValue(); // Nothing to do!
-      V1 = DAG.getBitcast(ShuffleVT, V1);
-      DCI.AddToWorklist(V1.getNode());
-      V2 = DAG.getBitcast(ShuffleVT, V2);
-      DCI.AddToWorklist(V2.getNode());
-      Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
+      NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
+      DCI.AddToWorklist(NewV1.getNode());
+      NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
+      DCI.AddToWorklist(NewV2.getNode());
+      Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
                        DAG.getConstant(PermuteImm, DL, MVT::i8));
      DCI.AddToWorklist(Res.getNode());
      return DAG.getBitcast(RootVT, Res);
```
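The v64i1 SELECT path above splits the operands into two v32i1 halves, selects each, and concatenates, since 32-bit targets have no 64-bit mask registers. The same shape in a host-side sketch using std::bitset (illustration only):

```cpp
#include <bitset>
#include <cstdint>

// Select between two 64-bit masks by working on 32-bit halves and rejoining,
// mirroring extractSubVector / getSelect / CONCAT_VECTORS in the hunk above.
std::bitset<64> select64(bool Cond, std::bitset<64> A, std::bitset<64> B) {
  uint64_t ABits = A.to_ullong(), BBits = B.to_ullong();
  uint32_t Lo = Cond ? (uint32_t)ABits : (uint32_t)BBits;                 // bits [0,32)
  uint32_t Hi = Cond ? (uint32_t)(ABits >> 32) : (uint32_t)(BBits >> 32); // bits [32,64)
  return std::bitset<64>(((uint64_t)Hi << 32) | Lo);
}
```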
```diff
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 223eed3048db..967d67a84bc0 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -754,7 +754,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
   // type remains the same.
   if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
     MVT LegalVT = LT.second;
-    if (LegalVT.getVectorElementType().getSizeInBits() ==
+    if (LegalVT.isVector() &&
+        LegalVT.getVectorElementType().getSizeInBits() ==
             Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
         LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
```

```diff
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index c0cd1ea74a74..026fab5dbd3b 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -648,7 +648,7 @@ private:
         // track in a CHI. In the PDom walk, there can be values in the
         // stack which are not control dependent e.g., nested loop.
         if (si != RenameStack.end() && si->second.size() &&
-            DT->dominates(Pred, si->second.back()->getParent())) {
+            DT->properlyDominates(Pred, si->second.back()->getParent())) {
           C.Dest = BB;                     // Assign the edge
           C.I = si->second.pop_back_val(); // Assign the argument
           DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName()
```
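The GVNHoist fix hinges on a standard dominator-tree subtlety: every block dominates itself, so `dominates(Pred, BB)` is trivially true when `Pred` is the block in question, while the rename-stack logic needs strict dominance. A minimal model of the two predicates (not LLVM's implementation):

```cpp
// Walk the immediate-dominator chain: A dominates B iff A appears on the
// path from B up to the root. Every node therefore dominates itself, and
// properlyDominates() is the same relation with the A == B case excluded.
struct Node {
  const Node *IDom; // immediate dominator; null at the tree's root
};

bool dominates(const Node *A, const Node *B) {
  for (const Node *N = B; N; N = N->IDom)
    if (N == A)
      return true;
  return false;
}

bool properlyDominates(const Node *A, const Node *B) {
  return A != B && dominates(A, B);
}
```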
```diff
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index b8fb80b6cc26..525425bd0f0c 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -14,7 +14,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPass.h"
@@ -177,9 +176,8 @@ class StructurizeCFG : public RegionPass {
   Region *ParentRegion;

   DominatorTree *DT;
-  LoopInfo *LI;

-  SmallVector<RegionNode *, 8> Order;
+  std::deque<RegionNode *> Order;
   BBSet Visited;

   BBPhiMap DeletedPhis;
@@ -204,7 +202,7 @@ class StructurizeCFG : public RegionPass {

   void gatherPredicates(RegionNode *N);

-  void collectInfos();
+  void analyzeNode(RegionNode *N);

   void insertConditions(bool Loops);

@@ -258,7 +256,6 @@ public:
     AU.addRequired<DivergenceAnalysis>();
     AU.addRequiredID(LowerSwitchID);
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();

     AU.addPreserved<DominatorTreeWrapperPass>();
     RegionPass::getAnalysisUsage(AU);
@@ -292,55 +289,17 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {

 /// \brief Build up the general order of nodes
 void StructurizeCFG::orderNodes() {
-  ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
-  SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
+  assert(Visited.empty());
+  assert(Predicates.empty());
+  assert(Loops.empty());
+  assert(LoopPreds.empty());

-  // The reverse post-order traversal of the list gives us an ordering close
-  // to what we want.  The only problem with it is that sometimes backedges
-  // for outer loops will be visited before backedges for inner loops.
-  for (RegionNode *RN : RPOT) {
-    BasicBlock *BB = RN->getEntry();
-    Loop *Loop = LI->getLoopFor(BB);
-    ++LoopBlocks[Loop];
+  // This must be RPO order for the back edge detection to work
+  for (RegionNode *RN : ReversePostOrderTraversal<Region*>(ParentRegion)) {
+    // FIXME: Is there a better order to use for structurization?
+    Order.push_back(RN);
+    analyzeNode(RN);
   }
-
-  unsigned CurrentLoopDepth = 0;
-  Loop *CurrentLoop = nullptr;
-  for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
-    BasicBlock *BB = (*I)->getEntry();
-    unsigned LoopDepth = LI->getLoopDepth(BB);
-
-    if (is_contained(Order, *I))
-      continue;
-
-    if (LoopDepth < CurrentLoopDepth) {
-      // Make sure we have visited all blocks in this loop before moving back to
-      // the outer loop.
-
-      auto LoopI = I;
-      while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
-        LoopI++;
-        BasicBlock *LoopBB = (*LoopI)->getEntry();
-        if (LI->getLoopFor(LoopBB) == CurrentLoop) {
-          --BlockCount;
-          Order.push_back(*LoopI);
-        }
-      }
-    }
-
-    CurrentLoop = LI->getLoopFor(BB);
-    if (CurrentLoop)
-      LoopBlocks[CurrentLoop]--;
-
-    CurrentLoopDepth = LoopDepth;
-    Order.push_back(*I);
-  }
-
-  // This pass originally used a post-order traversal and then operated on
-  // the list in reverse. Now that we are using a reverse post-order traversal
-  // rather than re-working the whole pass to operate on the list in order,
-  // we just reverse the list and continue to operate on it in reverse.
-  std::reverse(Order.begin(), Order.end());
 }

 /// \brief Determine the end of the loops
@@ -466,32 +425,19 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 }

 /// \brief Collect various loop and predicate infos
-void StructurizeCFG::collectInfos() {
-  // Reset predicate
-  Predicates.clear();
-
-  // and loop infos
-  Loops.clear();
-  LoopPreds.clear();
+void StructurizeCFG::analyzeNode(RegionNode *RN) {
+  DEBUG(dbgs() << "Visiting: "
+               << (RN->isSubRegion() ? "SubRegion with entry: " : "")
+               << RN->getEntry()->getName() << '\n');

-  // Reset the visited nodes
-  Visited.clear();
-
-  for (RegionNode *RN : reverse(Order)) {
-    DEBUG(dbgs() << "Visiting: "
-                 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
-                 << RN->getEntry()->getName() << " Loop Depth: "
-                 << LI->getLoopDepth(RN->getEntry()) << "\n");
-
-    // Analyze all the conditions leading to a node
-    gatherPredicates(RN);
+  // Analyze all the conditions leading to a node
+  gatherPredicates(RN);

-    // Remember that we've seen this node
-    Visited.insert(RN->getEntry());
+  // Remember that we've seen this node
+  Visited.insert(RN->getEntry());

-    // Find the last back edges
-    analyzeLoops(RN);
-  }
+  // Find the last back edges
+  analyzeLoops(RN);
 }

 /// \brief Insert the missing branch conditions
@@ -664,7 +610,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
   LLVMContext &Context = Func->getContext();
   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
-                       Order.back()->getEntry();
+                       Order.front()->getEntry();

   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert);
   DT->addNewBlock(Flow, Dominator);
@@ -744,7 +690,8 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {

 /// Take one node from the order vector and wire it up
 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
                               BasicBlock *LoopEnd) {
-  RegionNode *Node = Order.pop_back_val();
+  RegionNode *Node = Order.front();
+  Order.pop_front();
   Visited.insert(Node->getEntry());

   if (isPredictableTrue(Node)) {
@@ -768,7 +715,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
     PrevNode = Node;

     while (!Order.empty() && !Visited.count(LoopEnd) &&
-           dominatesPredicates(Entry, Order.back())) {
+           dominatesPredicates(Entry, Order.front())) {
       handleLoops(false, LoopEnd);
     }

@@ -779,7 +726,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,

 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
                                  BasicBlock *LoopEnd) {
-  RegionNode *Node = Order.back();
+  RegionNode *Node = Order.front();
   BasicBlock *LoopStart = Node->getEntry();

   if (!Loops.count(LoopStart)) {
@@ -924,10 +871,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
   ParentRegion = R;

   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

   orderNodes();
-  collectInfos();
+
   createFlow();
   insertConditions(false);
   insertConditions(true);
```
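With LoopInfo gone, `orderNodes()` is now just a reverse post-order walk stored in a deque that the rest of the pass consumes from the front (where it used to pop from the back of a reversed vector). A generic sketch of RPO construction, assuming a plain adjacency-list graph:

```cpp
#include <deque>
#include <functional>
#include <set>
#include <vector>

using Graph = std::vector<std::vector<int>>; // adjacency list

// Reverse post-order = DFS post-order, reversed. Pushing each node at the
// front after visiting its successors yields the reversed order directly;
// consuming from the front then sees a node before its (non-back-edge)
// successors, which is what the pass's back-edge detection relies on.
std::deque<int> reversePostOrder(const Graph &G, int Entry) {
  std::deque<int> Order;
  std::set<int> Seen;
  std::function<void(int)> Dfs = [&](int N) {
    if (!Seen.insert(N).second)
      return;
    for (int Succ : G[N])
      Dfs(Succ);
    Order.push_front(N);
  };
  Dfs(Entry);
  return Order;
}
```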
```diff
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6ef54385c452..64f206ea92eb 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2630,9 +2630,12 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
   Instruction *LastInduction = VecInd;
   for (unsigned Part = 0; Part < UF; ++Part) {
     VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
-    recordVectorLoopValueForInductionCast(II, LastInduction, Part);
+
     if (isa<TruncInst>(EntryVal))
       addMetadata(LastInduction, EntryVal);
+    else
+      recordVectorLoopValueForInductionCast(II, LastInduction, Part);
+
     LastInduction = cast<Instruction>(addFastMathFlag(
         Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
   }
@@ -2754,15 +2757,17 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {

   // If we haven't yet vectorized the induction variable, splat the scalar
   // induction variable, and build the necessary step vectors.
+  // TODO: Don't do it unless the vectorized IV is really required.
   if (!VectorizedIV) {
     Value *Broadcasted = getBroadcastInstrs(ScalarIV);
     for (unsigned Part = 0; Part < UF; ++Part) {
       Value *EntryPart =
           getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
       VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
-      recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
       if (Trunc)
         addMetadata(EntryPart, Trunc);
+      else
+        recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
     }
   }
```
```diff
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a7ccd3faec44..f301fc361abc 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1347,7 +1347,6 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
           DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " <<
                 Lane << " from " << *Scalar << ".\n");
           ExternalUses.emplace_back(Scalar, nullptr, Lane);
-          continue;
         }
         for (User *U : Scalar->users()) {
           DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -4417,13 +4416,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
   if (!A || !B)
     return false;
   Value *VL[] = { A, B };
-  return tryToVectorizeList(VL, R, None, true);
+  return tryToVectorizeList(VL, R, true);
 }

 bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
-                                           ArrayRef<Value *> BuildVector,
-                                           bool AllowReorder,
-                                           bool NeedExtraction) {
+                                           bool AllowReorder) {
   if (VL.size() < 2)
     return false;

@@ -4517,12 +4514,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                    << "\n");
       ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);

-      ArrayRef<Value *> EmptyArray;
-      ArrayRef<Value *> BuildVectorSlice;
-      if (!BuildVector.empty())
-        BuildVectorSlice = BuildVector.slice(I, OpsWidth);
-
-      R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
+      R.buildTree(Ops);
       // TODO: check if we can allow reordering for more cases.
       if (AllowReorder && R.shouldReorder()) {
         // Conceptually, there is nothing actually preventing us from trying to
         // reorder a larger list. In practice, we only handle reordering of
         // reductions. However, at this point, we only expect to get here when
         // there are exactly two operations.
         assert(Ops.size() == 2);
-        assert(BuildVectorSlice.empty());
         Value *ReorderedOps[] = {Ops[1], Ops[0]};
         R.buildTree(ReorderedOps, None);
       }
@@ -4550,31 +4541,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                                            << " and with tree size "
                                            << ore::NV("TreeSize", R.getTreeSize()));

-        Value *VectorizedRoot = R.vectorizeTree();
-
-        // Reconstruct the build vector by extracting the vectorized root. This
-        // way we handle the case where some elements of the vector are
-        // undefined.
-        //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
-        if (!BuildVectorSlice.empty()) {
-          // The insert point is the last build vector instruction. The
-          // vectorized root will precede it. This guarantees that we get an
-          // instruction. The vectorized tree could have been constant folded.
-          Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
-          unsigned VecIdx = 0;
-          for (auto &V : BuildVectorSlice) {
-            IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
-                                        ++BasicBlock::iterator(InsertAfter));
-            Instruction *I = cast<Instruction>(V);
-            assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
-            Instruction *Extract =
-                cast<Instruction>(Builder.CreateExtractElement(
-                    VectorizedRoot, Builder.getInt32(VecIdx++)));
-            I->setOperand(1, Extract);
-            I->moveAfter(Extract);
-            InsertAfter = I;
-          }
-        }
+        R.vectorizeTree();
         // Move to the next bundle.
         I += VF - 1;
         NextInst = I + 1;
@@ -5495,11 +5462,9 @@ private:
 ///
 /// Returns true if it matches
 static bool findBuildVector(InsertElementInst *LastInsertElem,
-                            SmallVectorImpl<Value *> &BuildVector,
                             SmallVectorImpl<Value *> &BuildVectorOpds) {
   Value *V = nullptr;
   do {
-    BuildVector.push_back(LastInsertElem);
     BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
     V = LastInsertElem->getOperand(0);
     if (isa<UndefValue>(V))
       break;
     LastInsertElem = dyn_cast<InsertElementInst>(V);
     if (!LastInsertElem || !LastInsertElem->hasOneUse())
       return false;
   } while (true);
-  std::reverse(BuildVector.begin(), BuildVector.end());
   std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
   return true;
 }
@@ -5517,11 +5481,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
 ///
 /// \return true if it matches.
 static bool findBuildAggregate(InsertValueInst *IV,
-                               SmallVectorImpl<Value *> &BuildVector,
                                SmallVectorImpl<Value *> &BuildVectorOpds) {
   Value *V;
   do {
-    BuildVector.push_back(IV);
     BuildVectorOpds.push_back(IV->getInsertedValueOperand());
     V = IV->getAggregateOperand();
     if (isa<UndefValue>(V))
       break;
     IV = dyn_cast<InsertValueInst>(V);
     if (!IV || !IV->hasOneUse())
       return false;
   } while (true);
-  std::reverse(BuildVector.begin(), BuildVector.end());
   std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
   return true;
 }
@@ -5706,27 +5667,25 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
   if (!R.canMapToVector(IVI->getType(), DL))
     return false;

-  SmallVector<Value *, 16> BuildVector;
   SmallVector<Value *, 16> BuildVectorOpds;
-  if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
+  if (!findBuildAggregate(IVI, BuildVectorOpds))
     return false;

   DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
   // Aggregate value is unlikely to be processed in vector register, we need to
   // extract scalars into scalar registers, so NeedExtraction is set true.
-  return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
+  return tryToVectorizeList(BuildVectorOpds, R);
 }

 bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
                                                    BasicBlock *BB, BoUpSLP &R) {
-  SmallVector<Value *, 16> BuildVector;
   SmallVector<Value *, 16> BuildVectorOpds;
-  if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
+  if (!findBuildVector(IEI, BuildVectorOpds))
     return false;

   // Vectorize starting with the build vector operands ignoring the BuildVector
   // instructions for the purpose of scheduling and user extraction.
-  return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
+  return tryToVectorizeList(BuildVectorOpds, R);
 }

 bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
@@ -5804,8 +5763,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       // is done when there are exactly two elements since tryToVectorizeList
       // asserts that there are only two values when AllowReorder is true.
       bool AllowReorder = NumElts == 2;
-      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
-                                            None, AllowReorder)) {
+      if (NumElts > 1 &&
+          tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
         // Success start over because instructions might have been changed.
         HaveVectorizedPhiNodes = true;
         Changed = true;
```
