author     Dimitry Andric <dim@FreeBSD.org>    2017-07-29 21:25:18 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-07-29 21:25:18 +0000
commit     3ad6a4b447326bc16c17df65637ca02330b8d090 (patch)
tree       568321855815f8ca008258972e27d4a3ea487475 /lib
parent     93c91e39b29142dec1d03a30df9f6e757f56c193 (diff)
Diffstat (limited to 'lib')
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp                        | 17
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp                         |  7
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp                          |  9
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp      | 21
-rw-r--r--  lib/Option/OptTable.cpp                               |  4
-rw-r--r--  lib/Support/CommandLine.cpp                           | 18
-rw-r--r--  lib/Support/ErrorHandling.cpp                         |  3
-rw-r--r--  lib/Support/TargetRegistry.cpp                        |  3
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp            |  7
-rw-r--r--  lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp   |  5
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.td                   |  3
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp     | 15
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZ14.td              | 22
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp                    |  5
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                    |  9
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp               | 28
-rw-r--r--  lib/Transforms/Utils/LoopUtils.cpp                    | 27
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp                  | 16
18 files changed, 133 insertions, 86 deletions
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 45dc13d58de7..dc02a00e0fcc 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -4016,14 +4016,18 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
return true;
}
+// Max number of memory uses to look at before aborting the search to conserve
+// compile time.
+static constexpr int MaxMemoryUsesToScan = 20;
+
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
- SmallPtrSetImpl<Instruction *> &ConsideredInsts,
- const TargetLowering &TLI, const TargetRegisterInfo &TRI) {
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI, int SeenInsts = 0) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -4036,8 +4040,12 @@ static bool FindAllMemoryUses(
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
- Instruction *UserI = cast<Instruction>(U.getUser());
+ // Conservatively return true if we're seeing a large number or a deep chain
+ // of users. This avoids excessive compilation times in pathological cases.
+ if (SeenInsts++ >= MaxMemoryUsesToScan)
+ return true;
+ Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
continue;
@@ -4082,7 +4090,8 @@ static bool FindAllMemoryUses(
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
+ SeenInsts))
return true;
}
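
A minimal standalone sketch (plain C++ with stand-in types, not the LLVM classes) of the bound introduced above: the running count is passed by value through the recursion, and the walk conservatively reports "not foldable" once the cap is reached.

#include <vector>

static constexpr int MaxUsesToScan = 20; // mirrors MaxMemoryUsesToScan

struct Node {
  std::vector<Node *> Users;
  bool IsMemoryUse = false;
};

// Returns true ("give up") when the cap is hit, so a pathological use graph is
// treated the same way as a genuinely non-foldable user.
static bool findAllMemoryUses(Node *N, std::vector<Node *> &MemoryUses,
                              int SeenInsts = 0) {
  for (Node *User : N->Users) {
    if (SeenInsts++ >= MaxUsesToScan)
      return true;
    if (User->IsMemoryUse) {
      MemoryUses.push_back(User);
      continue;
    }
    if (findAllMemoryUses(User, MemoryUses, SeenInsts))
      return true;
  }
  return false;
}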
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 4e6a3ec21866..eda4f74c7874 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -643,8 +643,11 @@ void InlineSpiller::reMaterializeAll() {
Edit->eraseVirtReg(Reg);
continue;
}
- assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) &&
- "Reg with empty interval has reference");
+
+ assert(LIS.hasInterval(Reg) &&
+ (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) &&
+ "Empty and not used live-range?!");
+
RegsToSpill[ResultPos++] = Reg;
}
RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index a7b7a9f8ab15..7b4fbace2c1c 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -133,18 +133,19 @@ void RegAllocBase::allocatePhysRegs() {
if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
- for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
- I != E; ++I) {
- LiveInterval *SplitVirtReg = &LIS->getInterval(*I);
+ for (unsigned Reg : SplitVRegs) {
+ assert(LIS->hasInterval(Reg));
+
+ LiveInterval *SplitVirtReg = &LIS->getInterval(Reg);
assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ assert(SplitVirtReg->empty() && "Non-empty but used interval");
DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
aboutToRemoveInterval(*SplitVirtReg);
LIS->removeInterval(SplitVirtReg->reg);
continue;
}
DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(!SplitVirtReg->empty() && "expecting non-empty interval");
assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d41054b15bbc..0cad20db0964 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2965,7 +2965,12 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) {
else if (N.getOpcode() == ISD::SIGN_EXTEND)
N = N.getOperand(0);
- return (N.getOpcode() == ISD::SETCC);
+ if (isLogicalMaskOp(N.getOpcode()))
+ return isSETCCorConvertedSETCC(N.getOperand(0)) &&
+ isSETCCorConvertedSETCC(N.getOperand(1));
+
+ return (N.getOpcode() == ISD::SETCC ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
}
#endif
@@ -2973,28 +2978,20 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) {
// to ToMaskVT if needed with vector extension or truncation.
SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
EVT ToMaskVT) {
- LLVMContext &Ctx = *DAG.getContext();
-
// Currently a SETCC or a AND/OR/XOR with two SETCCs are handled.
- unsigned InMaskOpc = InMask->getOpcode();
-
// FIXME: This code seems to be too restrictive, we might consider
// generalizing it or dropping it.
- assert((InMaskOpc == ISD::SETCC ||
- ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) ||
- (isLogicalMaskOp(InMaskOpc) &&
- isSETCCorConvertedSETCC(InMask->getOperand(0)) &&
- isSETCCorConvertedSETCC(InMask->getOperand(1)))) &&
- "Unexpected mask argument.");
+ assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
// Make a new Mask node, with a legal result VT.
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
Ops.push_back(InMask->getOperand(i));
- SDValue Mask = DAG.getNode(InMaskOpc, SDLoc(InMask), MaskVT, Ops);
+ SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
// If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
// extend or truncate is needed.
+ LLVMContext &Ctx = *DAG.getContext();
unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
if (MaskScalarBits < ToMaskScalBits) {
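
A schematic version of the predicate the assertion above now relies on (a hypothetical stand-in Node type, not the SelectionDAG API): a value qualifies if it is a SETCC, a constant build-vector, an extend of one, or a logical mask op whose two operands both qualify.

struct Node {
  enum Kind { SetCC, ConstBuildVector, Extend, And, Or, Xor, Other } K;
  const Node *Op0 = nullptr, *Op1 = nullptr;
};

static bool isSetCCOrConvertedSetCC(const Node *N) {
  if (N->K == Node::Extend)                    // look through the conversion
    return isSetCCOrConvertedSetCC(N->Op0);
  if (N->K == Node::And || N->K == Node::Or || N->K == Node::Xor)
    return isSetCCOrConvertedSetCC(N->Op0) &&  // both operands must themselves
           isSetCCOrConvertedSetCC(N->Op1);    // be (converted) masks
  return N->K == Node::SetCC || N->K == Node::ConstBuildVector;
}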
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index f3b438e829d6..51c62d33f8e1 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -235,7 +235,9 @@ OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const {
continue;
for (int I = 0; In.Prefixes[I]; I++) {
- std::string S = std::string(In.Prefixes[I]) + std::string(In.Name);
+ std::string S = std::string(In.Prefixes[I]) + std::string(In.Name) + "\t";
+ if (In.HelpText)
+ S += In.HelpText;
if (StringRef(S).startswith(Cur))
Ret.push_back(S);
}
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 50173f5256bf..8eeb685a18a9 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -2039,9 +2039,9 @@ void CommandLineParser::printOptionValues() {
Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
}
-static VersionPrinterTy OverrideVersionPrinter = nullptr;
+static void (*OverrideVersionPrinter)() = nullptr;
-static std::vector<VersionPrinterTy> *ExtraVersionPrinters = nullptr;
+static std::vector<void (*)()> *ExtraVersionPrinters = nullptr;
namespace {
class VersionPrinter {
@@ -2081,7 +2081,7 @@ public:
return;
if (OverrideVersionPrinter != nullptr) {
- OverrideVersionPrinter(outs());
+ (*OverrideVersionPrinter)();
exit(0);
}
print();
@@ -2090,8 +2090,10 @@ public:
// information.
if (ExtraVersionPrinters != nullptr) {
outs() << '\n';
- for (auto I : *ExtraVersionPrinters)
- I(outs());
+ for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(),
+ E = ExtraVersionPrinters->end();
+ I != E; ++I)
+ (*I)();
}
exit(0);
@@ -2129,11 +2131,11 @@ void cl::PrintHelpMessage(bool Hidden, bool Categorized) {
/// Utility function for printing version number.
void cl::PrintVersionMessage() { VersionPrinterInstance.print(); }
-void cl::SetVersionPrinter(VersionPrinterTy func) { OverrideVersionPrinter = func; }
+void cl::SetVersionPrinter(void (*func)()) { OverrideVersionPrinter = func; }
-void cl::AddExtraVersionPrinter(VersionPrinterTy func) {
+void cl::AddExtraVersionPrinter(void (*func)()) {
if (!ExtraVersionPrinters)
- ExtraVersionPrinters = new std::vector<VersionPrinterTy>;
+ ExtraVersionPrinters = new std::vector<void (*)()>;
ExtraVersionPrinters->push_back(func);
}
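
With the printer type reverted to a plain void (*)() as above, a tool overrides the version banner with a function that writes to outs() itself. A hedged usage sketch (the tool name and banner text are made up):

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

static void printMyToolVersion() { // hypothetical tool-specific printer
  llvm::outs() << "mytool 1.0 (built against LLVM)\n";
}

int main(int argc, char **argv) {
  llvm::cl::SetVersionPrinter(printMyToolVersion); // replaces the default banner
  llvm::cl::ParseCommandLineOptions(argc, argv, "mytool demo\n");
  return 0;
}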
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 2fd4f3ea0d45..fb8ae4c1cd5e 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -169,7 +169,8 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) {
// Don't call the normal error handler. It may allocate memory. Directly write
// an OOM to stderr and abort.
char OOMMessage[] = "LLVM ERROR: out of memory\n";
- (void)::write(2, OOMMessage, strlen(OOMMessage));
+ ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage));
+ (void)written;
abort();
#endif
}
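
The usual motivation for this pattern is that GCC does not let a bare (void) cast silence -Wunused-result for functions marked warn_unused_result, such as glibc's write(2); assigning the result to a named variable and discarding that does. In isolation (a sketch, not the LLVM file itself):

#include <cstring>
#include <unistd.h>

void reportOOM() {
  // write(2) allocates no memory, unlike the normal error handler; the result
  // is intentionally ignored because the caller aborts immediately afterwards.
  const char Msg[] = "LLVM ERROR: out of memory\n";
  ssize_t Written = ::write(2, Msg, strlen(Msg));
  (void)Written;
}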
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index b5c283253117..bed9ed64f802 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -114,7 +114,7 @@ static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS,
return LHS->first.compare(RHS->first);
}
-void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) {
+void TargetRegistry::printRegisteredTargetsForVersion() {
std::vector<std::pair<StringRef, const Target*> > Targets;
size_t Width = 0;
for (const auto &T : TargetRegistry::targets()) {
@@ -123,6 +123,7 @@ void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) {
}
array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
+ raw_ostream &OS = outs();
OS << " Registered Targets:\n";
for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
OS << " " << Targets[i].first;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index c6150f9e5d1d..8c30c4410c09 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2889,9 +2889,12 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
- if (IsWin64)
+ if (IsWin64) {
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
- else
+ if (GPRSaveSize & 15)
+ // The extra size here, if triggered, will always be 8.
+ MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
+ } else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
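
A worked example of the Win64 padding arithmetic added above, assuming three fixed integer arguments (FirstVariadicGPR == 3); the concrete numbers are illustrative, not taken from the patch:

int main() {
  unsigned NumGPRArgRegs = 8;                                    // x0..x7
  unsigned FirstVariadicGPR = 3;                                 // assumed
  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); // 40 bytes
  // 40 is a multiple of 8 but not of 16, so the extra fixed object is
  // 16 - (40 & 15) == 8 bytes and the area spans alignTo(40, 16) == 48 bytes.
  unsigned Extra = 16 - (GPRSaveSize & 15);
  unsigned Aligned = (GPRSaveSize + 15) & ~15u;
  return (Extra == 8 && Aligned == 48) ? 0 : 1;
}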
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 7c31c8e397ba..a844081db5b2 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -297,6 +297,11 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
case AMDGPU::FLAT_SCR_HI:
O << "flat_scratch_hi";
return;
+ case AMDGPU::FP_REG:
+ case AMDGPU::SP_REG:
+ case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
+ case AMDGPU::PRIVATE_RSRC_REG:
+ llvm_unreachable("pseudo-register should not ever be emitted");
default:
break;
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 54ea7805e18d..d097b78890e3 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -274,8 +274,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
- SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT,
- FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
let AllocationPriority = 7;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 0d021d67033e..0a72a4438218 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -61,14 +61,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Sparc::fixup_sparc_lo10:
return Value & 0x3ff;
- case Sparc::fixup_sparc_tls_ldo_hix22:
- case Sparc::fixup_sparc_tls_le_hix22:
- return (~Value >> 10) & 0x3fffff;
-
- case Sparc::fixup_sparc_tls_ldo_lox10:
- case Sparc::fixup_sparc_tls_le_lox10:
- return (~(~Value & 0x3ff)) & 0x1fff;
-
case Sparc::fixup_sparc_h44:
return (Value >> 22) & 0x3fffff;
@@ -84,6 +76,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Sparc::fixup_sparc_hm:
return (Value >> 32) & 0x3ff;
+ case Sparc::fixup_sparc_tls_ldo_hix22:
+ case Sparc::fixup_sparc_tls_le_hix22:
+ case Sparc::fixup_sparc_tls_ldo_lox10:
+ case Sparc::fixup_sparc_tls_le_lox10:
+ assert(Value == 0 && "Sparc TLS relocs expect zero Value");
+ return 0;
+
case Sparc::fixup_sparc_tls_gd_add:
case Sparc::fixup_sparc_tls_gd_call:
case Sparc::fixup_sparc_tls_ldm_add:
diff --git a/lib/Target/SystemZ/SystemZScheduleZ14.td b/lib/Target/SystemZ/SystemZScheduleZ14.td
index f11177af91a5..698eb5627d19 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -455,10 +455,10 @@ def : InstRW<[FXa, LSU, Lat8], (instregex "MH(Y)?$")>;
def : InstRW<[FXa2, Lat6, GroupAlone], (instregex "M(L)?R$")>;
def : InstRW<[FXa2, LSU, Lat10, GroupAlone], (instregex "M(FY|L)?$")>;
def : InstRW<[FXa, LSU, Lat8], (instregex "MGH$")>;
-def : InstRW<[FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>;
-def : InstRW<[FXa, Lat8, GroupAlone], (instregex "MGRK$")>;
-def : InstRW<[FXa, LSU, Lat9, GroupAlone], (instregex "MSC$")>;
-def : InstRW<[FXa, LSU, Lat11, GroupAlone], (instregex "MSGC$")>;
+def : InstRW<[FXa, FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>;
+def : InstRW<[FXa, FXa, Lat8, GroupAlone], (instregex "MGRK$")>;
+def : InstRW<[FXa, LSU, Lat9], (instregex "MSC$")>;
+def : InstRW<[FXa, LSU, Lat11], (instregex "MSGC$")>;
def : InstRW<[FXa, Lat5], (instregex "MSRKC$")>;
def : InstRW<[FXa, Lat7], (instregex "MSGRKC$")>;
@@ -620,7 +620,7 @@ def : InstRW<[FXa, Lat30], (instregex "(PCC|PPNO|PRNO)$")>;
def : InstRW<[LSU], (instregex "LGG$")>;
def : InstRW<[LSU, Lat5], (instregex "LLGFSG$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)GSC$")>;
+def : InstRW<[LSU, Lat30], (instregex "(L|ST)GSC$")>;
//===----------------------------------------------------------------------===//
// Decimal arithmetic
@@ -708,7 +708,7 @@ def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>;
// Processor assist
//===----------------------------------------------------------------------===//
-def : InstRW<[FXb], (instregex "PPA$")>;
+def : InstRW<[FXb, GroupAlone], (instregex "PPA$")>;
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
@@ -1276,9 +1276,9 @@ def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>;
def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>;
def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>;
-def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSLB$")>;
+def : InstRW<[VecXsPm], (instregex "VSLB$")>;
def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>;
-def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSR(A|L)B$")>;
+def : InstRW<[VecXsPm], (instregex "VSR(A|L)B$")>;
def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>;
@@ -1435,9 +1435,9 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
// Vector: Packed-decimal instructions
//===----------------------------------------------------------------------===//
-def : InstRW<[VecDF, VecDF, Lat10, GroupAlone], (instregex "VLIP$")>;
-def : InstRW<[VecDFX, LSU, Lat12, GroupAlone], (instregex "VPKZ$")>;
-def : InstRW<[VecDFX, FXb, LSU, Lat12, GroupAlone], (instregex "VUPKZ$")>;
+def : InstRW<[VecDF, VecDF, Lat10], (instregex "VLIP$")>;
+def : InstRW<[VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>;
+def : InstRW<[VecDFX, FXb, LSU, Lat12, BeginGroup], (instregex "VUPKZ$")>;
def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVB(G)?$")>;
def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVD(G)?$")>;
def : InstRW<[VecDFX], (instregex "V(A|S)P$")>;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 3c4589ab18f6..8f24f98be681 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1055,7 +1055,10 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// Scale the leading zero count down based on the actual size of the value.
// Also scale it down based on the size of the shift.
- MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
+ unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
+ if (MaskLZ < ScaleDown)
+ return true;
+ MaskLZ -= ScaleDown;
// The final check is to ensure that any masked out high bits of X are
// already known to be zero. Otherwise, the mask has a semantic impact
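
Stripped of the DAG context, the guard added above protects an unsigned subtraction that could previously wrap: if the mask has fewer leading zeros than the scale-down amount, the fold is rejected instead of letting MaskLZ underflow. A minimal sketch:

// Returns true when the fold must be abandoned (the early return above);
// otherwise MaskLZ is scaled down in place.
static bool scaleLeadingZeros(unsigned &MaskLZ, unsigned ValueBits,
                              unsigned ShiftAmt) {
  unsigned ScaleDown = (64 - ValueBits) + ShiftAmt;
  if (MaskLZ < ScaleDown)
    return true; // subtracting would wrap around to a huge value
  MaskLZ -= ScaleDown;
  return false;
}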
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 44eecd664714..ba8eb8656585 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1672,8 +1672,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: These control memcmp expansion in CGP and could be raised higher, but
// that needs to benchmarked and balanced with the potential use of vector
- // load/store types (PR33329).
- MaxLoadsPerMemcmp = 4;
+ // load/store types (PR33329, PR33914).
+ MaxLoadsPerMemcmp = 2;
MaxLoadsPerMemcmpOptSize = 2;
// Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
@@ -22022,8 +22022,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
// i64 SRA needs to be performed as partial shifts.
- if ((VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
- Op.getOpcode() == ISD::SRA && !Subtarget.hasXOP())
+ if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
+ (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+ Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);
if (VT == MVT::v16i8 ||
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 4056cc5cb346..dc9143bebc45 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -64,6 +64,11 @@ ImplicationSearchThreshold(
"condition to use to thread over a weaker condition"),
cl::init(3), cl::Hidden);
+static cl::opt<bool> PrintLVIAfterJumpThreading(
+ "print-lvi-after-jump-threading",
+ cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
+ cl::Hidden);
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
@@ -93,9 +98,10 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (PrintLVIAfterJumpThreading)
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LazyValueInfoWrapperPass>();
- AU.addPreserved<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -137,8 +143,14 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- return Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
- std::move(BPI));
+ bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
+ std::move(BPI));
+ if (PrintLVIAfterJumpThreading) {
+ dbgs() << "LVI for function '" << F.getName() << "':\n";
+ LVI->printLVI(F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ dbgs());
+ }
+ return Changed;
}
PreservedAnalyses JumpThreadingPass::run(Function &F,
@@ -231,13 +243,15 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Can't thread an unconditional jump, but if the block is "almost
// empty", we can replace uses of it with uses of the successor and make
// this dead.
- // We should not eliminate the loop header either, because eliminating
- // a loop header might later prevent LoopSimplify from transforming nested
- // loops into simplified form.
+ // We should not eliminate the loop header or latch either, because
+ // eliminating a loop header or latch might later prevent LoopSimplify
+ // from transforming nested loops into simplified form. We will rely on
+ // later passes in backend to clean up empty blocks.
if (BI && BI->isUnconditional() &&
BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
+ BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
+ !LoopHeaders.count(BI->getSuccessor(0))) {
// FIXME: It is always conservatively correct to drop the info
// for a block even if it doesn't get erased. This isn't totally
// awesome, but it allows us to use AssertingVH to prevent nasty
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 58b70be95d99..3c522786641a 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -1376,16 +1376,21 @@ Value *llvm::createTargetReduction(IRBuilder<> &Builder,
}
}
-void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
- if (auto *VecOp = dyn_cast<Instruction>(I)) {
- if (auto *I0 = dyn_cast<Instruction>(VL[0])) {
- // VecOVp is initialized to the 0th scalar, so start counting from index
- // '1'.
- VecOp->copyIRFlags(I0);
- for (int i = 1, e = VL.size(); i < e; ++i) {
- if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
- VecOp->andIRFlags(Scalar);
- }
- }
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
+ auto *VecOp = dyn_cast<Instruction>(I);
+ if (!VecOp)
+ return;
+ auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
+ : dyn_cast<Instruction>(OpValue);
+ if (!Intersection)
+ return;
+ const unsigned Opcode = Intersection->getOpcode();
+ VecOp->copyIRFlags(Intersection);
+ for (auto *V : VL) {
+ auto *Instr = dyn_cast<Instruction>(V);
+ if (!Instr)
+ continue;
+ if (OpValue == nullptr || Opcode == Instr->getOpcode())
+ VecOp->andIRFlags(V);
}
}
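
The rewritten propagateIRFlags copies the IR flags of a lead instruction and then intersects them with those of the other scalars, optionally restricted to instructions whose opcode matches OpValue. A simplified model using plain bitmasks instead of the llvm::Instruction API:

#include <cstdint>
#include <vector>

struct Inst {
  unsigned Opcode;
  uint32_t Flags; // stand-in for nsw/nuw/exact/fast-math flags
};

// The combined (vectorized) operation may only keep flags that every
// participating scalar had, so the flag sets are intersected.
uint32_t intersectIRFlags(const std::vector<Inst> &Scalars, const Inst *OpValue) {
  const Inst &Lead = OpValue ? *OpValue : Scalars.front();
  uint32_t Flags = Lead.Flags;            // corresponds to copyIRFlags
  for (const Inst &I : Scalars)
    if (!OpValue || I.Opcode == Lead.Opcode)
      Flags &= I.Flags;                   // corresponds to andIRFlags
  return Flags;
}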
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index dee658f98393..8784b9702141 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5656,20 +5656,22 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
if (SinkCommon && SinkThenElseCodeToEnd(BI))
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
- // if LoopHeader is provided, check if the block is a loop header
- // (This is for early invocations before loop simplify and vectorization
- // to keep canonical loop forms for nested loops.
- // These blocks can be eliminated when the pass is invoked later
- // in the back-end.)
+ // if LoopHeader is provided, check if the block or its successor is a loop
+ // header (This is for early invocations before loop simplify and
+ // vectorization to keep canonical loop forms for nested loops. These blocks
+ // can be eliminated when the pass is invoked later in the back-end.)
+ bool NeedCanonicalLoop =
+ !LateSimplifyCFG &&
+ (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
- (!LoopHeaders || !LoopHeaders->count(BB)) &&
- TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
// If the only instruction in the block is a seteq/setne comparison