| author | Dimitry Andric <dim@FreeBSD.org> | 2017-07-29 21:25:18 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-07-29 21:25:18 +0000 |
| commit | 3ad6a4b447326bc16c17df65637ca02330b8d090 | |
| tree | 568321855815f8ca008258972e27d4a3ea487475 | /lib |
| parent | 93c91e39b29142dec1d03a30df9f6e757f56c193 | |
Diffstat (limited to 'lib')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | lib/CodeGen/CodeGenPrepare.cpp | 17 |
| -rw-r--r-- | lib/CodeGen/InlineSpiller.cpp | 7 |
| -rw-r--r-- | lib/CodeGen/RegAllocBase.cpp | 9 |
| -rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 21 |
| -rw-r--r-- | lib/Option/OptTable.cpp | 4 |
| -rw-r--r-- | lib/Support/CommandLine.cpp | 18 |
| -rw-r--r-- | lib/Support/ErrorHandling.cpp | 3 |
| -rw-r--r-- | lib/Support/TargetRegistry.cpp | 3 |
| -rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 7 |
| -rw-r--r-- | lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 5 |
| -rw-r--r-- | lib/Target/AMDGPU/SIRegisterInfo.td | 3 |
| -rw-r--r-- | lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 15 |
| -rw-r--r-- | lib/Target/SystemZ/SystemZScheduleZ14.td | 22 |
| -rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 5 |
| -rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 9 |
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 28 |
| -rw-r--r-- | lib/Transforms/Utils/LoopUtils.cpp | 27 |
| -rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 16 |
18 files changed, 133 insertions, 86 deletions
```diff
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 45dc13d58de7..dc02a00e0fcc 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -4016,14 +4016,18 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
   return true;
 }
 
+// Max number of memory uses to look at before aborting the search to conserve
+// compile time.
+static constexpr int MaxMemoryUsesToScan = 20;
+
 /// Recursively walk all the uses of I until we find a memory use.
 /// If we find an obviously non-foldable instruction, return true.
 /// Add the ultimately found memory instructions to MemoryUses.
 static bool FindAllMemoryUses(
     Instruction *I,
     SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
-    SmallPtrSetImpl<Instruction *> &ConsideredInsts,
-    const TargetLowering &TLI, const TargetRegisterInfo &TRI) {
+    SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
+    const TargetRegisterInfo &TRI, int SeenInsts = 0) {
   // If we already considered this instruction, we're done.
   if (!ConsideredInsts.insert(I).second)
     return false;
@@ -4036,8 +4040,12 @@ static bool FindAllMemoryUses(
 
   // Loop over all the uses, recursively processing them.
   for (Use &U : I->uses()) {
-    Instruction *UserI = cast<Instruction>(U.getUser());
+    // Conservatively return true if we're seeing a large number or a deep chain
+    // of users. This avoids excessive compilation times in pathological cases.
+    if (SeenInsts++ >= MaxMemoryUsesToScan)
+      return true;
 
+    Instruction *UserI = cast<Instruction>(U.getUser());
     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
       MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
       continue;
@@ -4082,7 +4090,8 @@ static bool FindAllMemoryUses(
       continue;
     }
 
-    if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI))
+    if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
+                          SeenInsts))
       return true;
   }
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 4e6a3ec21866..eda4f74c7874 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -643,8 +643,11 @@ void InlineSpiller::reMaterializeAll() {
       Edit->eraseVirtReg(Reg);
       continue;
     }
-    assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) &&
-           "Reg with empty interval has reference");
+
+    assert(LIS.hasInterval(Reg) &&
+           (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) &&
+           "Empty and not used live-range?!");
+
     RegsToSpill[ResultPos++] = Reg;
   }
   RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index a7b7a9f8ab15..7b4fbace2c1c 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -133,18 +133,19 @@ void RegAllocBase::allocatePhysRegs() {
     if (AvailablePhysReg)
       Matrix->assign(*VirtReg, AvailablePhysReg);
 
-    for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
-         I != E; ++I) {
-      LiveInterval *SplitVirtReg = &LIS->getInterval(*I);
+    for (unsigned Reg : SplitVRegs) {
+      assert(LIS->hasInterval(Reg));
+
+      LiveInterval *SplitVirtReg = &LIS->getInterval(Reg);
       assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
       if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+        assert(SplitVirtReg->empty() && "Non-empty but used interval");
         DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
         aboutToRemoveInterval(*SplitVirtReg);
         LIS->removeInterval(SplitVirtReg->reg);
         continue;
       }
       DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
-      assert(!SplitVirtReg->empty() && "expecting non-empty interval");
       assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
              "expect split value in virtual register");
       enqueue(SplitVirtReg);
```
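The CodeGenPrepare hunk above caps a recursive use-walk with a budget: the count is passed by value, so it bounds the breadth at each level plus the depth of any single chain, and the search bails out conservatively once the budget is spent. A minimal standalone sketch of the same pattern, with a toy `Node` type standing in for LLVM's use lists (all names here are illustrative, not LLVM API):

```cpp
#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Users; };

static constexpr int MaxUsesToScan = 20; // same role as MaxMemoryUsesToScan

// Returns true if the walk gave up; the caller must then assume the worst,
// mirroring FindAllMemoryUses returning true for "obviously non-foldable".
static bool walkUsers(Node *N, std::unordered_set<Node *> &Considered,
                      int SeenUses = 0) {
  if (!Considered.insert(N).second)
    return false; // already visited (the real code tracks ConsideredInsts)
  for (Node *U : N->Users) {
    // The budget travels by value, so each recursive path stays bounded even
    // though sibling subtrees do not share their consumed counts.
    if (SeenUses++ >= MaxUsesToScan)
      return true;
    if (walkUsers(U, Considered, SeenUses))
      return true;
  }
  return false;
}
```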
```diff
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d41054b15bbc..0cad20db0964 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2965,7 +2965,12 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) {
   else if (N.getOpcode() == ISD::SIGN_EXTEND)
     N = N.getOperand(0);
 
-  return (N.getOpcode() == ISD::SETCC);
+  if (isLogicalMaskOp(N.getOpcode()))
+    return isSETCCorConvertedSETCC(N.getOperand(0)) &&
+           isSETCCorConvertedSETCC(N.getOperand(1));
+
+  return (N.getOpcode() == ISD::SETCC ||
+          ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
 }
 #endif
 
@@ -2973,28 +2978,20 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) {
 // to ToMaskVT if needed with vector extension or truncation.
 SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
                                       EVT ToMaskVT) {
-  LLVMContext &Ctx = *DAG.getContext();
-
-  // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled.
-  unsigned InMaskOpc = InMask->getOpcode();
-
   // FIXME: This code seems to be too restrictive, we might consider
   // generalizing it or dropping it.
-  assert((InMaskOpc == ISD::SETCC ||
-          ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) ||
-          (isLogicalMaskOp(InMaskOpc) &&
-           isSETCCorConvertedSETCC(InMask->getOperand(0)) &&
-           isSETCCorConvertedSETCC(InMask->getOperand(1)))) &&
-         "Unexpected mask argument.");
+  assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
 
   // Make a new Mask node, with a legal result VT.
   SmallVector<SDValue, 4> Ops;
   for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
     Ops.push_back(InMask->getOperand(i));
-  SDValue Mask = DAG.getNode(InMaskOpc, SDLoc(InMask), MaskVT, Ops);
+  SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
 
   // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
   // extend or truncate is needed.
+  LLVMContext &Ctx = *DAG.getContext();
   unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
   unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
   if (MaskScalarBits < ToMaskScalBits) {
```
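The LegalizeVectorTypes hunk folds the old open-coded assert condition into `isSETCCorConvertedSETCC` itself, making the predicate recursive so nested AND/OR/XOR trees over SETCC nodes (or constant build-vectors) are accepted. The recognizer's shape, reduced to a toy expression type of ours rather than `SDValue`:

```cpp
// Toy stand-in for the recursive mask recognizer; Expr models an SDNode.
struct Expr {
  enum Kind { SetCC, ConstVec, And, Or, Xor } K;
  const Expr *LHS = nullptr, *RHS = nullptr;
};

static bool isMaskLike(const Expr &E) {
  switch (E.K) {
  case Expr::And:
  case Expr::Or:
  case Expr::Xor: // logical mask ops: both operands must themselves be masks
    return isMaskLike(*E.LHS) && isMaskLike(*E.RHS);
  case Expr::SetCC:
  case Expr::ConstVec: // the two accepted leaf forms in the new return
    return true;
  }
  return false;
}
```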
```diff
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index f3b438e829d6..51c62d33f8e1 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -235,7 +235,9 @@ OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const {
       continue;
 
     for (int I = 0; In.Prefixes[I]; I++) {
-      std::string S = std::string(In.Prefixes[I]) + std::string(In.Name);
+      std::string S = std::string(In.Prefixes[I]) + std::string(In.Name) + "\t";
+      if (In.HelpText)
+        S += In.HelpText;
       if (StringRef(S).startswith(Cur))
         Ret.push_back(S);
     }
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 50173f5256bf..8eeb685a18a9 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -2039,9 +2039,9 @@ void CommandLineParser::printOptionValues() {
     Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
 }
 
-static VersionPrinterTy OverrideVersionPrinter = nullptr;
+static void (*OverrideVersionPrinter)() = nullptr;
 
-static std::vector<VersionPrinterTy> *ExtraVersionPrinters = nullptr;
+static std::vector<void (*)()> *ExtraVersionPrinters = nullptr;
 
 namespace {
 class VersionPrinter {
@@ -2081,7 +2081,7 @@ public:
       return;
 
     if (OverrideVersionPrinter != nullptr) {
-      OverrideVersionPrinter(outs());
+      (*OverrideVersionPrinter)();
       exit(0);
     }
     print();
@@ -2090,8 +2090,10 @@ public:
     // information.
     if (ExtraVersionPrinters != nullptr) {
       outs() << '\n';
-      for (auto I : *ExtraVersionPrinters)
-        I(outs());
+      for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(),
+                                             E = ExtraVersionPrinters->end();
+           I != E; ++I)
+        (*I)();
     }
 
     exit(0);
@@ -2129,11 +2131,11 @@ void cl::PrintHelpMessage(bool Hidden, bool Categorized) {
 /// Utility function for printing version number.
 void cl::PrintVersionMessage() { VersionPrinterInstance.print(); }
 
-void cl::SetVersionPrinter(VersionPrinterTy func) { OverrideVersionPrinter = func; }
+void cl::SetVersionPrinter(void (*func)()) { OverrideVersionPrinter = func; }
 
-void cl::AddExtraVersionPrinter(VersionPrinterTy func) {
+void cl::AddExtraVersionPrinter(void (*func)()) {
   if (!ExtraVersionPrinters)
-    ExtraVersionPrinters = new std::vector<VersionPrinterTy>;
+    ExtraVersionPrinters = new std::vector<void (*)()>;
 
   ExtraVersionPrinters->push_back(func);
 }
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 2fd4f3ea0d45..fb8ae4c1cd5e 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -169,7 +169,8 @@ void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) {
   // Don't call the normal error handler. It may allocate memory. Directly write
   // an OOM to stderr and abort.
   char OOMMessage[] = "LLVM ERROR: out of memory\n";
-  (void)::write(2, OOMMessage, strlen(OOMMessage));
+  ssize_t written = ::write(2, OOMMessage, strlen(OOMMessage));
+  (void)written;
   abort();
 #endif
 }
```
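The CommandLine.cpp hunks (and the TargetRegistry.cpp hunk below) revert the version-printer hooks to plain `void (*)()` function pointers, with `printRegisteredTargetsForVersion()` writing to `outs()` itself instead of taking a stream. A minimal sketch of a tool using the restored API (the tool name and banner string are invented):

```cpp
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical banner; note the printer takes no stream after this revert.
static void printMyToolVersion() {
  llvm::outs() << "mytool (example) 1.0\n";
}

int main(int argc, char **argv) {
  // Matches the restored signature: void cl::SetVersionPrinter(void (*)()).
  llvm::cl::SetVersionPrinter(printMyToolVersion);
  llvm::cl::ParseCommandLineOptions(argc, argv, "example tool\n");
  return 0;
}
```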
```diff
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index b5c283253117..bed9ed64f802 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -114,7 +114,7 @@ static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS,
   return LHS->first.compare(RHS->first);
 }
 
-void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) {
+void TargetRegistry::printRegisteredTargetsForVersion() {
   std::vector<std::pair<StringRef, const Target*> > Targets;
   size_t Width = 0;
   for (const auto &T : TargetRegistry::targets()) {
@@ -123,6 +123,7 @@ void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) {
   }
   array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
 
+  raw_ostream &OS = outs();
   OS << "  Registered Targets:\n";
   for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
     OS << "    " << Targets[i].first;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index c6150f9e5d1d..8c30c4410c09 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2889,9 +2889,12 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
   unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
   int GPRIdx = 0;
   if (GPRSaveSize != 0) {
-    if (IsWin64)
+    if (IsWin64) {
       GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
-    else
+      if (GPRSaveSize & 15)
+        // The extra size here, if triggered, will always be 8.
+        MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
+    } else
       GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
 
     SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 7c31c8e397ba..a844081db5b2 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -297,6 +297,11 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
   case AMDGPU::FLAT_SCR_HI:
     O << "flat_scratch_hi";
     return;
+  case AMDGPU::FP_REG:
+  case AMDGPU::SP_REG:
+  case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
+  case AMDGPU::PRIVATE_RSRC_REG:
+    llvm_unreachable("pseudo-register should not ever be emitted");
   default:
     break;
   }
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 54ea7805e18d..d097b78890e3 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -274,8 +274,7 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
 
 def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
   (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, TTMP_32, TMA_LO, TMA_HI,
    TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
-   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT,
-   FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
+   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
   let AllocationPriority = 7;
 }
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 0d021d67033e..0a72a4438218 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -61,14 +61,6 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case Sparc::fixup_sparc_lo10:
     return Value & 0x3ff;
 
-  case Sparc::fixup_sparc_tls_ldo_hix22:
-  case Sparc::fixup_sparc_tls_le_hix22:
-    return (~Value >> 10) & 0x3fffff;
-
-  case Sparc::fixup_sparc_tls_ldo_lox10:
-  case Sparc::fixup_sparc_tls_le_lox10:
-    return (~(~Value & 0x3ff)) & 0x1fff;
-
   case Sparc::fixup_sparc_h44:
     return (Value >> 22) & 0x3fffff;
 
@@ -84,6 +76,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case Sparc::fixup_sparc_hm:
     return (Value >> 32) & 0x3ff;
 
+  case Sparc::fixup_sparc_tls_ldo_hix22:
+  case Sparc::fixup_sparc_tls_le_hix22:
+  case Sparc::fixup_sparc_tls_ldo_lox10:
+  case Sparc::fixup_sparc_tls_le_lox10:
+    assert(Value == 0 && "Sparc TLS relocs expect zero Value");
+    return 0;
+
   case Sparc::fixup_sparc_tls_gd_add:
   case Sparc::fixup_sparc_tls_gd_call:
   case Sparc::fixup_sparc_tls_ldm_add:
```
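The Sparc hunk stops computing the TLS hix22/lox10 bit patterns at fixup time and instead asserts the value is zero, presumably leaving the actual work to relocations the linker applies. The removed expressions were not arbitrary: paired with the usual `sethi`/`xor` TLS Local-Exec sequence, they round-trip the original offset. A small self-contained check of that identity (our re-derivation, not code from the tree):

```cpp
#include <cassert>
#include <cstdint>

// Copies of the two deleted return expressions.
static uint32_t hix22(uint32_t X) { return (~X >> 10) & 0x3fffff; }
static uint32_t lox10(uint32_t X) { return (~(~X & 0x3ff)) & 0x1fff; }

int main() {
  for (uint32_t X = 0; X < 0x00400000; X += 4093) {
    uint32_t Rd = hix22(X) << 10;        // sethi %tle_hix22(X), %rd
    // xor's 13-bit immediate is sign-extended before being applied.
    int32_t Simm13 = (int32_t)(lox10(X) << 19) >> 19;
    assert((Rd ^ (uint32_t)Simm13) == X); // xor %rd, %tle_lox10(X), %rd
  }
  return 0;
}
```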
```diff
diff --git a/lib/Target/SystemZ/SystemZScheduleZ14.td b/lib/Target/SystemZ/SystemZScheduleZ14.td
index f11177af91a5..698eb5627d19 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -455,10 +455,10 @@ def : InstRW<[FXa, LSU, Lat8], (instregex "MH(Y)?$")>;
 def : InstRW<[FXa2, Lat6, GroupAlone], (instregex "M(L)?R$")>;
 def : InstRW<[FXa2, LSU, Lat10, GroupAlone], (instregex "M(FY|L)?$")>;
 def : InstRW<[FXa, LSU, Lat8], (instregex "MGH$")>;
-def : InstRW<[FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>;
-def : InstRW<[FXa, Lat8, GroupAlone], (instregex "MGRK$")>;
-def : InstRW<[FXa, LSU, Lat9, GroupAlone], (instregex "MSC$")>;
-def : InstRW<[FXa, LSU, Lat11, GroupAlone], (instregex "MSGC$")>;
+def : InstRW<[FXa, FXa, LSU, Lat12, GroupAlone], (instregex "MG$")>;
+def : InstRW<[FXa, FXa, Lat8, GroupAlone], (instregex "MGRK$")>;
+def : InstRW<[FXa, LSU, Lat9], (instregex "MSC$")>;
+def : InstRW<[FXa, LSU, Lat11], (instregex "MSGC$")>;
 def : InstRW<[FXa, Lat5], (instregex "MSRKC$")>;
 def : InstRW<[FXa, Lat7], (instregex "MSGRKC$")>;
 
@@ -620,7 +620,7 @@ def : InstRW<[FXa, Lat30], (instregex "(PCC|PPNO|PRNO)$")>;
 
 def : InstRW<[LSU], (instregex "LGG$")>;
 def : InstRW<[LSU, Lat5], (instregex "LLGFSG$")>;
-def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)GSC$")>;
+def : InstRW<[LSU, Lat30], (instregex "(L|ST)GSC$")>;
 
 //===----------------------------------------------------------------------===//
 // Decimal arithmetic
@@ -708,7 +708,7 @@ def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>;
 // Processor assist
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[FXb], (instregex "PPA$")>;
+def : InstRW<[FXb, GroupAlone], (instregex "PPA$")>;
 
 //===----------------------------------------------------------------------===//
 // Miscellaneous Instructions.
@@ -1276,9 +1276,9 @@ def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>;
 def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>;
 def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>;
-def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSLB$")>;
+def : InstRW<[VecXsPm], (instregex "VSLB$")>;
 def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>;
-def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSR(A|L)B$")>;
+def : InstRW<[VecXsPm], (instregex "VSR(A|L)B$")>;
 def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
 def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>;
@@ -1435,9 +1435,9 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
 // Vector: Packed-decimal instructions
 //===----------------------------------------------------------------------===//
 
-def : InstRW<[VecDF, VecDF, Lat10, GroupAlone], (instregex "VLIP$")>;
-def : InstRW<[VecDFX, LSU, Lat12, GroupAlone], (instregex "VPKZ$")>;
-def : InstRW<[VecDFX, FXb, LSU, Lat12, GroupAlone], (instregex "VUPKZ$")>;
+def : InstRW<[VecDF, VecDF, Lat10], (instregex "VLIP$")>;
+def : InstRW<[VecDFX, LSU, GroupAlone], (instregex "VPKZ$")>;
+def : InstRW<[VecDFX, FXb, LSU, Lat12, BeginGroup], (instregex "VUPKZ$")>;
 def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVB(G)?$")>;
 def : InstRW<[VecDF, VecDF, FXb, Lat20, GroupAlone], (instregex "VCVD(G)?$")>;
 def : InstRW<[VecDFX], (instregex "V(A|S)P$")>;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 3c4589ab18f6..8f24f98be681 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1055,7 +1055,10 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
 
   // Scale the leading zero count down based on the actual size of the value.
   // Also scale it down based on the size of the shift.
-  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
+  unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
+  if (MaskLZ < ScaleDown)
+    return true;
+  MaskLZ -= ScaleDown;
 
   // The final check is to ensure that any masked out high bits of X are
   // already known to be zero. Otherwise, the mask has a semantic impact
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 44eecd664714..ba8eb8656585 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1672,8 +1672,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
   // TODO: These control memcmp expansion in CGP and could be raised higher, but
   // that needs to benchmarked and balanced with the potential use of vector
-  // load/store types (PR33329).
-  MaxLoadsPerMemcmp = 4;
+  // load/store types (PR33329, PR33914).
+  MaxLoadsPerMemcmp = 2;
   MaxLoadsPerMemcmpOptSize = 2;
 
   // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
@@ -22022,8 +22022,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
       return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
 
     // i64 SRA needs to be performed as partial shifts.
-    if ((VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
-        Op.getOpcode() == ISD::SRA && !Subtarget.hasXOP())
+    if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
+         (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
+        Op.getOpcode() == ISD::SRA)
       return ArithmeticShiftRight64(ShiftAmt);
 
     if (VT == MVT::v16i8 ||
```
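The X86ISelDAGToDAG guard exists because `MaskLZ` is unsigned: when the scale-down amount exceeds the leading-zero count, the old unchecked subtraction wrapped around instead of going negative. A toy illustration of the failure mode and the fix (the variable values are made up):

```cpp
#include <iostream>

int main() {
  unsigned MaskLZ = 3;      // few leading zeros in the mask
  unsigned ScaleDown = 40;  // e.g. (64 - 32) + a shift amount of 8
  if (MaskLZ < ScaleDown) { // the patch now bails out here ("return true")
    std::cout << "would underflow; fold rejected\n";
    return 0;
  }
  MaskLZ -= ScaleDown;      // without the guard: 3 - 40 wraps to 4294967259
  std::cout << MaskLZ << '\n';
  return 0;
}
```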
```diff
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 4056cc5cb346..dc9143bebc45 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -64,6 +64,11 @@ ImplicationSearchThreshold(
             "condition to use to thread over a weaker condition"),
   cl::init(3), cl::Hidden);
 
+static cl::opt<bool> PrintLVIAfterJumpThreading(
+    "print-lvi-after-jump-threading",
+    cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
+    cl::Hidden);
+
 namespace {
   /// This pass performs 'jump threading', which looks at blocks that have
   /// multiple predecessors and multiple successors.  If one or more of the
@@ -93,9 +98,10 @@ namespace {
     bool runOnFunction(Function &F) override;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
+      if (PrintLVIAfterJumpThreading)
+        AU.addRequired<DominatorTreeWrapperPass>();
       AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<LazyValueInfoWrapperPass>();
-      AU.addPreserved<LazyValueInfoWrapperPass>();
       AU.addPreserved<GlobalsAAWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
     }
@@ -137,8 +143,14 @@ bool JumpThreading::runOnFunction(Function &F) {
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
 
-  return Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
-                      std::move(BPI));
+  bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
+                              std::move(BPI));
+  if (PrintLVIAfterJumpThreading) {
+    dbgs() << "LVI for function '" << F.getName() << "':\n";
+    LVI->printLVI(F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+                  dbgs());
+  }
+  return Changed;
 }
 
 PreservedAnalyses JumpThreadingPass::run(Function &F,
@@ -231,13 +243,15 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
     // Can't thread an unconditional jump, but if the block is "almost
    // empty", we can replace uses of it with uses of the successor and make
     // this dead.
-    // We should not eliminate the loop header either, because eliminating
-    // a loop header might later prevent LoopSimplify from transforming nested
-    // loops into simplified form.
+    // We should not eliminate the loop header or latch either, because
+    // eliminating a loop header or latch might later prevent LoopSimplify
+    // from transforming nested loops into simplified form. We will rely on
+    // later passes in backend to clean up empty blocks.
     if (BI && BI->isUnconditional() &&
         BB != &BB->getParent()->getEntryBlock() &&
         // If the terminator is the only non-phi instruction, try to nuke it.
-        BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
+        BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
+        !LoopHeaders.count(BI->getSuccessor(0))) {
      // FIXME: It is always conservatively correct to drop the info
      // for a block even if it doesn't get erased. This isn't totally
      // awesome, but it allows us to use AssertingVH to prevent nasty
```
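With the new hidden flag, the LazyValueInfo cache can be inspected after the pass runs, e.g. with something like `opt -jump-threading -print-lvi-after-jump-threading -disable-output input.ll` (an assumed invocation; the flag is `cl::Hidden`, so it does not appear in `-help`). Note that enabling it also makes the pass require `DominatorTreeWrapperPass`, which the printing path consumes, and that the pass no longer claims to preserve LVI.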
```diff
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 58b70be95d99..3c522786641a 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -1376,16 +1376,21 @@ Value *llvm::createTargetReduction(IRBuilder<> &Builder,
   }
 }
 
-void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
-  if (auto *VecOp = dyn_cast<Instruction>(I)) {
-    if (auto *I0 = dyn_cast<Instruction>(VL[0])) {
-      // VecOVp is initialized to the 0th scalar, so start counting from index
-      // '1'.
-      VecOp->copyIRFlags(I0);
-      for (int i = 1, e = VL.size(); i < e; ++i) {
-        if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
-          VecOp->andIRFlags(Scalar);
-      }
-    }
+void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
+  auto *VecOp = dyn_cast<Instruction>(I);
+  if (!VecOp)
+    return;
+  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
+                                            : dyn_cast<Instruction>(OpValue);
+  if (!Intersection)
+    return;
+  const unsigned Opcode = Intersection->getOpcode();
+  VecOp->copyIRFlags(Intersection);
+  for (auto *V : VL) {
+    auto *Instr = dyn_cast<Instruction>(V);
+    if (!Instr)
+      continue;
+    if (OpValue == nullptr || Opcode == Instr->getOpcode())
+      VecOp->andIRFlags(V);
   }
 }
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index dee658f98393..8784b9702141 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5656,20 +5656,22 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
 bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
                                           IRBuilder<> &Builder) {
   BasicBlock *BB = BI->getParent();
+  BasicBlock *Succ = BI->getSuccessor(0);
 
   if (SinkCommon && SinkThenElseCodeToEnd(BI))
     return true;
 
   // If the Terminator is the only non-phi instruction, simplify the block.
-  // if LoopHeader is provided, check if the block is a loop header
-  // (This is for early invocations before loop simplify and vectorization
-  // to keep canonical loop forms for nested loops.
-  // These blocks can be eliminated when the pass is invoked later
-  // in the back-end.)
+  // if LoopHeader is provided, check if the block or its successor is a loop
+  // header (This is for early invocations before loop simplify and
+  // vectorization to keep canonical loop forms for nested loops. These blocks
+  // can be eliminated when the pass is invoked later in the back-end.)
+  bool NeedCanonicalLoop =
+      !LateSimplifyCFG &&
+      (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
   BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
   if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
-      (!LoopHeaders || !LoopHeaders->count(BB)) &&
-      TryToSimplifyUncondBranchFromEmptyBlock(BB))
+      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
     return true;
 
   // If the only instruction in the block is a seteq/setne comparison
```
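The new `propagateIRFlags` overload intersects IR flags only across scalars that share `OpValue`'s opcode, so in mixed-opcode vectorization (e.g. SLP's alternating add/sub) one operation's flags no longer poison the other's. The contract, modeled with a toy flag bitmask instead of a full LLVM module (names and the flag encoding are ours):

```cpp
#include <cstdint>
#include <vector>

// Toy model: an opcode plus wrap/fast-math style flags packed in a bitmask.
struct Op {
  unsigned Opcode;
  uint32_t Flags;
};

// Mirrors the rewritten loop above: start from the representative's flags
// (copyIRFlags) and AND in only the scalars with a matching opcode
// (andIRFlags).
static uint32_t intersectFlags(const Op &Rep, const std::vector<Op> &Scalars) {
  uint32_t Flags = Rep.Flags;
  for (const Op &S : Scalars)
    if (S.Opcode == Rep.Opcode)
      Flags &= S.Flags;
  return Flags;
}
```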
