diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-02-01 21:07:55 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-02-01 21:07:55 +0000 |
commit | 4a6a1ccbecd7e34f40b05b4ba0a05d0031dd1eff (patch) | |
tree | bd998e25df07b7abd964ad088180d19152336f8d | |
parent | a096e0bdf6cfa020569afca490d8e4c9ac8ebb01 (diff) |
Notes
46 files changed, 756 insertions, 315 deletions
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 8ef9f6b86c515..c28a3829bfee1 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -15,7 +15,7 @@ Introduction ============ This document contains the release notes for the LLVM Compiler Infrastructure, -release 5.0.0. Here we describe the status of LLVM, including major improvements +release 6.0.0. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code. All LLVM releases may be downloaded from the `LLVM releases web site <http://llvm.org/releases/>`_. diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 1c51523b15730..1fdb3cff5372b 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -508,7 +508,8 @@ class Value; /// -> LHS = %a, RHS = i32 4, *CastOp = Instruction::SExt /// SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, - Instruction::CastOps *CastOp = nullptr); + Instruction::CastOps *CastOp = nullptr, + unsigned Depth = 0); inline SelectPatternResult matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS, Instruction::CastOps *CastOp = nullptr) { diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index 7ebde03a758c5..85b55e85469a1 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -422,14 +422,21 @@ class MCFillFragment : public MCFragment { uint8_t Value; /// The number of bytes to insert. - uint64_t Size; + const MCExpr &Size; + + /// Source location of the directive that this fragment was created for. + SMLoc Loc; public: - MCFillFragment(uint8_t Value, uint64_t Size, MCSection *Sec = nullptr) - : MCFragment(FT_Fill, false, 0, Sec), Value(Value), Size(Size) {} + MCFillFragment(uint8_t Value, const MCExpr &Size, SMLoc Loc, + MCSection *Sec = nullptr) + : MCFragment(FT_Fill, false, 0, Sec), Value(Value), Size(Size), Loc(Loc) { + } uint8_t getValue() const { return Value; } - uint64_t getSize() const { return Size; } + const MCExpr &getSize() const { return Size; } + + SMLoc getLoc() const { return Loc; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Fill; diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index a3dbc56ebc10f..43ed00b4a7a7f 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -161,7 +161,6 @@ public: bool EmitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, SMLoc Loc) override; using MCStreamer::emitFill; - void emitFill(uint64_t NumBytes, uint8_t FillValue) override; void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc = SMLoc()) override; void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index a820517007082..28b326ae9b87a 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -662,7 +662,7 @@ public: /// \brief Emit NumBytes bytes worth of the value specified by FillValue. /// This implements directives such as '.space'. - virtual void emitFill(uint64_t NumBytes, uint8_t FillValue); + void emitFill(uint64_t NumBytes, uint8_t FillValue); /// \brief Emit \p Size bytes worth of the value specified by \p FillValue. /// diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a0032f99ec209..46ac3f451f81a 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -4165,17 +4165,18 @@ static SelectPatternResult matchClamp(CmpInst::Predicate Pred, /// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, - Value *TrueVal, Value *FalseVal) { + Value *TVal, Value *FVal, + unsigned Depth) { // TODO: Allow FP min/max with nnan/nsz. assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); Value *A, *B; - SelectPatternResult L = matchSelectPattern(TrueVal, A, B); + SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); if (!SelectPatternResult::isMinOrMax(L.Flavor)) return {SPF_UNKNOWN, SPNB_NA, false}; Value *C, *D; - SelectPatternResult R = matchSelectPattern(FalseVal, C, D); + SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4214,7 +4215,7 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, break; return {SPF_UNKNOWN, SPNB_NA, false}; default: - llvm_unreachable("Bad flavor while matching min/max"); + return {SPF_UNKNOWN, SPNB_NA, false}; } // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) @@ -4240,7 +4241,8 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, - Value *&LHS, Value *&RHS) { + Value *&LHS, Value *&RHS, + unsigned Depth) { // Assume success. If there's no match, callers should not use these anyway. LHS = TrueVal; RHS = FalseVal; @@ -4249,7 +4251,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; - SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); + SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth); if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; @@ -4313,7 +4315,8 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, FastMathFlags FMF, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, - Value *&LHS, Value *&RHS) { + Value *&LHS, Value *&RHS, + unsigned Depth) { LHS = CmpLHS; RHS = CmpRHS; @@ -4429,7 +4432,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, } if (CmpInst::isIntPredicate(Pred)) - return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); + return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth); // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar // may return either -0.0 or 0.0, so fcmp/select pair has stricter @@ -4550,7 +4553,11 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, } SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, - Instruction::CastOps *CastOp) { + Instruction::CastOps *CastOp, + unsigned Depth) { + if (Depth >= MaxDepth) + return {SPF_UNKNOWN, SPNB_NA, false}; + SelectInst *SI = dyn_cast<SelectInst>(V); if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4579,7 +4586,7 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, cast<CastInst>(TrueVal)->getOperand(0), C, - LHS, RHS); + LHS, RHS, Depth); } if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { // If this is a potential fmin/fmax with a cast to integer, then ignore @@ -4588,11 +4595,11 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, C, cast<CastInst>(FalseVal)->getOperand(0), - LHS, RHS); + LHS, RHS, Depth); } } return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, - LHS, RHS); + LHS, RHS, Depth); } /// Return true if "icmp Pred LHS RHS" is always true. diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 705d4ded5b569..a329a71e2c95c 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -812,6 +812,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { auto TII = MF->getTarget().getIntrinsicInfo(); const Function *F = CI.getCalledFunction(); + // FIXME: support Windows dllimport function calls. + if (F && F->hasDLLImportStorageClass()) + return false; + if (CI.isInlineAsm()) return translateInlineAsm(CI, MIRBuilder); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c7118201b7530..6bebe180fefd5 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -661,7 +661,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { } case TargetOpcode::G_FCONSTANT: { unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); - MIRBuilder.buildFConstant(DstExt, *MI.getOperand(1).getFPImm()); + const ConstantFP *CFP = MI.getOperand(1).getFPImm(); + APFloat Val = CFP->getValueAPF(); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); + auto LLT2Sem = [](LLT Ty) { + switch (Ty.getSizeInBits()) { + case 32: + return &APFloat::IEEEsingle(); + break; + case 64: + return &APFloat::IEEEdouble(); + break; + default: + llvm_unreachable("Unhandled fp widen type"); + } + }; + bool LosesInfo; + Val.convert(*LLT2Sem(WideTy), APFloat::rmTowardZero, &LosesInfo); + MIRBuilder.buildFConstant(DstExt, *ConstantFP::get(Ctx, Val)); MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt); MI.eraseFromParent(); return Legalized; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 6a5282cbbbffb..17d9492d942e8 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -193,9 +193,10 @@ namespace { void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); void usePhysReg(MachineOperand &MO); - void definePhysReg(MachineInstr &MI, MCPhysReg PhysReg, RegState NewState); + void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, + RegState NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; - void assignVirtToPhysReg(LiveReg&, MCPhysReg PhysReg); + void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); @@ -434,8 +435,8 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { /// Mark PhysReg as reserved or free after spilling any virtregs. This is very /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. -void RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg PhysReg, - RegState NewState) { +void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, + MCPhysReg PhysReg, RegState NewState) { markRegUsedInInstr(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: @@ -857,7 +858,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { // Add live-in registers as live. for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins()) if (MRI->isAllocatable(LI.PhysReg)) - definePhysReg(*MII, LI.PhysReg, regReserved); + definePhysReg(MII, LI.PhysReg, regReserved); VirtDead.clear(); Coalesced.clear(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index befd797e75b42..bd9fcfb5c1e8d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1380,8 +1380,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. FastISel *FastIS = nullptr; - if (TM.Options.EnableFastISel) + if (TM.Options.EnableFastISel) { + DEBUG(dbgs() << "Enabling fast-isel\n"); FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + } setupSwiftErrorVals(Fn, TLI, FuncInfo); diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index c90a93d7e2471..6c91bdc1c5244 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -717,6 +717,8 @@ bool TargetPassConfig::addCoreISelPasses() { if (EnableGlobalISel == cl::BOU_TRUE || (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() && EnableFastISelOption != cl::BOU_TRUE)) { + TM->setFastISel(false); + if (addIRTranslator()) return true; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index e521b6e7c7049..bddd264fe30b0 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -192,9 +192,6 @@ public: void EmitGPRel32Value(const MCExpr *Value) override; - - void emitFill(uint64_t NumBytes, uint8_t FillValue) override; - void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc = SMLoc()) override; @@ -965,17 +962,12 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { EmitEOL(); } -/// emitFill - Emit NumBytes bytes worth of the value specified by -/// FillValue. This implements directives such as '.space'. -void MCAsmStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) { - if (NumBytes == 0) return; - - const MCExpr *E = MCConstantExpr::create(NumBytes, getContext()); - emitFill(*E, FillValue); -} - void MCAsmStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc) { + int64_t IntNumBytes; + if (NumBytes.evaluateAsAbsolute(IntNumBytes) && IntNumBytes == 0) + return; + if (const char *ZeroDirective = MAI->getZeroDirective()) { // FIXME: Emit location directives OS << ZeroDirective; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 01d165944bece..bd881b4d6e851 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -281,8 +281,18 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, return cast<MCRelaxableFragment>(F).getContents().size(); case MCFragment::FT_CompactEncodedInst: return cast<MCCompactEncodedInstFragment>(F).getContents().size(); - case MCFragment::FT_Fill: - return cast<MCFillFragment>(F).getSize(); + case MCFragment::FT_Fill: { + auto &FF = cast<MCFillFragment>(F); + int64_t Size = 0; + if (!FF.getSize().evaluateAsAbsolute(Size, Layout)) + getContext().reportError(FF.getLoc(), + "expected assembly-time absolute expression"); + if (Size < 0) { + getContext().reportError(FF.getLoc(), "invalid number of bytes"); + return 0; + } + return Size; + } case MCFragment::FT_LEB: return cast<MCLEBFragment>(F).getContents().size(); @@ -540,7 +550,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, for (unsigned I = 1; I < MaxChunkSize; ++I) Data[I] = Data[0]; - uint64_t Size = FF.getSize(); + uint64_t Size = FragmentSize; for (unsigned ChunkSize = MaxChunkSize; ChunkSize; ChunkSize /= 2) { StringRef Ref(Data, ChunkSize); for (uint64_t I = 0, E = Size / ChunkSize; I != E; ++I) diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 82b75afabb3cd..3969143bb2c72 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -411,29 +411,19 @@ void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - getAssembler().registerSection(*Section); - - // The symbol may not be present, which only creates the section. - if (!Symbol) - return; - // On darwin all virtual sections have zerofill type. assert(Section->isVirtualSection() && "Section does not have zerofill type!"); - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + PushSection(); + SwitchSection(Section); - getAssembler().registerSymbol(*Symbol); - - // Emit an align fragment if necessary. - if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section); - - MCFragment *F = new MCFillFragment(0, Size, Section); - Symbol->setFragment(F); - - // Update the maximum alignment on the zero fill section if necessary. - if (ByteAlignment > Section->getAlignment()) - Section->setAlignment(ByteAlignment); + // The symbol may not be present, which only creates the section. + if (Symbol) { + EmitValueToAlignment(ByteAlignment, 0, 1, 0); + EmitLabel(Symbol); + EmitZeros(Size); + } + PopSection(); } // This should always be called with the thread local bss section. Like the diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 15cc0faf5407f..aecb3844622b1 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -577,28 +577,13 @@ bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name, return false; } -void MCObjectStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) { - assert(getCurrentSectionOnly() && "need a section"); - insert(new MCFillFragment(FillValue, NumBytes)); -} - void MCObjectStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc) { MCDataFragment *DF = getOrCreateDataFragment(); flushPendingLabels(DF, DF->getContents().size()); - int64_t IntNumBytes; - if (!NumBytes.evaluateAsAbsolute(IntNumBytes, getAssembler())) { - getContext().reportError(Loc, "expected absolute expression"); - return; - } - - if (IntNumBytes <= 0) { - getContext().reportError(Loc, "invalid number of bytes"); - return; - } - - emitFill(IntNumBytes, FillValue); + assert(getCurrentSectionOnly() && "need a section"); + insert(new MCFillFragment(FillValue, NumBytes, Loc)); } void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size, diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 6e801ed8777cf..ed10ccbbb742a 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -184,8 +184,7 @@ void MCStreamer::EmitGPRel32Value(const MCExpr *Value) { /// Emit NumBytes bytes worth of the value specified by FillValue. /// This implements directives such as '.space'. void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) { - for (uint64_t i = 0, e = NumBytes; i != e; ++i) - EmitIntValue(FillValue, 1); + emitFill(*MCConstantExpr::create(NumBytes, getContext()), FillValue); } void MCStreamer::emitFill(uint64_t NumValues, int64_t Size, int64_t Expr) { diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp index c2583d95c5eda..8582d9adafb84 100644 --- a/lib/MC/MCWinCOFFStreamer.cpp +++ b/lib/MC/MCWinCOFFStreamer.cpp @@ -257,20 +257,13 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size, auto *Symbol = cast<MCSymbolCOFF>(S); MCSection *Section = getContext().getObjectFileInfo()->getBSSSection(); - getAssembler().registerSection(*Section); - if (Section->getAlignment() < ByteAlignment) - Section->setAlignment(ByteAlignment); - - getAssembler().registerSymbol(*Symbol); + PushSection(); + SwitchSection(Section); + EmitValueToAlignment(ByteAlignment, 0, 1, 0); + EmitLabel(Symbol); Symbol->setExternal(false); - - if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0, - ByteAlignment, Section); - - MCFillFragment *Fragment = new MCFillFragment( - /*Value=*/0, Size, Section); - Symbol->setFragment(Fragment); + EmitZeros(Size); + PopSection(); } void MCWinCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 0f0b645492ee0..66236e3abfaba 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -528,7 +528,10 @@ static void addData(SmallVectorImpl<char> &DataBytes, Align->getMaxBytesToEmit()); DataBytes.resize(Size, Value); } else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) { - DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue()); + int64_t Size; + if (!Fill->getSize().evaluateAsAbsolute(Size)) + llvm_unreachable("The fill should be an assembler constant"); + DataBytes.insert(DataBytes.end(), Size, Fill->getValue()); } else { const auto &DataFrag = cast<MCDataFragment>(Frag); const SmallVectorImpl<char> &Contents = DataFrag.getContents(); diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 022200986d2bf..974f968ec2c4d 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -476,26 +476,27 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { // ADRP + LDRX BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), ADRPReg) - .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE); + .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); ResultReg = createResultReg(&AArch64::GPR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); + .addReg(ADRPReg) + .addGlobalAddress(GV, 0, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags); } else { // ADRP + ADDX BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), ADRPReg) - .addGlobalAddress(GV, 0, AArch64II::MO_PAGE); + .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); ResultReg = createResultReg(&AArch64::GPR64spRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC) - .addImm(0); + .addReg(ADRPReg) + .addGlobalAddress(GV, 0, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) + .addImm(0); } return ResultReg; } diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index b85b4e082996b..2bb9e381073af 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -929,6 +929,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I, return false; } + // FIXME: PR36018: Volatile loads in some cases are incorrectly selected by + // folding with an extend. Until we have a G_SEXTLOAD solution bail out if + // we hit one. + if (Opcode == TargetOpcode::G_LOAD && MemOp.isVolatile()) + return false; + const unsigned PtrReg = I.getOperand(1).getReg(); #ifndef NDEBUG const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 688bb936d0caa..39b7644343882 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -189,15 +189,18 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) return AArch64II::MO_GOT; + unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT + : AArch64II::MO_NO_FLAG; + if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) - return AArch64II::MO_GOT; + return AArch64II::MO_GOT | Flags; // The small code model's direct accesses use ADRP, which cannot // necessarily produce the value 0 (if the code is above 4GB). if (useSmallAddressing() && GV->hasExternalWeakLinkage()) - return AArch64II::MO_GOT; + return AArch64II::MO_GOT | Flags; - return AArch64II::MO_NO_FLAG; + return Flags; } unsigned char AArch64Subtarget::classifyGlobalFunctionReference( diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp index a2f844d7854ec..eb7277b7a5bb6 100644 --- a/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -210,65 +210,73 @@ void SIInsertSkips::kill(MachineInstr &MI) { switch (MI.getOperand(2).getImm()) { case ISD::SETOEQ: case ISD::SETEQ: - Opcode = AMDGPU::V_CMPX_EQ_F32_e32; + Opcode = AMDGPU::V_CMPX_EQ_F32_e64; break; case ISD::SETOGT: case ISD::SETGT: - Opcode = AMDGPU::V_CMPX_LT_F32_e32; + Opcode = AMDGPU::V_CMPX_LT_F32_e64; break; case ISD::SETOGE: case ISD::SETGE: - Opcode = AMDGPU::V_CMPX_LE_F32_e32; + Opcode = AMDGPU::V_CMPX_LE_F32_e64; break; case ISD::SETOLT: case ISD::SETLT: - Opcode = AMDGPU::V_CMPX_GT_F32_e32; + Opcode = AMDGPU::V_CMPX_GT_F32_e64; break; case ISD::SETOLE: case ISD::SETLE: - Opcode = AMDGPU::V_CMPX_GE_F32_e32; + Opcode = AMDGPU::V_CMPX_GE_F32_e64; break; case ISD::SETONE: case ISD::SETNE: - Opcode = AMDGPU::V_CMPX_LG_F32_e32; + Opcode = AMDGPU::V_CMPX_LG_F32_e64; break; case ISD::SETO: - Opcode = AMDGPU::V_CMPX_O_F32_e32; + Opcode = AMDGPU::V_CMPX_O_F32_e64; break; case ISD::SETUO: - Opcode = AMDGPU::V_CMPX_U_F32_e32; + Opcode = AMDGPU::V_CMPX_U_F32_e64; break; case ISD::SETUEQ: - Opcode = AMDGPU::V_CMPX_NLG_F32_e32; + Opcode = AMDGPU::V_CMPX_NLG_F32_e64; break; case ISD::SETUGT: - Opcode = AMDGPU::V_CMPX_NGE_F32_e32; + Opcode = AMDGPU::V_CMPX_NGE_F32_e64; break; case ISD::SETUGE: - Opcode = AMDGPU::V_CMPX_NGT_F32_e32; + Opcode = AMDGPU::V_CMPX_NGT_F32_e64; break; case ISD::SETULT: - Opcode = AMDGPU::V_CMPX_NLE_F32_e32; + Opcode = AMDGPU::V_CMPX_NLE_F32_e64; break; case ISD::SETULE: - Opcode = AMDGPU::V_CMPX_NLT_F32_e32; + Opcode = AMDGPU::V_CMPX_NLT_F32_e64; break; case ISD::SETUNE: - Opcode = AMDGPU::V_CMPX_NEQ_F32_e32; + Opcode = AMDGPU::V_CMPX_NEQ_F32_e64; break; default: llvm_unreachable("invalid ISD:SET cond code"); } - // TODO: Allow this: - if (!MI.getOperand(0).isReg() || - !TRI->isVGPR(MBB.getParent()->getRegInfo(), - MI.getOperand(0).getReg())) - llvm_unreachable("SI_KILL operand should be a VGPR"); - - BuildMI(MBB, &MI, DL, TII->get(Opcode)) - .add(MI.getOperand(1)) - .add(MI.getOperand(0)); + assert(MI.getOperand(0).isReg()); + + if (TRI->isVGPR(MBB.getParent()->getRegInfo(), + MI.getOperand(0).getReg())) { + Opcode = AMDGPU::getVOPe32(Opcode); + BuildMI(MBB, &MI, DL, TII->get(Opcode)) + .add(MI.getOperand(1)) + .add(MI.getOperand(0)); + } else { + BuildMI(MBB, &MI, DL, TII->get(Opcode)) + .addReg(AMDGPU::VCC, RegState::Define) + .addImm(0) // src0 modifiers + .add(MI.getOperand(1)) + .addImm(0) // src1 modifiers + .add(MI.getOperand(0)) + .addImm(0); // omod + } break; } case AMDGPU::SI_KILL_I1_TERMINATOR: { diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 1f02600a79827..79a8e3049702d 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -39,11 +39,11 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; + unsigned Flags = MI->getFlags(); - if (TSFlags & X86II::LOCK) + if ((TSFlags & X86II::LOCK) || (Flags & X86::IP_HAS_LOCK)) OS << "\tlock\t"; - unsigned Flags = MI->getFlags(); if (Flags & X86::IP_HAS_REPEAT_NE) OS << "\trepne\t"; else if (Flags & X86::IP_HAS_REPEAT) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e7d9334abe141..3a163637da26a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -31776,9 +31776,10 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Check all uses of the condition operand to check whether it will be // consumed by non-BLEND instructions. Those may require that all bits // are set properly. - for (SDNode *U : Cond->uses()) { + for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end(); + UI != UE; ++UI) { // TODO: Add other opcodes eventually lowered into BLEND. - if (U->getOpcode() != ISD::VSELECT) + if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0) return SDValue(); } diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp index caa73b2ff01ce..4edea7cc3c825 100644 --- a/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -142,10 +142,11 @@ recordConditions(const CallSite &CS, BasicBlock *Pred, recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions); BasicBlock *From = Pred; BasicBlock *To = Pred; - SmallPtrSet<BasicBlock *, 4> Visited = {From}; + SmallPtrSet<BasicBlock *, 4> Visited; while (!Visited.count(From->getSinglePredecessor()) && (From = From->getSinglePredecessor())) { recordCondition(CS, From, To, Conditions); + Visited.insert(From); To = From; } } diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 525425bd0f0cd..b8fb80b6cc266 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" @@ -176,8 +177,9 @@ class StructurizeCFG : public RegionPass { Region *ParentRegion; DominatorTree *DT; + LoopInfo *LI; - std::deque<RegionNode *> Order; + SmallVector<RegionNode *, 8> Order; BBSet Visited; BBPhiMap DeletedPhis; @@ -202,7 +204,7 @@ class StructurizeCFG : public RegionPass { void gatherPredicates(RegionNode *N); - void analyzeNode(RegionNode *N); + void collectInfos(); void insertConditions(bool Loops); @@ -256,6 +258,7 @@ public: AU.addRequired<DivergenceAnalysis>(); AU.addRequiredID(LowerSwitchID); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); RegionPass::getAnalysisUsage(AU); @@ -289,17 +292,55 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { /// \brief Build up the general order of nodes void StructurizeCFG::orderNodes() { - assert(Visited.empty()); - assert(Predicates.empty()); - assert(Loops.empty()); - assert(LoopPreds.empty()); - - // This must be RPO order for the back edge detection to work - for (RegionNode *RN : ReversePostOrderTraversal<Region*>(ParentRegion)) { - // FIXME: Is there a better order to use for structurization? - Order.push_back(RN); - analyzeNode(RN); + ReversePostOrderTraversal<Region*> RPOT(ParentRegion); + SmallDenseMap<Loop*, unsigned, 8> LoopBlocks; + + // The reverse post-order traversal of the list gives us an ordering close + // to what we want. The only problem with it is that sometimes backedges + // for outer loops will be visited before backedges for inner loops. + for (RegionNode *RN : RPOT) { + BasicBlock *BB = RN->getEntry(); + Loop *Loop = LI->getLoopFor(BB); + ++LoopBlocks[Loop]; } + + unsigned CurrentLoopDepth = 0; + Loop *CurrentLoop = nullptr; + for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + BasicBlock *BB = (*I)->getEntry(); + unsigned LoopDepth = LI->getLoopDepth(BB); + + if (is_contained(Order, *I)) + continue; + + if (LoopDepth < CurrentLoopDepth) { + // Make sure we have visited all blocks in this loop before moving back to + // the outer loop. + + auto LoopI = I; + while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { + LoopI++; + BasicBlock *LoopBB = (*LoopI)->getEntry(); + if (LI->getLoopFor(LoopBB) == CurrentLoop) { + --BlockCount; + Order.push_back(*LoopI); + } + } + } + + CurrentLoop = LI->getLoopFor(BB); + if (CurrentLoop) + LoopBlocks[CurrentLoop]--; + + CurrentLoopDepth = LoopDepth; + Order.push_back(*I); + } + + // This pass originally used a post-order traversal and then operated on + // the list in reverse. Now that we are using a reverse post-order traversal + // rather than re-working the whole pass to operate on the list in order, + // we just reverse the list and continue to operate on it in reverse. + std::reverse(Order.begin(), Order.end()); } /// \brief Determine the end of the loops @@ -425,19 +466,32 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { } /// \brief Collect various loop and predicate infos -void StructurizeCFG::analyzeNode(RegionNode *RN) { - DEBUG(dbgs() << "Visiting: " - << (RN->isSubRegion() ? "SubRegion with entry: " : "") - << RN->getEntry()->getName() << '\n'); +void StructurizeCFG::collectInfos() { + // Reset predicate + Predicates.clear(); + + // and loop infos + Loops.clear(); + LoopPreds.clear(); - // Analyze all the conditions leading to a node - gatherPredicates(RN); + // Reset the visited nodes + Visited.clear(); + + for (RegionNode *RN : reverse(Order)) { + DEBUG(dbgs() << "Visiting: " + << (RN->isSubRegion() ? "SubRegion with entry: " : "") + << RN->getEntry()->getName() << " Loop Depth: " + << LI->getLoopDepth(RN->getEntry()) << "\n"); + + // Analyze all the conditions leading to a node + gatherPredicates(RN); - // Remember that we've seen this node - Visited.insert(RN->getEntry()); + // Remember that we've seen this node + Visited.insert(RN->getEntry()); - // Find the last back edges - analyzeLoops(RN); + // Find the last back edges + analyzeLoops(RN); + } } /// \brief Insert the missing branch conditions @@ -610,7 +664,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { LLVMContext &Context = Func->getContext(); BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : - Order.front()->getEntry(); + Order.back()->getEntry(); BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); DT->addNewBlock(Flow, Dominator); @@ -690,8 +744,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { /// Take one node from the order vector and wire it up void StructurizeCFG::wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd) { - RegionNode *Node = Order.front(); - Order.pop_front(); + RegionNode *Node = Order.pop_back_val(); Visited.insert(Node->getEntry()); if (isPredictableTrue(Node)) { @@ -715,7 +768,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, PrevNode = Node; while (!Order.empty() && !Visited.count(LoopEnd) && - dominatesPredicates(Entry, Order.front())) { + dominatesPredicates(Entry, Order.back())) { handleLoops(false, LoopEnd); } @@ -726,7 +779,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, void StructurizeCFG::handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd) { - RegionNode *Node = Order.front(); + RegionNode *Node = Order.back(); BasicBlock *LoopStart = Node->getEntry(); if (!Loops.count(LoopStart)) { @@ -871,9 +924,10 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { ParentRegion = R; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); orderNodes(); - + collectInfos(); createFlow(); insertConditions(false); insertConditions(true); diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 8c9ecbc3503e2..55fff3f3872a6 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -536,13 +537,23 @@ Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) { return None; } +static Metadata *cloneOrBuildODR(const MDNode &N) { + auto *CT = dyn_cast<DICompositeType>(&N); + // If ODR type uniquing is enabled, we would have uniqued composite types + // with identifiers during bitcode reading, so we can just use CT. + if (CT && CT->getContext().isODRUniquingDebugTypes() && + CT->getIdentifier() != "") + return const_cast<DICompositeType *>(CT); + return MDNode::replaceWithDistinct(N.clone()); +} + MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) { assert(N.isDistinct() && "Expected a distinct node"); assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node"); - DistinctWorklist.push_back(cast<MDNode>( - (M.Flags & RF_MoveDistinctMDs) - ? M.mapToSelf(&N) - : M.mapToMetadata(&N, MDNode::replaceWithDistinct(N.clone())))); + DistinctWorklist.push_back( + cast<MDNode>((M.Flags & RF_MoveDistinctMDs) + ? M.mapToSelf(&N) + : M.mapToMetadata(&N, cloneOrBuildODR(N)))); return DistinctWorklist.back(); } diff --git a/test/Analysis/ValueTracking/select-pattern.ll b/test/Analysis/ValueTracking/select-pattern.ll new file mode 100644 index 0000000000000..455df00ef1218 --- /dev/null +++ b/test/Analysis/ValueTracking/select-pattern.ll @@ -0,0 +1,46 @@ +; RUN: opt -simplifycfg < %s -S | FileCheck %s + +; The dead code would cause a select that had itself +; as an operand to be analyzed. This would then cause +; infinite recursion and eventual crash. + +define void @PR36045(i1 %t, i32* %b) { +; CHECK-LABEL: @PR36045( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + br i1 %t, label %if, label %end + +if: + br i1 %t, label %unreach, label %pre + +unreach: + unreachable + +pre: + %p = phi i32 [ 70, %if ], [ %sel, %for ] + br label %for + +for: + %cmp = icmp sgt i32 %p, 8 + %add = add i32 %p, 2 + %sel = select i1 %cmp, i32 %p, i32 %add + %cmp21 = icmp ult i32 %sel, 21 + br i1 %cmp21, label %pre, label %for.end + +for.end: + br i1 %t, label %unreach2, label %then12 + +then12: + store i32 0, i32* %b + br label %unreach2 + +unreach2: + %spec = phi i32 [ %sel, %for.end ], [ 42, %then12 ] + unreachable + +end: + ret void +} + diff --git a/test/CodeGen/AArch64/GlobalISel/fallback-nofastisel.ll b/test/CodeGen/AArch64/GlobalISel/fallback-nofastisel.ll new file mode 100644 index 0000000000000..faaa725486f16 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/fallback-nofastisel.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=aarch64_be-- %s -o /dev/null -debug-only=isel -O0 2>&1 | FileCheck %s +; REQUIRES: asserts + +; This test uses big endian in order to force an abort since it's not currently supported for GISel. +; The purpose is to check that we don't fall back to FastISel. Checking the pass structure is insufficient +; because the FastISel is set up in the SelectionDAGISel, so it doesn't appear on the pass structure. + +; CHECK-NOT: Enabling fast-ise +define void @empty() { + ret void +} diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll new file mode 100644 index 0000000000000..9bda39c9fca7f --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-volatile-load-pr36018.ll @@ -0,0 +1,14 @@ +; RUN: llc -O0 -mtriple=aarch64-apple-ios -o - %s | FileCheck %s + +@g = global i16 0, align 2 +declare void @bar(i32) + +; Check that only one load is generated. We fall back to +define hidden void @foo() { +; CHECK-NOT: ldrh +; CHECK: ldrsh + %1 = load volatile i16, i16* @g, align 2 + %2 = sext i16 %1 to i32 + call void @bar(i32 %2) + ret void +} diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir index 4ed84ed79bbaa..c3ed3316ad317 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -75,7 +75,7 @@ body: | ; CHECK: %w0 = COPY [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK: %x0 = COPY [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT half 0xH0000 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[C2]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; CHECK: %w0 = COPY [[ANYEXT]](s32) diff --git a/test/CodeGen/AArch64/dllimport.ll b/test/CodeGen/AArch64/dllimport.ll index fad049a54cd22..281c847a39a56 100644 --- a/test/CodeGen/AArch64/dllimport.ll +++ b/test/CodeGen/AArch64/dllimport.ll @@ -1,4 +1,6 @@ -; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s +; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,DAG-ISEL +; RUN: llc -mtriple aarch64-unknown-windows-msvc -fast-isel -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,FAST-ISEL +; RUN: llc -mtriple aarch64-unknown-windows-msvc -O0 -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,GLOBAL-ISEL,GLOBAL-ISEL-FALLBACK @var = external dllimport global i32 @ext = external global i32 @@ -23,7 +25,11 @@ define i32 @get_ext() { ; CHECK-LABEL: get_ext ; CHECK: adrp x8, ext -; CHECK: ldr w0, [x8, ext] +; DAG-ISEL: ldr w0, [x8, ext] +; FAST-ISEL: add x8, x8, ext +; FAST-ISEL: ldr w0, [x8] +; GLOBAL-ISEL-FALLBACK: add x8, x8, ext +; GLOBAL-ISEL-FALLBACK: ldr w0, [x8] ; CHECK: ret define i32* @get_var_pointer() { @@ -31,8 +37,8 @@ define i32* @get_var_pointer() { } ; CHECK-LABEL: get_var_pointer -; CHECK: adrp x0, __imp_var -; CHECK: ldr x0, [x0, __imp_var] +; CHECK: adrp [[REG1:x[0-9]+]], __imp_var +; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]], __imp_var] ; CHECK: ret define i32 @call_external() { @@ -51,4 +57,6 @@ define i32 @call_internal() { } ; CHECK-LABEL: call_internal -; CHECK: b internal +; DAG-ISEL: b internal +; FAST-ISEL: b internal +; GLOBAL-ISEL: bl internal diff --git a/test/CodeGen/AArch64/fast-regalloc-empty-bb-with-liveins.mir b/test/CodeGen/AArch64/fast-regalloc-empty-bb-with-liveins.mir new file mode 100644 index 0000000000000..fc19173a176b9 --- /dev/null +++ b/test/CodeGen/AArch64/fast-regalloc-empty-bb-with-liveins.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-apple-ios -run-pass regallocfast -o - %s | FileCheck %s +# This test used to crash the fast register alloc. +# Basically, when a basic block has liveins, the fast regalloc +# was deferencing the begin iterator of this block. However, +# when this block is empty and it will just crashed! +--- +name: crashing +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: crashing + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: %x0, %x1 + ; CHECK: bb.1: + ; CHECK: renamable %w0 = MOVi32imm -1 + ; CHECK: RET_ReallyLR implicit killed %w0 + bb.1: + liveins: %x0, %x1 + + bb.2: + %0:gpr32 = MOVi32imm -1 + %w0 = COPY %0 + RET_ReallyLR implicit %w0 + +... diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll index d6b0628956a02..fabed795c5686 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -234,6 +234,23 @@ define amdgpu_ps void @wqm(float %a) { ret void } +; This checks that we use the 64-bit encoding when the operand is a SGPR. +; SI-LABEL: {{^}}test_sgpr: +; SI: v_cmpx_ge_f32_e64 +define amdgpu_ps void @test_sgpr(float inreg %a) #0 { + %c = fcmp ole float %a, 1.000000e+00 + call void @llvm.amdgcn.kill(i1 %c) #1 + ret void +} + +; SI-LABEL: {{^}}test_non_inline_imm_sgpr: +; SI-NOT: v_cmpx_ge_f32_e64 +define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 { + %c = fcmp ole float %a, 1.500000e+00 + call void @llvm.amdgcn.kill(i1 %c) #1 + ret void +} + declare void @llvm.amdgcn.kill(i1) #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare i1 @llvm.amdgcn.wqm.vote(i1) diff --git a/test/CodeGen/AMDGPU/multilevel-break.ll b/test/CodeGen/AMDGPU/multilevel-break.ll index 5b556f12f0d6d..8cc02d497098b 100644 --- a/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/test/CodeGen/AMDGPU/multilevel-break.ll @@ -66,10 +66,9 @@ ENDIF: ; preds = %LOOP ; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop( ; OPT: llvm.amdgcn.break -; OPT: llvm.amdgcn.break +; OPT: llvm.amdgcn.loop ; OPT: llvm.amdgcn.if.break ; OPT: llvm.amdgcn.if.break -; OPT: llvm.amdgcn.loop ; OPT: llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}multi_if_break_loop: diff --git a/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/test/CodeGen/AMDGPU/nested-loop-conditions.ll index 96d2841e685f4..672549c8ea636 100644 --- a/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -124,100 +124,55 @@ bb23: ; preds = %bb10 ; Earlier version of above, before a run of the structurizer. ; IR-LABEL: @nested_loop_conditions( -; IR: %tmp1235 = icmp slt i32 %tmp1134, 9 -; IR: br i1 %tmp1235, label %bb14.lr.ph, label %Flow - -; IR: bb14.lr.ph: -; IR: br label %bb14 - -; IR: Flow3: -; IR: call void @llvm.amdgcn.end.cf(i64 %18) -; IR: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %17) -; IR: %1 = extractvalue { i1, i64 } %0, 0 -; IR: %2 = extractvalue { i1, i64 } %0, 1 -; IR: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4 - -; IR: bb4.bb13_crit_edge: -; IR: br label %Flow4 - -; IR: Flow4: -; IR: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %2) -; IR: br label %Flow - -; IR: bb13: -; IR: br label %bb31 - -; IR: Flow: -; IR: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ] -; IR: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4) -; IR: %6 = extractvalue { i1, i64 } %5, 0 -; IR: %7 = extractvalue { i1, i64 } %5, 1 -; IR: br i1 %6, label %bb13, label %bb31 - -; IR: bb14: -; IR: %phi.broken = phi i64 [ %18, %Flow2 ], [ 0, %bb14.lr.ph ] -; IR: %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %16, %Flow2 ] -; IR: %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %15, %Flow2 ] -; IR: %tmp15 = icmp eq i32 %tmp1037, 1 -; IR: %8 = xor i1 %tmp15, true -; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) -; IR: %10 = extractvalue { i1, i64 } %9, 0 -; IR: %11 = extractvalue { i1, i64 } %9, 1 -; IR: br i1 %10, label %bb31.loopexit, label %Flow1 +; IR: Flow7: +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %17) +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %15) +; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 +; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1 +; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow8 ; IR: Flow1: -; IR: %12 = call { i1, i64 } @llvm.amdgcn.else(i64 %11) -; IR: %13 = extractvalue { i1, i64 } %12, 0 -; IR: %14 = extractvalue { i1, i64 } %12, 1 -; IR: br i1 %13, label %bb16, label %Flow2 - -; IR: bb16: -; IR: %tmp17 = bitcast i64 %tmp3 to <2 x i32> -; IR: br label %bb18 +; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ] +; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ] +; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ] +; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ] +; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ] +; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) +; IR-NEXT: %18 = call i1 @llvm.amdgcn.loop(i64 %17) +; IR-NEXT: br i1 %18, label %Flow7, label %bb14 ; IR: Flow2: -; IR: %loop.phi = phi i64 [ %21, %bb21 ], [ %phi.broken, %Flow1 ] -; IR: %15 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %Flow1 ] -; IR: %16 = phi i32 [ %tmp10, %bb21 ], [ undef, %Flow1 ] -; IR: %17 = phi i1 [ %20, %bb21 ], [ false, %Flow1 ] -; IR: %18 = call i64 @llvm.amdgcn.else.break(i64 %14, i64 %loop.phi) -; IR: call void @llvm.amdgcn.end.cf(i64 %14) -; IR: %19 = call i1 @llvm.amdgcn.loop(i64 %18) -; IR: br i1 %19, label %Flow3, label %bb14 - -; IR: bb18: -; IR: %tmp19 = load volatile i32, i32 addrspace(1)* undef -; IR: %tmp20 = icmp slt i32 %tmp19, 9 -; IR: br i1 %tmp20, label %bb21, label %bb18 +; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ] +; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ] +; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ] +; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ] +; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ] +; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ] +; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23) +; IR-NEXT: %25 = extractvalue { i1, i64 } %24, 0 +; IR-NEXT: %26 = extractvalue { i1, i64 } %24, 1 +; IR-NEXT: br i1 %25, label %bb21, label %Flow3 ; IR: bb21: -; IR: %tmp22 = extractelement <2 x i32> %tmp17, i64 1 -; IR: %tmp23 = lshr i32 %tmp22, 16 -; IR: %tmp24 = select i1 undef, i32 undef, i32 %tmp23 -; IR: %tmp25 = uitofp i32 %tmp24 to float -; IR: %tmp26 = fmul float %tmp25, 0x3EF0001000000000 -; IR: %tmp27 = fsub float %tmp26, undef -; IR: %tmp28 = fcmp olt float %tmp27, 5.000000e-01 -; IR: %tmp29 = select i1 %tmp28, i64 1, i64 2 -; IR: %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29 -; IR: %tmp7 = zext i32 %tmp30 to i64 -; IR: %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7 -; IR: %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16 -; IR: %tmp10 = extractelement <4 x i32> %tmp9, i64 0 -; IR: %tmp11 = load volatile i32, i32 addrspace(1)* undef -; IR: %tmp12 = icmp slt i32 %tmp11, 9 -; IR: %20 = xor i1 %tmp12, true -; IR: %21 = call i64 @llvm.amdgcn.if.break(i1 %20, i64 %phi.broken) -; IR: br label %Flow2 - -; IR: bb31.loopexit: -; IR: br label %Flow1 +; IR: %tmp12 = icmp slt i32 %tmp11, 9 +; IR-NEXT: %27 = xor i1 %tmp12, true +; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken) +; IR-NEXT: br label %Flow3 + +; IR: Flow3: +; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ] +; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ] +; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ] +; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ] +; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ] +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26) +; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4 ; IR: bb31: -; IR: call void @llvm.amdgcn.end.cf(i64 %7) -; IR: store volatile i32 0, i32 addrspace(1)* undef -; IR: ret void +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7) +; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef +; IR-NEXT: ret void ; GCN-LABEL: {{^}}nested_loop_conditions: diff --git a/test/CodeGen/X86/pr34592.ll b/test/CodeGen/X86/pr34592.ll new file mode 100644 index 0000000000000..09dfade1ee536 --- /dev/null +++ b/test/CodeGen/X86/pr34592.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s + +define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) { +; CHECK-LABEL: pluto: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: andq $-32, %rsp +; CHECK-NEXT: subq $352, %rsp # imm = 0x160 +; CHECK-NEXT: vmovaps 240(%rbp), %ymm8 +; CHECK-NEXT: vmovaps 208(%rbp), %ymm9 +; CHECK-NEXT: vmovaps 176(%rbp), %ymm10 +; CHECK-NEXT: vmovaps 144(%rbp), %ymm11 +; CHECK-NEXT: vmovaps 112(%rbp), %ymm12 +; CHECK-NEXT: vmovaps 80(%rbp), %ymm13 +; CHECK-NEXT: vmovaps 48(%rbp), %ymm14 +; CHECK-NEXT: vmovaps 16(%rbp), %ymm15 +; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,18446744071562067968,18446744071562067968] +; CHECK-NEXT: vblendvpd %ymm0, %ymm2, %ymm6, %ymm0 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm2[0,1],ymm13[2,3],ymm2[4,5,6,7] +; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm2[0,1],ymm8[2,3,4,5,6,7] +; CHECK-NEXT: vmovaps {{.*#+}} ymm13 = [18446744071562067968,18446744071562067968,0,0] +; CHECK-NEXT: vblendvpd %ymm13, %ymm9, %ymm6, %ymm6 +; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm0[0,1,2,3],ymm11[4,5],ymm0[6,7] +; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[3,2,2,1] +; CHECK-NEXT: vmovaps %xmm6, %xmm11 +; CHECK-NEXT: # implicit-def: %ymm13 +; CHECK-NEXT: vinserti128 $1, %xmm11, %ymm13, %ymm13 +; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm13[4,5],ymm9[6,7] +; CHECK-NEXT: vmovaps %xmm0, %xmm11 +; CHECK-NEXT: # implicit-def: %ymm0 +; CHECK-NEXT: vinserti128 $1, %xmm11, %ymm0, %ymm0 +; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm8[0,1,2,3],ymm7[4,5],ymm8[6,7] +; CHECK-NEXT: vpermq {{.*#+}} ymm13 = ymm8[2,0,2,3] +; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm13[0,1,2,3],ymm0[4,5,6,7] +; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm7[0,1,2,3],ymm2[4,5],ymm7[6,7] +; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,0,2,3] +; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm5[4,5],ymm6[6,7] +; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,1,1,2] +; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm6[4,5,6,7] +; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm8[2,1,1,3] +; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5] +; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3,4,5],ymm5[6,7] +; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm9, %ymm0 +; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm5, %ymm1 +; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm5 # 32-byte Reload +; CHECK-NEXT: vmovaps %ymm2, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm5, %ymm2 +; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm6 # 32-byte Reload +; CHECK-NEXT: vmovaps %ymm3, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm6, %ymm3 +; CHECK-NEXT: vmovaps %ymm15, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm12, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm10, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm4, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +bb: + %tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1 + %tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer + %tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5 + %tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6> + ret <16 x i64> %tmp7 +} diff --git a/test/MC/X86/eval-fill.s b/test/MC/X86/eval-fill.s new file mode 100644 index 0000000000000..f09166ee0f122 --- /dev/null +++ b/test/MC/X86/eval-fill.s @@ -0,0 +1,17 @@ +// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-readobj -s | FileCheck %s + +// CHECK: Name: .text +// CHECK-NEXT: Type: SHT_PROGBITS +// CHECK-NEXT: Flags [ +// CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_EXECINSTR +// CHECK-NEXT: ] +// CHECK-NEXT: Address: +// CHECK-NEXT: Offset: +// CHECK-NEXT: Size: 4092 + + .globl foo +foo: + .space 4 +bar: + .space 4092 - (bar - foo) diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s index 03be55fa4beb9..09e147fd1627c 100644 --- a/test/MC/X86/x86-32-coverage.s +++ b/test/MC/X86/x86-32-coverage.s @@ -10774,3 +10774,9 @@ btcl $4, (%eax) // CHECK: clzero // CHECK: encoding: [0x0f,0x01,0xfc] clzero + +// CHECK: lock addl %esi, (%edi) +// INTEL: lock add dword ptr [edi], esi +// CHECK: encoding: [0xf0,0x01,0x37] + lock add %esi, (%edi) + diff --git a/test/ThinLTO/X86/Inputs/dicompositetype-unique-alias.ll b/test/ThinLTO/X86/Inputs/dicompositetype-unique-alias.ll new file mode 100644 index 0000000000000..5c126e3f47681 --- /dev/null +++ b/test/ThinLTO/X86/Inputs/dicompositetype-unique-alias.ll @@ -0,0 +1,39 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +%struct.CFVS = type { %struct.Vec } +%struct.Vec = type { i8 } +%struct.S = type { i8 } + +@_ZN4CFVSD1Ev = alias void (%struct.CFVS*), void (%struct.CFVS*)* @_ZN4CFVSD2Ev + +define void @_ZN4CFVSD2Ev(%struct.CFVS* %this) unnamed_addr align 2 !dbg !8 { +entry: + %this.addr = alloca %struct.CFVS*, align 8 + store %struct.CFVS* %this, %struct.CFVS** %this.addr, align 8 + %this1 = load %struct.CFVS*, %struct.CFVS** %this.addr, align 8 + %m_val = getelementptr inbounds %struct.CFVS, %struct.CFVS* %this1, i32 0, i32 0 + ret void +} + +declare dereferenceable(1) %struct.S* @_Z3Getv() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "bz188598-b.cpp", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 2} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "~CFVS", linkageName: "_ZN4CFVSD2Ev", scope: !9, file: !1, line: 2, type: !28, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !27, variables: !2) +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !10, line: 7, size: 8, elements: !11, identifier: "_ZTS4CFVS") +!10 = !DIFile(filename: "./bz188598.h", directory: "") +!11 = !{!35} +!27 = !DISubprogram(name: "~CFVS", scope: !9, file: !10, line: 8, type: !28, isLocal: false, isDefinition: false, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false) +!28 = !DISubroutineType(types: !29) +!29 = !{null, !30} +!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!35 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) diff --git a/test/ThinLTO/X86/dicompositetype-unique-alias.ll b/test/ThinLTO/X86/dicompositetype-unique-alias.ll new file mode 100644 index 0000000000000..28c363d611109 --- /dev/null +++ b/test/ThinLTO/X86/dicompositetype-unique-alias.ll @@ -0,0 +1,62 @@ +; RUN: opt -module-summary -o %t1.bc %s +; RUN: opt -module-summary -o %t2.bc %S/Inputs/dicompositetype-unique-alias.ll +; RUN: llvm-lto --thinlto-action=run %t1.bc %t2.bc -thinlto-save-temps=%t3. +; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s +; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t --save-temps \ +; RUN: -r %t1.bc,_ZN1CD2Ev,pl \ +; RUN: -r %t1.bc,_ZN4CFVSD1Ev,l \ +; RUN: -r %t1.bc,_ZN4CFVSD2Ev,l \ +; RUN: -r %t1.bc,_Z3Getv,l \ +; RUN: -r %t2.bc,_ZN4CFVSD1Ev,pl \ +; RUN: -r %t2.bc,_ZN4CFVSD2Ev,pl \ +; RUN: -r %t2.bc,_Z3Getv,l +; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s + +; Only llvm-lto2 adds the dso_local keyword, hence the {{.*}} +; CHECK: define available_externally{{.*}} void @_ZN4CFVSD1Ev + +; Confirm that we only have a single DICompositeType after importing +; both an alias and its aliasee, since ODR Type Uniquing is enabled. +; CHECK: DICompositeType +; CHECK-NOT: DICompositeType + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +%class.C = type <{ i32 (...)**, %class.A, %struct.CFVS, [6 x i8] }> +%class.A = type { %struct.Vec } +%struct.Vec = type { i8 } +%struct.CFVS = type { %struct.Vec } +%struct.S = type { i8 } + +define void @_ZN1CD2Ev(%class.C* %this) unnamed_addr align 2 { +entry: + %this.addr = alloca %class.C*, align 8 + %this1 = load %class.C*, %class.C** %this.addr, align 8 + %m = getelementptr inbounds %class.C, %class.C* %this1, i32 0, i32 2 + call void @_ZN4CFVSD1Ev(%struct.CFVS* %m), !dbg !50 + call void @_ZN4CFVSD2Ev(%struct.CFVS* %m), !dbg !50 + ret void +} + +declare void @_ZN4CFVSD1Ev(%struct.CFVS*) unnamed_addr +declare void @_ZN4CFVSD2Ev(%struct.CFVS*) unnamed_addr + +declare dereferenceable(1) %struct.S* @_Z3Getv() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "bz188598-a.cpp", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 2} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", scope: !1, file: !1, line: 9, type: !47, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!47 = !DISubroutineType(types: !48) +!48 = !{!55} +!50 = !DILocation(line: 9, scope: !51) +!51 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9) +!55 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) diff --git a/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll b/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll index ca41bd6fc5e1b..25b4cb23be605 100644 --- a/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll +++ b/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll @@ -16,3 +16,27 @@ Tail: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) ret i32 %r } + +define void @fn1(i16 %p1) { +entry: + ret void +} + +define void @fn2() { + ret void + +; Unreachable code below + +for.inc: ; preds = %for.inc + br i1 undef, label %for.end6, label %for.inc + +for.end6: ; preds = %for.inc + br i1 undef, label %lor.rhs, label %lor.end + +lor.rhs: ; preds = %for.end6 + br label %lor.end + +lor.end: ; preds = %for.end6, %lor.rhs + call void @fn1(i16 0) + ret void +} diff --git a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll index 933aac7e23f2a..bf54204df080b 100644 --- a/test/Transforms/InstCombine/minmax-fold.ll +++ b/test/Transforms/InstCombine/minmax-fold.ll @@ -899,3 +899,24 @@ define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) { ret i32 %max_abc } +; This would assert. Don't assume that earlier min/max types match a possible later min/max. + +define float @not_min_of_min(i8 %i, float %x) { +; CHECK-LABEL: @not_min_of_min( +; CHECK-NEXT: [[CMP1_INV:%.*]] = fcmp fast oge float [[X:%.*]], 1.000000e+00 +; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[CMP1_INV]], float 1.000000e+00, float [[X]] +; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X]], 2.000000e+00 +; CHECK-NEXT: [[MIN2:%.*]] = select i1 [[CMP2_INV]], float 2.000000e+00, float [[X]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i8 [[I:%.*]], 16 +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP3]], float [[MIN1]], float [[MIN2]] +; CHECK-NEXT: ret float [[R]] +; + %cmp1 = fcmp fast ult float %x, 1.0 + %min1 = select i1 %cmp1, float %x, float 1.0 + %cmp2 = fcmp fast ult float %x, 2.0 + %min2 = select i1 %cmp2, float %x, float 2.0 + %cmp3 = icmp ult i8 %i, 16 + %r = select i1 %cmp3, float %min1, float %min2 + ret float %r +} + diff --git a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll index 9cddffdd17951..5b5ea676baec9 100644 --- a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll +++ b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll @@ -1,3 +1,4 @@ +; XFAIL: * ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg %s | FileCheck %s diff --git a/test/Transforms/StructurizeCFG/bug36015.ll b/test/Transforms/StructurizeCFG/bug36015.ll new file mode 100644 index 0000000000000..24b9c9cdde2d6 --- /dev/null +++ b/test/Transforms/StructurizeCFG/bug36015.ll @@ -0,0 +1,53 @@ +; RUN: opt -S -structurizecfg %s | FileCheck %s + +; r321751 introduced a bug where control flow branching from if to exit was +; not handled properly and instead ended up in an infinite loop. +define void @bug36015(i32 %cmp0, i32 %count) { +entry: + br label %loop.outer + +loop.outer: + %ctr.loop.outer = phi i32 [ 0, %entry ], [ %ctr.else, %else ] + call void @foo(i32 0) + br label %loop.inner + +loop.inner: + %ctr.loop.inner = phi i32 [ %ctr.loop.outer, %loop.outer ], [ %ctr.if, %if ] + call void @foo(i32 1) + %cond.inner = icmp eq i32 %cmp0, %ctr.loop.inner + br i1 %cond.inner, label %if, label %else + +; CHECK: if: +; CHECK: %0 = xor i1 %cond.if, true +; CHECK: br label %Flow +if: + %ctr.if = add i32 %ctr.loop.inner, 1 + call void @foo(i32 2) + %cond.if = icmp slt i32 %ctr.if, %count + br i1 %cond.if, label %loop.inner, label %exit + +; CHECK: Flow: +; CHECK: %2 = phi i1 [ %0, %if ], [ true, %loop.inner ] +; CHECK: %3 = phi i1 [ false, %if ], [ true, %loop.inner ] +; CHECK: br i1 %2, label %Flow1, label %loop.inner + +; CHECK: Flow1: +; CHECK: br i1 %3, label %else, label %Flow2 + +; CHECK: else: +; CHECK: br label %Flow2 +else: + %ctr.else = add i32 %ctr.loop.inner, 1 + call void @foo(i32 3) + %cond.else = icmp slt i32 %ctr.else, %count + br i1 %cond.else, label %loop.outer, label %exit + +; CHECK: Flow2: +; CHECK: %6 = phi i1 [ %4, %else ], [ true, %Flow1 ] +; CHECK: br i1 %6, label %exit, label %loop.outer + +exit: + ret void +} + +declare void @foo(i32) diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll index 7b5bd5acb6298..58634d0d37db8 100644 --- a/test/Transforms/StructurizeCFG/nested-loop-order.ll +++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll @@ -1,76 +1,32 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -structurizecfg %s -o - | FileCheck %s define void @main(float addrspace(1)* %out) { -; CHECK-LABEL: @main( -; CHECK-NEXT: main_body: -; CHECK-NEXT: br label [[LOOP_OUTER:%.*]] -; CHECK: LOOP.outer: -; CHECK-NEXT: [[TEMP8_0_PH:%.*]] = phi float [ 0.000000e+00, [[MAIN_BODY:%.*]] ], [ [[TMP13:%.*]], [[FLOW3:%.*]] ] -; CHECK-NEXT: [[TEMP4_0_PH:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP12:%.*]], [[FLOW3]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: LOOP: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP12]], [[FLOW:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = phi float [ undef, [[LOOP_OUTER]] ], [ [[TMP13]], [[FLOW]] ] -; CHECK-NEXT: [[TEMP4_0:%.*]] = phi i32 [ [[TEMP4_0_PH]], [[LOOP_OUTER]] ], [ [[TMP15:%.*]], [[FLOW]] ] -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TEMP4_0]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[TMP22]], true -; CHECK-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]] -; CHECK: Flow2: -; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[TEMP8_0_PH]], [[IF29:%.*]] ], [ [[TMP9:%.*]], [[FLOW1:%.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP20]], [[IF29]] ], [ undef, [[FLOW1]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP32:%.*]], [[IF29]] ], [ true, [[FLOW1]] ] -; CHECK-NEXT: br label [[FLOW]] -; CHECK: Flow3: -; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[ENDLOOP:%.*]], label [[LOOP_OUTER]] -; CHECK: ENDLOOP: -; CHECK-NEXT: [[TEMP8_1:%.*]] = phi float [ [[TMP14:%.*]], [[FLOW3]] ] -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3 -; CHECK-NEXT: [[DOT45:%.*]] = select i1 [[TMP23]], float 0.000000e+00, float 1.000000e+00 -; CHECK-NEXT: store float [[DOT45]], float addrspace(1)* [[OUT:%.*]] -; CHECK-NEXT: ret void -; CHECK: ENDIF: -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP20]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TMP31]], true -; CHECK-NEXT: br i1 [[TMP6]], label [[ENDIF28:%.*]], label [[FLOW1]] -; CHECK: Flow1: -; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP20]], [[ENDIF28]] ], [ [[TMP0]], [[ENDIF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP35:%.*]], [[ENDIF28]] ], [ [[TMP1]], [[ENDIF]] ] -; CHECK-NEXT: [[TMP9]] = phi float [ [[TMP35]], [[ENDIF28]] ], [ [[TEMP8_0_PH]], [[ENDIF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP36:%.*]], [[ENDIF28]] ], [ true, [[ENDIF]] ] -; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[ENDIF28]] ], [ true, [[ENDIF]] ] -; CHECK-NEXT: br i1 [[TMP11]], label [[IF29]], label [[FLOW2:%.*]] -; CHECK: IF29: -; CHECK-NEXT: [[TMP32]] = icmp sgt i32 [[TMP20]], 2 -; CHECK-NEXT: br label [[FLOW2]] -; CHECK: Flow: -; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP7]], [[FLOW2]] ], [ [[TMP0]], [[LOOP]] ] -; CHECK-NEXT: [[TMP13]] = phi float [ [[TMP8]], [[FLOW2]] ], [ [[TMP1]], [[LOOP]] ] -; CHECK-NEXT: [[TMP14]] = phi float [ [[TMP3]], [[FLOW2]] ], [ [[TEMP8_0_PH]], [[LOOP]] ] -; CHECK-NEXT: [[TMP15]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP]] ] -; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW2]] ], [ true, [[LOOP]] ] -; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ true, [[LOOP]] ] -; CHECK-NEXT: br i1 [[TMP17]], label [[FLOW3]], label [[LOOP]] -; CHECK: ENDIF28: -; CHECK-NEXT: [[TMP35]] = fadd float [[TEMP8_0_PH]], 1.000000e+00 -; CHECK-NEXT: [[TMP36]] = icmp sgt i32 [[TMP20]], 2 -; CHECK-NEXT: br label [[FLOW1]] -; + +; CHECK: main_body: +; CHECK: br label %LOOP.outer main_body: br label %LOOP.outer +; CHECK: LOOP.outer: +; CHECK: br label %LOOP LOOP.outer: ; preds = %ENDIF28, %main_body %temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ] %temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ] br label %LOOP +; CHECK: LOOP: +; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow LOOP: ; preds = %IF29, %LOOP.outer %temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ] %tmp20 = add i32 %temp4.0, 1 %tmp22 = icmp sgt i32 %tmp20, 3 br i1 %tmp22, label %ENDLOOP, label %ENDIF +; CHECK: Flow3 +; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer + +; CHECK: ENDLOOP: +; CHECK: ret void ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP %temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ] %tmp23 = icmp eq i32 %tmp20, 3 @@ -78,14 +34,29 @@ ENDLOOP: ; preds = %ENDIF28, %IF29, %LO store float %.45, float addrspace(1)* %out ret void +; CHECK: ENDIF: +; CHECK: br i1 %tmp31, label %IF29, label %Flow1 ENDIF: ; preds = %LOOP %tmp31 = icmp sgt i32 %tmp20, 1 br i1 %tmp31, label %IF29, label %ENDIF28 +; CHECK: Flow: +; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP + +; CHECK: IF29: +; CHECK: br label %Flow1 IF29: ; preds = %ENDIF %tmp32 = icmp sgt i32 %tmp20, 2 br i1 %tmp32, label %ENDLOOP, label %LOOP +; CHECK: Flow1: +; CHECK: br label %Flow + +; CHECK: Flow2: +; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3 + +; CHECK: ENDIF28: +; CHECK: br label %Flow3 ENDIF28: ; preds = %ENDIF %tmp35 = fadd float %temp8.0.ph, 1.0 %tmp36 = icmp sgt i32 %tmp20, 2 |