author     Dimitry Andric <dim@FreeBSD.org>    2017-06-03 15:20:36 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-06-03 15:20:36 +0000
commit     d288ef4c1788d3a951a7558c68312c2d320612b1 (patch)
tree       ece909a5200f95f85f0813599a9500620f4d9217
parent     f382538d471e38a9b98f016c4caebd24c8d60b62 (diff)
277 files changed, 9435 insertions, 2819 deletions
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index bd24eab93b50..35c255002001 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -415,6 +415,9 @@ public:
     append(IL.begin(), IL.end());
   }
 
+  // FIXME: Consider assigning over existing elements, rather than clearing &
+  // re-initializing them - for all assign(...) variants.
+
   void assign(size_type NumElts, const T &Elt) {
     clear();
     if (this->capacity() < NumElts)
@@ -423,6 +426,11 @@ public:
     std::uninitialized_fill(this->begin(), this->end(), Elt);
   }
 
+  template <typename in_iter> void assign(in_iter in_start, in_iter in_end) {
+    clear();
+    append(in_start, in_end);
+  }
+
   void assign(std::initializer_list<T> IL) {
     clear();
     append(IL);
diff --git a/include/llvm/Analysis/OrderedBasicBlock.h b/include/llvm/Analysis/OrderedBasicBlock.h
index 5aa813eb4832..2e716af1f60d 100644
--- a/include/llvm/Analysis/OrderedBasicBlock.h
+++ b/include/llvm/Analysis/OrderedBasicBlock.h
@@ -58,6 +58,7 @@ public:
   /// comes before \p B in \p BB. This is a simplification that considers
   /// cached instruction positions and ignores other basic blocks, being
   /// only relevant to compare relative instructions positions inside \p BB.
+  /// Returns false for A == B.
   bool dominates(const Instruction *A, const Instruction *B);
 };
 
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index b5f38139abf2..515b362e5407 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -78,6 +78,11 @@ public:
     return PMT_RegionPassManager;
   }
   //@}
+
+protected:
+  /// Optional passes call this function to check whether the pass should be
+  /// skipped. This is the case when optimization bisect is over the limit.
+  bool skipRegion(Region &R) const;
 };
 
 /// @brief The pass manager to schedule RegionPasses.
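[Editorial aside, not part of the upstream patch: the iterator-range assign(...) added to SmallVector above simply clears the vector and re-appends the range, mirroring the existing initializer_list overload. A minimal usage sketch; copyFrom is a hypothetical helper, not LLVM API.]

    #include "llvm/ADT/SmallVector.h"
    #include <vector>

    llvm::SmallVector<int, 8> copyFrom(const std::vector<int> &Src) {
      llvm::SmallVector<int, 8> SV;
      SV.assign(4, 0);                   // existing overload: four zeroes
      SV.assign(Src.begin(), Src.end()); // new overload: clear, then append the range
      return SV;
    }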
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 1d715b590ab7..8ee9712b93d8 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -636,7 +636,7 @@ private:
     /// @}
 
   public:
-    BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {}
+    BackedgeTakenInfo() : MaxAndComplete(nullptr, 0), MaxOrZero(false) {}
 
     BackedgeTakenInfo(BackedgeTakenInfo &&) = default;
     BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default;
diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h
index 21f847c7e5ba..8394b58d0a16 100644
--- a/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/include/llvm/CodeGen/MachineRegionInfo.h
@@ -10,83 +10,77 @@
 #ifndef LLVM_CODEGEN_MACHINEREGIONINFO_H
 #define LLVM_CODEGEN_MACHINEREGIONINFO_H
 
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominanceFrontier.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
-
+#include <cassert>
 
 namespace llvm {
 
-class MachineDominatorTree;
 struct MachinePostDominatorTree;
 class MachineRegion;
 class MachineRegionNode;
 class MachineRegionInfo;
 
-template<>
-struct RegionTraits<MachineFunction> {
-  typedef MachineFunction FuncT;
-  typedef MachineBasicBlock BlockT;
-  typedef MachineRegion RegionT;
-  typedef MachineRegionNode RegionNodeT;
-  typedef MachineRegionInfo RegionInfoT;
-  typedef MachineDominatorTree DomTreeT;
-  typedef MachineDomTreeNode DomTreeNodeT;
-  typedef MachinePostDominatorTree PostDomTreeT;
-  typedef MachineDominanceFrontier DomFrontierT;
-  typedef MachineInstr InstT;
-  typedef MachineLoop LoopT;
-  typedef MachineLoopInfo LoopInfoT;
+template <> struct RegionTraits<MachineFunction> {
+  using FuncT = MachineFunction;
+  using BlockT = MachineBasicBlock;
+  using RegionT = MachineRegion;
+  using RegionNodeT = MachineRegionNode;
+  using RegionInfoT = MachineRegionInfo;
+  using DomTreeT = MachineDominatorTree;
+  using DomTreeNodeT = MachineDomTreeNode;
+  using PostDomTreeT = MachinePostDominatorTree;
+  using DomFrontierT = MachineDominanceFrontier;
+  using InstT = MachineInstr;
+  using LoopT = MachineLoop;
+  using LoopInfoT = MachineLoopInfo;
 
   static unsigned getNumSuccessors(MachineBasicBlock *BB) {
     return BB->succ_size();
   }
 };
 
-
 class MachineRegionNode : public RegionNodeBase<RegionTraits<MachineFunction>> {
 public:
-  inline MachineRegionNode(MachineRegion *Parent,
-                           MachineBasicBlock *Entry,
+  inline MachineRegionNode(MachineRegion *Parent, MachineBasicBlock *Entry,
                            bool isSubRegion = false)
-    : RegionNodeBase<RegionTraits<MachineFunction>>(Parent, Entry, isSubRegion) {
-
-  }
+      : RegionNodeBase<RegionTraits<MachineFunction>>(Parent, Entry,
+                                                      isSubRegion) {}
 
   bool operator==(const MachineRegion &RN) const {
-    return this == reinterpret_cast<const MachineRegionNode*>(&RN);
+    return this == reinterpret_cast<const MachineRegionNode *>(&RN);
   }
 };
 
 class MachineRegion : public RegionBase<RegionTraits<MachineFunction>> {
 public:
   MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
-                MachineRegionInfo* RI,
-                MachineDominatorTree *DT, MachineRegion *Parent = nullptr);
+                MachineRegionInfo *RI, MachineDominatorTree *DT,
+                MachineRegion *Parent = nullptr);
   ~MachineRegion();
 
   bool operator==(const MachineRegionNode &RN) const {
-    return &RN == reinterpret_cast<const MachineRegionNode*>(this);
+    return &RN == reinterpret_cast<const MachineRegionNode *>(this);
   }
 };
 
 class MachineRegionInfo : public RegionInfoBase<RegionTraits<MachineFunction>> {
 public:
   explicit MachineRegionInfo();
-
   ~MachineRegionInfo() override;
 
   // updateStatistics - Update statistic about created regions.
   void updateStatistics(MachineRegion *R) final;
 
-  void recalculate(MachineFunction &F,
-                   MachineDominatorTree *DT,
-                   MachinePostDominatorTree *PDT,
-                   MachineDominanceFrontier *DF);
+  void recalculate(MachineFunction &F, MachineDominatorTree *DT,
+                   MachinePostDominatorTree *PDT, MachineDominanceFrontier *DF);
 };
 
 class MachineRegionInfoPass : public MachineFunctionPass {
@@ -94,17 +88,13 @@ class MachineRegionInfoPass : public MachineFunctionPass {
 public:
   static char ID;
 
-  explicit  MachineRegionInfoPass();
+  explicit MachineRegionInfoPass();
   ~MachineRegionInfoPass() override;
 
-  MachineRegionInfo &getRegionInfo() {
-    return RI;
-  }
+  MachineRegionInfo &getRegionInfo() { return RI; }
 
-  const MachineRegionInfo &getRegionInfo() const {
-    return RI;
-  }
+  const MachineRegionInfo &getRegionInfo() const { return RI; }
 
   /// @name MachineFunctionPass interface
   //@{
@@ -117,66 +107,76 @@ public:
   //@}
 };
 
-
 template <>
 template <>
-inline MachineBasicBlock* RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineBasicBlock>() const {
+inline MachineBasicBlock *
+RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineBasicBlock>()
+    const {
   assert(!isSubRegion() && "This is not a MachineBasicBlock RegionNode!");
   return getEntry();
 }
 
-template<>
-template<>
-inline MachineRegion* RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineRegion>() const {
+template <>
+template <>
+inline MachineRegion *
+RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineRegion>()
+    const {
   assert(isSubRegion() && "This is not a subregion RegionNode!");
-  auto Unconst = const_cast<RegionNodeBase<RegionTraits<MachineFunction>>*>(this);
-  return reinterpret_cast<MachineRegion*>(Unconst);
+  auto Unconst =
+      const_cast<RegionNodeBase<RegionTraits<MachineFunction>> *>(this);
+  return reinterpret_cast<MachineRegion *>(Unconst);
 }
 
-
 RegionNodeGraphTraits(MachineRegionNode, MachineBasicBlock, MachineRegion);
-RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock, MachineRegion);
+RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock,
+                      MachineRegion);
 
 RegionGraphTraits(MachineRegion, MachineRegionNode);
 RegionGraphTraits(const MachineRegion, const MachineRegionNode);
 
-template <> struct GraphTraits<MachineRegionInfo*>
-  : public GraphTraits<FlatIt<MachineRegionNode*> > {
-  typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
-                      GraphTraits<FlatIt<NodeRef>>>
-      nodes_iterator;
+template <>
+struct GraphTraits<MachineRegionInfo *>
    : public GraphTraits<FlatIt<MachineRegionNode *>> {
+  using nodes_iterator = df_iterator<NodeRef, df_iterator_default_set<NodeRef>,
+                                     false, GraphTraits<FlatIt<NodeRef>>>;
 
   static NodeRef getEntryNode(MachineRegionInfo *RI) {
-    return GraphTraits<FlatIt<MachineRegion*> >::getEntryNode(RI->getTopLevelRegion());
+    return GraphTraits<FlatIt<MachineRegion *>>::getEntryNode(
+        RI->getTopLevelRegion());
  }
-  static nodes_iterator nodes_begin(MachineRegionInfo* RI) {
+
+  static nodes_iterator nodes_begin(MachineRegionInfo *RI) {
    return nodes_iterator::begin(getEntryNode(RI));
  }
+
  static nodes_iterator nodes_end(MachineRegionInfo *RI) {
    return nodes_iterator::end(getEntryNode(RI));
  }
};

-template <> struct GraphTraits<MachineRegionInfoPass*>
-  : public GraphTraits<MachineRegionInfo *> {
-  typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
-                      GraphTraits<FlatIt<NodeRef>>>
-      nodes_iterator;
+template <>
+struct GraphTraits<MachineRegionInfoPass *>
+    : public GraphTraits<MachineRegionInfo *> {
+  using nodes_iterator = df_iterator<NodeRef, df_iterator_default_set<NodeRef>,
+                                     false, GraphTraits<FlatIt<NodeRef>>>;

  static NodeRef getEntryNode(MachineRegionInfoPass *RI) {
-    return GraphTraits<MachineRegionInfo*>::getEntryNode(&RI->getRegionInfo());
+    return GraphTraits<MachineRegionInfo *>::getEntryNode(&RI->getRegionInfo());
  }
-  static nodes_iterator nodes_begin(MachineRegionInfoPass* RI) {
-    return GraphTraits<MachineRegionInfo*>::nodes_begin(&RI->getRegionInfo());
+
+  static nodes_iterator nodes_begin(MachineRegionInfoPass *RI) {
+    return GraphTraits<MachineRegionInfo *>::nodes_begin(&RI->getRegionInfo());
  }
+
  static nodes_iterator nodes_end(MachineRegionInfoPass *RI) {
-    return GraphTraits<MachineRegionInfo*>::nodes_end(&RI->getRegionInfo());
+    return GraphTraits<MachineRegionInfo *>::nodes_end(&RI->getRegionInfo());
  }
};

 extern template class RegionBase<RegionTraits<MachineFunction>>;
 extern template class RegionNodeBase<RegionTraits<MachineFunction>>;
 extern template class RegionInfoBase<RegionTraits<MachineFunction>>;
 
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEREGIONINFO_H
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 1026654da3d7..c027783aae55 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -14,11 +14,13 @@
 #ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H
 #define LLVM_CODEGEN_MACHINEREGISTERINFO_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -41,8 +43,8 @@ namespace llvm {
 class PSetIterator;
 
 /// Convenient type to represent either a register class or a register bank.
-typedef PointerUnion<const TargetRegisterClass *, const RegisterBank *>
-    RegClassOrRegBank;
+using RegClassOrRegBank =
+    PointerUnion<const TargetRegisterClass *, const RegisterBank *>;
 
 /// MachineRegisterInfo - Keep track of information for virtual and physical
 /// registers, including vreg register classes, use/def chains for registers,
@@ -125,7 +127,7 @@ private:
   /// started.
   BitVector ReservedRegs;
 
-  typedef DenseMap<unsigned, LLT> VRegToTypeMap;
+  using VRegToTypeMap = DenseMap<unsigned, LLT>;
   /// Map generic virtual registers to their actual size.
   mutable std::unique_ptr<VRegToTypeMap> VRegToType;
 
@@ -266,8 +268,8 @@ public:
   /// reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified
   /// register.
-  typedef defusechain_iterator<true,true,false,true,false,false>
-          reg_iterator;
+  using reg_iterator =
+      defusechain_iterator<true, true, false, true, false, false>;
   reg_iterator reg_begin(unsigned RegNo) const {
     return reg_iterator(getRegUseDefListHead(RegNo));
   }
@@ -279,8 +281,8 @@ public:
 
   /// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses
   /// of the specified register, stepping by MachineInstr.
-  typedef defusechain_instr_iterator<true,true,false,false,true,false>
-          reg_instr_iterator;
+  using reg_instr_iterator =
+      defusechain_instr_iterator<true, true, false, false, true, false>;
   reg_instr_iterator reg_instr_begin(unsigned RegNo) const {
     return reg_instr_iterator(getRegUseDefListHead(RegNo));
   }
@@ -295,8 +297,8 @@ public:
 
   /// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses
   /// of the specified register, stepping by bundle.
-  typedef defusechain_instr_iterator<true,true,false,false,false,true>
-          reg_bundle_iterator;
+  using reg_bundle_iterator =
+      defusechain_instr_iterator<true, true, false, false, false, true>;
   reg_bundle_iterator reg_bundle_begin(unsigned RegNo) const {
     return reg_bundle_iterator(getRegUseDefListHead(RegNo));
   }
@@ -314,8 +316,8 @@ public:
 
   /// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses
   /// of the specified register, skipping those marked as Debug.
-  typedef defusechain_iterator<true,true,true,true,false,false>
-          reg_nodbg_iterator;
+  using reg_nodbg_iterator =
+      defusechain_iterator<true, true, true, true, false, false>;
   reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const {
     return reg_nodbg_iterator(getRegUseDefListHead(RegNo));
   }
@@ -331,8 +333,8 @@ public:
 
   /// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk
   /// all defs and uses of the specified register, stepping by MachineInstr,
   /// skipping those marked as Debug.
-  typedef defusechain_instr_iterator<true,true,true,false,true,false>
-          reg_instr_nodbg_iterator;
+  using reg_instr_nodbg_iterator =
+      defusechain_instr_iterator<true, true, true, false, true, false>;
   reg_instr_nodbg_iterator reg_instr_nodbg_begin(unsigned RegNo) const {
     return reg_instr_nodbg_iterator(getRegUseDefListHead(RegNo));
   }
@@ -348,8 +350,8 @@ public:
 
   /// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk
   /// all defs and uses of the specified register, stepping by bundle,
   /// skipping those marked as Debug.
-  typedef defusechain_instr_iterator<true,true,true,false,false,true>
-          reg_bundle_nodbg_iterator;
+  using reg_bundle_nodbg_iterator =
+      defusechain_instr_iterator<true, true, true, false, false, true>;
   reg_bundle_nodbg_iterator reg_bundle_nodbg_begin(unsigned RegNo) const {
     return reg_bundle_nodbg_iterator(getRegUseDefListHead(RegNo));
   }
@@ -369,8 +371,8 @@ public:
   }
 
   /// def_iterator/def_begin/def_end - Walk all defs of the specified register.
-  typedef defusechain_iterator<false,true,false,true,false,false>
-          def_iterator;
+  using def_iterator =
+      defusechain_iterator<false, true, false, true, false, false>;
   def_iterator def_begin(unsigned RegNo) const {
     return def_iterator(getRegUseDefListHead(RegNo));
   }
@@ -382,8 +384,8 @@ public:
 
   /// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the
   /// specified register, stepping by MachineInst.
-  typedef defusechain_instr_iterator<false,true,false,false,true,false>
-          def_instr_iterator;
+  using def_instr_iterator =
+      defusechain_instr_iterator<false, true, false, false, true, false>;
   def_instr_iterator def_instr_begin(unsigned RegNo) const {
     return def_instr_iterator(getRegUseDefListHead(RegNo));
   }
@@ -398,8 +400,8 @@ public:
 
   /// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the
   /// specified register, stepping by bundle.
-  typedef defusechain_instr_iterator<false,true,false,false,false,true>
-          def_bundle_iterator;
+  using def_bundle_iterator =
+      defusechain_instr_iterator<false, true, false, false, false, true>;
   def_bundle_iterator def_bundle_begin(unsigned RegNo) const {
     return def_bundle_iterator(getRegUseDefListHead(RegNo));
   }
@@ -425,8 +427,8 @@ public:
   }
 
   /// use_iterator/use_begin/use_end - Walk all uses of the specified register.
-  typedef defusechain_iterator<true,false,false,true,false,false>
-          use_iterator;
+  using use_iterator =
+      defusechain_iterator<true, false, false, true, false, false>;
   use_iterator use_begin(unsigned RegNo) const {
     return use_iterator(getRegUseDefListHead(RegNo));
   }
@@ -438,8 +440,8 @@ public:
 
   /// use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the
   /// specified register, stepping by MachineInstr.
-  typedef defusechain_instr_iterator<true,false,false,false,true,false>
-          use_instr_iterator;
+  using use_instr_iterator =
+      defusechain_instr_iterator<true, false, false, false, true, false>;
   use_instr_iterator use_instr_begin(unsigned RegNo) const {
     return use_instr_iterator(getRegUseDefListHead(RegNo));
   }
@@ -454,8 +456,8 @@ public:
 
   /// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the
   /// specified register, stepping by bundle.
-  typedef defusechain_instr_iterator<true,false,false,false,false,true>
-          use_bundle_iterator;
+  using use_bundle_iterator =
+      defusechain_instr_iterator<true, false, false, false, false, true>;
   use_bundle_iterator use_bundle_begin(unsigned RegNo) const {
     return use_bundle_iterator(getRegUseDefListHead(RegNo));
   }
@@ -482,8 +484,8 @@ public:
 
   /// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the
   /// specified register, skipping those marked as Debug.
-  typedef defusechain_iterator<true,false,true,true,false,false>
-          use_nodbg_iterator;
+  using use_nodbg_iterator =
+      defusechain_iterator<true, false, true, true, false, false>;
   use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const {
     return use_nodbg_iterator(getRegUseDefListHead(RegNo));
   }
@@ -499,8 +501,8 @@ public:
 
   /// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk
   /// all uses of the specified register, stepping by MachineInstr, skipping
   /// those marked as Debug.
-  typedef defusechain_instr_iterator<true,false,true,false,true,false>
-          use_instr_nodbg_iterator;
+  using use_instr_nodbg_iterator =
+      defusechain_instr_iterator<true, false, true, false, true, false>;
   use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const {
     return use_instr_nodbg_iterator(getRegUseDefListHead(RegNo));
   }
@@ -516,8 +518,8 @@ public:
 
   /// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk
   /// all uses of the specified register, stepping by bundle, skipping
   /// those marked as Debug.
-  typedef defusechain_instr_iterator<true,false,true,false,false,true>
-          use_bundle_nodbg_iterator;
+  using use_bundle_nodbg_iterator =
+      defusechain_instr_iterator<true, false, true, false, false, true>;
   use_bundle_nodbg_iterator use_bundle_nodbg_begin(unsigned RegNo) const {
     return use_bundle_nodbg_iterator(getRegUseDefListHead(RegNo));
   }
@@ -593,7 +595,6 @@ public:
   /// Return the register class of the specified virtual register.
   /// This shouldn't be used directly unless \p Reg has a register class.
   /// \see getRegClassOrNull when this might happen.
-  ///
   const TargetRegisterClass *getRegClass(unsigned Reg) const {
     assert(VRegInfo[Reg].first.is<const TargetRegisterClass *>() &&
            "Register class not set, wrong accessor");
@@ -620,7 +621,6 @@ public:
   /// a register bank or has been assigned a register class.
   /// \note It is possible to get the register bank from the register class via
   /// RegisterBankInfo::getRegBankFromRegClass.
-  ///
   const RegisterBank *getRegBankOrNull(unsigned Reg) const {
     const RegClassOrRegBank &Val = VRegInfo[Reg].first;
     return Val.dyn_cast<const RegisterBank *>();
@@ -629,17 +629,14 @@ public:
   /// Return the register bank or register class of \p Reg.
   /// \note Before the register bank gets assigned (i.e., before the
   /// RegBankSelect pass) \p Reg may not have either.
-  ///
   const RegClassOrRegBank &getRegClassOrRegBank(unsigned Reg) const {
     return VRegInfo[Reg].first;
   }
 
   /// setRegClass - Set the register class of the specified virtual register.
-  ///
   void setRegClass(unsigned Reg, const TargetRegisterClass *RC);
 
   /// Set the register bank to \p RegBank for \p Reg.
-  ///
   void setRegBank(unsigned Reg, const RegisterBank &RegBank);
 
   void setRegClassOrRegBank(unsigned Reg,
@@ -653,7 +650,6 @@ public:
   /// new register class, or NULL if no such class exists.
   /// This should only be used when the constraint is known to be trivial, like
   /// GR32 -> GR32_NOSP. Beware of increasing register pressure.
-  ///
   const TargetRegisterClass *constrainRegClass(unsigned Reg,
                                                const TargetRegisterClass *RC,
                                                unsigned MinNumRegs = 0);
@@ -665,12 +661,10 @@ public:
   /// This method can be used after constraints have been removed from a
   /// virtual register, for example after removing instructions or splitting
   /// the live range.
-  ///
   bool recomputeRegClass(unsigned Reg);
 
   /// createVirtualRegister - Create and return a new virtual register in the
   /// function with the specified register class.
-  ///
   unsigned createVirtualRegister(const TargetRegisterClass *RegClass);
 
   /// Accessor for VRegToType. This accessor should only be used
@@ -704,7 +698,6 @@ public:
   unsigned createIncompleteVirtualRegister();
 
   /// getNumVirtRegs - Return the number of virtual registers created.
-  ///
   unsigned getNumVirtRegs() const { return VRegInfo.size(); }
 
   /// clearVirtRegs - Remove all virtual registers (after physreg assignment).
@@ -810,7 +803,6 @@ public:
   ///
   /// Reserved registers may belong to an allocatable register class, but the
   /// target has explicitly requested that they are not used.
-  ///
   bool isReserved(unsigned PhysReg) const {
     return getReservedRegs().test(PhysReg);
   }
@@ -838,8 +830,8 @@ public:
 
   // Iteration support for the live-ins set. It's kept in sorted order
   // by register number.
-  typedef std::vector<std::pair<unsigned,unsigned>>::const_iterator
-  livein_iterator;
+  using livein_iterator =
+      std::vector<std::pair<unsigned,unsigned>>::const_iterator;
   livein_iterator livein_begin() const { return LiveIns.begin(); }
   livein_iterator livein_end()   const { return LiveIns.end(); }
   bool            livein_empty() const { return LiveIns.empty(); }
@@ -910,10 +902,10 @@ public:
     }
 
   public:
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::reference reference;
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::pointer pointer;
+    using reference = std::iterator<std::forward_iterator_tag,
+                                    MachineInstr, ptrdiff_t>::reference;
+    using pointer = std::iterator<std::forward_iterator_tag,
+                                  MachineInstr, ptrdiff_t>::pointer;
 
     defusechain_iterator() = default;
 
@@ -1016,10 +1008,10 @@ public:
     }
 
   public:
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::reference reference;
-    typedef std::iterator<std::forward_iterator_tag,
-                          MachineInstr, ptrdiff_t>::pointer pointer;
+    using reference = std::iterator<std::forward_iterator_tag,
+                                    MachineInstr, ptrdiff_t>::reference;
+    using pointer = std::iterator<std::forward_iterator_tag,
+                                  MachineInstr, ptrdiff_t>::pointer;
 
     defusechain_instr_iterator() = default;
 
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 6b2a16e1d36e..3b02ec400aba 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -104,10 +104,15 @@ extern cl::opt<bool> ForceBottomUp;
 
 class LiveIntervals;
 class MachineDominatorTree;
+class MachineFunction;
+class MachineInstr;
 class MachineLoopInfo;
 class RegisterClassInfo;
 class SchedDFSResult;
 class ScheduleHazardRecognizer;
+class TargetInstrInfo;
+class TargetPassConfig;
+class TargetRegisterInfo;
 
 /// MachineSchedContext provides enough context from the MachineScheduler pass
 /// for the target to instantiate a scheduler.
@@ -129,10 +134,10 @@ struct MachineSchedContext {
 /// schedulers.
 class MachineSchedRegistry : public MachinePassRegistryNode {
 public:
-  typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *);
+  using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
 
   // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
-  typedef ScheduleDAGCtor FunctionPassCtor;
+  using FunctionPassCtor = ScheduleDAGCtor;
 
   static MachinePassRegistry Registry;
 
@@ -527,7 +532,7 @@ public:
 
   unsigned size() const { return Queue.size(); }
 
-  typedef std::vector<SUnit*>::iterator iterator;
+  using iterator = std::vector<SUnit*>::iterator;
 
   iterator begin() { return Queue.begin(); }
 
diff --git a/include/llvm/CodeGen/PBQP/CostAllocator.h b/include/llvm/CodeGen/PBQP/CostAllocator.h
index 02d39fe383f1..bde451ae1fcc 100644
--- a/include/llvm/CodeGen/PBQP/CostAllocator.h
+++ b/include/llvm/CodeGen/PBQP/CostAllocator.h
@@ -1,4 +1,4 @@
-//===---------- CostAllocator.h - PBQP Cost Allocator -----------*- C++ -*-===//
+//===- CostAllocator.h - PBQP Cost Allocator --------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,26 +19,28 @@
 #define LLVM_CODEGEN_PBQP_COSTALLOCATOR_H
 
 #include "llvm/ADT/DenseSet.h"
+#include <algorithm>
+#include <cstdint>
 #include <memory>
-#include <type_traits>
 
 namespace llvm {
 namespace PBQP {
 
-template <typename ValueT>
-class ValuePool {
+template <typename ValueT> class ValuePool {
 public:
-  typedef std::shared_ptr<const ValueT> PoolRef;
+  using PoolRef = std::shared_ptr<const ValueT>;
 
 private:
-
   class PoolEntry : public std::enable_shared_from_this<PoolEntry> {
   public:
     template <typename ValueKeyT>
     PoolEntry(ValuePool &Pool, ValueKeyT Value)
        : Pool(Pool), Value(std::move(Value)) {}
+
    ~PoolEntry() { Pool.removeEntry(this); }
-    const ValueT& getValue() const { return Value; }
+
+    const ValueT &getValue() const { return Value; }
+
  private:
    ValuePool &Pool;
    ValueT Value;
@@ -46,10 +48,10 @@ private:
 
   class PoolEntryDSInfo {
   public:
-    static inline PoolEntry* getEmptyKey() { return nullptr; }
+    static inline PoolEntry *getEmptyKey() { return nullptr; }
 
-    static inline PoolEntry* getTombstoneKey() {
-      return reinterpret_cast<PoolEntry*>(static_cast<uintptr_t>(1));
+    static inline PoolEntry *getTombstoneKey() {
+      return reinterpret_cast<PoolEntry *>(static_cast<uintptr_t>(1));
     }
 
     template <typename ValueKeyT>
@@ -66,8 +68,7 @@ private:
     }
 
     template <typename ValueKeyT1, typename ValueKeyT2>
-    static
-    bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) {
+    static bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) {
       return C1 == C2;
     }
 
@@ -83,10 +84,9 @@ private:
         return P1 == P2;
       return isEqual(P1->getValue(), P2);
     }
-
   };
 
-  typedef DenseSet<PoolEntry*, PoolEntryDSInfo> EntrySetT;
+  using EntrySetT = DenseSet<PoolEntry *, PoolEntryDSInfo>;
 
   EntrySetT EntrySet;
 
@@ -105,28 +105,31 @@ public:
   }
 };
 
-template <typename VectorT, typename MatrixT>
-class PoolCostAllocator {
+template <typename VectorT, typename MatrixT> class PoolCostAllocator {
 private:
-  typedef ValuePool<VectorT> VectorCostPool;
-  typedef ValuePool<MatrixT> MatrixCostPool;
+  using VectorCostPool = ValuePool<VectorT>;
+  using MatrixCostPool = ValuePool<MatrixT>;
+
 public:
-  typedef VectorT Vector;
-  typedef MatrixT Matrix;
-  typedef typename VectorCostPool::PoolRef VectorPtr;
-  typedef typename MatrixCostPool::PoolRef MatrixPtr;
+  using Vector = VectorT;
+  using Matrix = MatrixT;
+  using VectorPtr = typename VectorCostPool::PoolRef;
+  using MatrixPtr = typename MatrixCostPool::PoolRef;
+
+  template <typename VectorKeyT> VectorPtr getVector(VectorKeyT v) {
+    return VectorPool.getValue(std::move(v));
+  }
 
-  template <typename VectorKeyT>
-  VectorPtr getVector(VectorKeyT v) { return VectorPool.getValue(std::move(v)); }
+  template <typename MatrixKeyT> MatrixPtr getMatrix(MatrixKeyT m) {
+    return MatrixPool.getValue(std::move(m));
+  }
 
-  template <typename MatrixKeyT>
-  MatrixPtr getMatrix(MatrixKeyT m) { return MatrixPool.getValue(std::move(m)); }
 private:
   VectorCostPool VectorPool;
   MatrixCostPool MatrixPool;
 };
 
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
 
-#endif
+#endif // LLVM_CODEGEN_PBQP_COSTALLOCATOR_H
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index 83487e6a808a..e94878ced10d 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -1,4 +1,4 @@
-//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
+//===- Graph.h - PBQP Graph -------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,16 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef LLVM_CODEGEN_PBQP_GRAPH_H
 #define LLVM_CODEGEN_PBQP_GRAPH_H
 
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
 #include <algorithm>
 #include <cassert>
+#include <iterator>
 #include <limits>
-#include <utility>
 #include <vector>
 
 namespace llvm {
@@ -28,8 +26,8 @@ namespace PBQP {
 
   class GraphBase {
   public:
-    typedef unsigned NodeId;
-    typedef unsigned EdgeId;
+    using NodeId = unsigned;
+    using EdgeId = unsigned;
 
     /// @brief Returns a value representing an invalid (non-existent) node.
     static NodeId invalidNodeId() {
@@ -48,32 +46,32 @@ namespace PBQP {
   template <typename SolverT>
   class Graph : public GraphBase {
   private:
-    typedef typename SolverT::CostAllocator CostAllocator;
+    using CostAllocator = typename SolverT::CostAllocator;
+
   public:
-    typedef typename SolverT::RawVector RawVector;
-    typedef typename SolverT::RawMatrix RawMatrix;
-    typedef typename SolverT::Vector Vector;
-    typedef typename SolverT::Matrix Matrix;
-    typedef typename CostAllocator::VectorPtr VectorPtr;
-    typedef typename CostAllocator::MatrixPtr MatrixPtr;
-    typedef typename SolverT::NodeMetadata NodeMetadata;
-    typedef typename SolverT::EdgeMetadata EdgeMetadata;
-    typedef typename SolverT::GraphMetadata GraphMetadata;
+    using RawVector = typename SolverT::RawVector;
+    using RawMatrix = typename SolverT::RawMatrix;
+    using Vector = typename SolverT::Vector;
+    using Matrix = typename SolverT::Matrix;
+    using VectorPtr = typename CostAllocator::VectorPtr;
+    using MatrixPtr = typename CostAllocator::MatrixPtr;
+    using NodeMetadata = typename SolverT::NodeMetadata;
+    using EdgeMetadata = typename SolverT::EdgeMetadata;
+    using GraphMetadata = typename SolverT::GraphMetadata;
 
   private:
-
     class NodeEntry {
     public:
-      typedef std::vector<EdgeId> AdjEdgeList;
-      typedef AdjEdgeList::size_type AdjEdgeIdx;
-      typedef AdjEdgeList::const_iterator AdjEdgeItr;
+      using AdjEdgeList = std::vector<EdgeId>;
+      using AdjEdgeIdx = AdjEdgeList::size_type;
+      using AdjEdgeItr = AdjEdgeList::const_iterator;
+
+      NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {}
 
       static AdjEdgeIdx getInvalidAdjEdgeIdx() {
         return std::numeric_limits<AdjEdgeIdx>::max();
       }
 
-      NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {}
-
       AdjEdgeIdx addAdjEdgeId(EdgeId EId) {
         AdjEdgeIdx Idx = AdjEdgeIds.size();
         AdjEdgeIds.push_back(EId);
@@ -96,6 +94,7 @@ namespace PBQP {
 
       VectorPtr Costs;
       NodeMetadata Metadata;
+
     private:
       AdjEdgeList AdjEdgeIds;
     };
@@ -150,8 +149,10 @@ namespace PBQP {
 
       NodeId getN1Id() const { return NIds[0]; }
       NodeId getN2Id() const { return NIds[1]; }
+
       MatrixPtr Costs;
       EdgeMetadata Metadata;
+
     private:
       NodeId NIds[2];
       typename NodeEntry::AdjEdgeIdx ThisEdgeAdjIdxs[2];
@@ -161,18 +162,20 @@ namespace PBQP {
     GraphMetadata Metadata;
     CostAllocator CostAlloc;
-    SolverT *Solver;
+    SolverT *Solver = nullptr;
 
-    typedef std::vector<NodeEntry> NodeVector;
-    typedef std::vector<NodeId> FreeNodeVector;
+    using NodeVector = std::vector<NodeEntry>;
+    using FreeNodeVector = std::vector<NodeId>;
     NodeVector Nodes;
     FreeNodeVector FreeNodeIds;
 
-    typedef std::vector<EdgeEntry> EdgeVector;
-    typedef std::vector<EdgeId> FreeEdgeVector;
+    using EdgeVector = std::vector<EdgeEntry>;
+    using FreeEdgeVector = std::vector<EdgeId>;
     EdgeVector Edges;
     FreeEdgeVector FreeEdgeIds;
 
+    Graph(const Graph &Other) {}
+
     // ----- INTERNAL METHODS -----
 
     NodeEntry &getNode(NodeId NId) {
@@ -220,20 +223,18 @@ namespace PBQP {
       return EId;
     }
 
-    Graph(const Graph &Other) {}
     void operator=(const Graph &Other) {}
 
   public:
-
-    typedef typename NodeEntry::AdjEdgeItr AdjEdgeItr;
+    using AdjEdgeItr = typename NodeEntry::AdjEdgeItr;
 
     class NodeItr {
     public:
-      typedef std::forward_iterator_tag iterator_category;
-      typedef NodeId value_type;
-      typedef int difference_type;
-      typedef NodeId* pointer;
-      typedef NodeId& reference;
+      using iterator_category = std::forward_iterator_tag;
+      using value_type = NodeId;
+      using difference_type = int;
+      using pointer = NodeId *;
+      using reference = NodeId &;
 
       NodeItr(NodeId CurNId, const Graph &G)
        : CurNId(CurNId), EndNId(G.Nodes.size()), FreeNodeIds(G.FreeNodeIds) {
@@ -283,53 +284,65 @@ namespace PBQP {
 
     class NodeIdSet {
     public:
-      NodeIdSet(const Graph &G) : G(G) { }
+      NodeIdSet(const Graph &G) : G(G) {}
+
       NodeItr begin() const { return NodeItr(0, G); }
       NodeItr end() const { return NodeItr(G.Nodes.size(), G); }
+
       bool empty() const { return G.Nodes.empty(); }
+
       typename NodeVector::size_type size() const {
         return G.Nodes.size() - G.FreeNodeIds.size();
       }
+
     private:
       const Graph& G;
     };
 
     class EdgeIdSet {
     public:
-      EdgeIdSet(const Graph &G) : G(G) { }
+      EdgeIdSet(const Graph &G) : G(G) {}
+
       EdgeItr begin() const { return EdgeItr(0, G); }
       EdgeItr end() const { return EdgeItr(G.Edges.size(), G); }
+
       bool empty() const { return G.Edges.empty(); }
+
       typename NodeVector::size_type size() const {
         return G.Edges.size() - G.FreeEdgeIds.size();
       }
+
     private:
       const Graph& G;
     };
 
     class AdjEdgeIdSet {
     public:
-      AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) { }
+      AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) {}
+
       typename NodeEntry::AdjEdgeItr begin() const {
         return NE.getAdjEdgeIds().begin();
       }
+
       typename NodeEntry::AdjEdgeItr end() const {
         return NE.getAdjEdgeIds().end();
       }
+
       bool empty() const { return NE.getAdjEdgeIds().empty(); }
+
       typename NodeEntry::AdjEdgeList::size_type size() const {
         return NE.getAdjEdgeIds().size();
       }
+
     private:
       const NodeEntry &NE;
     };
 
     /// @brief Construct an empty PBQP graph.
-    Graph() : Solver(nullptr) {}
+    Graph() = default;
 
     /// @brief Construct an empty PBQP graph with the given graph metadata.
-    Graph(GraphMetadata Metadata)
-      : Metadata(std::move(Metadata)), Solver(nullptr) {}
+    Graph(GraphMetadata Metadata) : Metadata(std::move(Metadata)) {}
 
     /// @brief Get a reference to the graph metadata.
     GraphMetadata& getMetadata() { return Metadata; }
@@ -656,7 +669,7 @@ namespace PBQP {
     }
   };
 
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
 
 #endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index 278787550a43..ba405e816d10 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -1,4 +1,4 @@
-//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
+//===- Math.h - PBQP Vector and Matrix classes ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,20 +11,22 @@
 #define LLVM_CODEGEN_PBQP_MATH_H
 
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 #include <cassert>
 #include <functional>
+#include <memory>
 
 namespace llvm {
 namespace PBQP {
 
-typedef float PBQPNum;
+using PBQPNum = float;
 
 /// \brief PBQP Vector class.
 class Vector {
   friend hash_code hash_value(const Vector &);
-public:
 
+public:
   /// \brief Construct a PBQP vector of the given size.
   explicit Vector(unsigned Length)
     : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {}
@@ -120,8 +122,8 @@ OStream& operator<<(OStream &OS, const Vector &V) {
 class Matrix {
 private:
   friend hash_code hash_value(const Matrix &);
-public:
 
+public:
   /// \brief Construct a PBQP Matrix with the given dimensions.
   Matrix(unsigned Rows, unsigned Cols)
    : Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
@@ -253,9 +255,11 @@ OStream& operator<<(OStream &OS, const Matrix &M) {
 template <typename Metadata>
 class MDVector : public Vector {
 public:
-  MDVector(const Vector &v) : Vector(v), md(*this) { }
+  MDVector(const Vector &v) : Vector(v), md(*this) {}
   MDVector(Vector &&v) : Vector(std::move(v)), md(*this) { }
+
   const Metadata& getMetadata() const { return md; }
+
 private:
   Metadata md;
 };
@@ -268,9 +272,11 @@ inline hash_code hash_value(const MDVector<Metadata> &V) {
 template <typename Metadata>
 class MDMatrix : public Matrix {
 public:
-  MDMatrix(const Matrix &m) : Matrix(m), md(*this) { }
+  MDMatrix(const Matrix &m) : Matrix(m), md(*this) {}
   MDMatrix(Matrix &&m) : Matrix(std::move(m)), md(*this) { }
+
   const Metadata& getMetadata() const { return md; }
+
 private:
   Metadata md;
 };
@@ -280,7 +286,7 @@ inline hash_code hash_value(const MDMatrix<Metadata> &M) {
   return hash_value(static_cast<const Matrix&>(M));
 }
 
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
 
 #endif // LLVM_CODEGEN_PBQP_MATH_H
diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h
index d4a544bfe721..8aeb51936760 100644
--- a/include/llvm/CodeGen/PBQP/ReductionRules.h
+++ b/include/llvm/CodeGen/PBQP/ReductionRules.h
@@ -1,4 +1,4 @@
-//===----------- ReductionRules.h - Reduction Rules -------------*- C++ -*-===//
+//===- ReductionRules.h - Reduction Rules -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,6 +17,8 @@
 #include "Graph.h"
 #include "Math.h"
 #include "Solution.h"
+#include <cassert>
+#include <limits>
 
 namespace llvm {
 namespace PBQP {
@@ -27,11 +29,11 @@ namespace PBQP {
   ///        neighbor. Notify the problem domain.
   template <typename GraphT>
   void applyR1(GraphT &G, typename GraphT::NodeId NId) {
-    typedef typename GraphT::NodeId NodeId;
-    typedef typename GraphT::EdgeId EdgeId;
-    typedef typename GraphT::Vector Vector;
-    typedef typename GraphT::Matrix Matrix;
-    typedef typename GraphT::RawVector RawVector;
+    using NodeId = typename GraphT::NodeId;
+    using EdgeId = typename GraphT::EdgeId;
+    using Vector = typename GraphT::Vector;
+    using Matrix = typename GraphT::Matrix;
+    using RawVector = typename GraphT::RawVector;
 
     assert(G.getNodeDegree(NId) == 1 &&
            "R1 applied to node with degree != 1.");
@@ -71,11 +73,11 @@ namespace PBQP {
 
   template <typename GraphT>
   void applyR2(GraphT &G, typename GraphT::NodeId NId) {
-    typedef typename GraphT::NodeId NodeId;
-    typedef typename GraphT::EdgeId EdgeId;
-    typedef typename GraphT::Vector Vector;
-    typedef typename GraphT::Matrix Matrix;
-    typedef typename GraphT::RawMatrix RawMatrix;
+    using NodeId = typename GraphT::NodeId;
+    using EdgeId = typename GraphT::EdgeId;
+    using Vector = typename GraphT::Vector;
+    using Matrix = typename GraphT::Matrix;
+    using RawMatrix = typename GraphT::RawMatrix;
 
     assert(G.getNodeDegree(NId) == 2 &&
            "R2 applied to node with degree != 2.");
@@ -177,9 +179,9 @@ namespace PBQP {
   //        state.
   template <typename GraphT, typename StackT>
   Solution backpropagate(GraphT& G, StackT stack) {
-    typedef GraphBase::NodeId NodeId;
-    typedef typename GraphT::Matrix Matrix;
-    typedef typename GraphT::RawVector RawVector;
+    using NodeId = GraphBase::NodeId;
+    using Matrix = typename GraphT::Matrix;
+    using RawVector = typename GraphT::RawVector;
 
     Solution s;
 
@@ -215,7 +217,7 @@ namespace PBQP {
     return s;
   }
 
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
 
-#endif
+#endif // LLVM_CODEGEN_PBQP_REDUCTIONRULES_H
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
index d96b5eac4520..8d5d2374679d 100644
--- a/include/llvm/CodeGen/PBQP/Solution.h
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -26,7 +26,7 @@ namespace PBQP {
   ///        To get the selection for each node in the problem use the getSelection method.
   class Solution {
   private:
-    typedef std::map<GraphBase::NodeId, unsigned> SelectionsMap;
+    using SelectionsMap = std::map<GraphBase::NodeId, unsigned>;
     SelectionsMap selections;
 
     unsigned r0Reductions = 0;
diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h
index 833b9bad613f..269b7a7b3a35 100644
--- a/include/llvm/CodeGen/PBQPRAConstraint.h
+++ b/include/llvm/CodeGen/PBQPRAConstraint.h
@@ -1,4 +1,4 @@
-//===-- RegAllocPBQP.h ------------------------------------------*- C++ -*-===//
+//===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,23 +16,22 @@
 #ifndef LLVM_CODEGEN_PBQPRACONSTRAINT_H
 #define LLVM_CODEGEN_PBQPRACONSTRAINT_H
 
+#include <algorithm>
 #include <memory>
 #include <vector>
 
 namespace llvm {
+
 namespace PBQP {
 namespace RegAlloc {
+
 // Forward declare PBQP graph class.
 class PBQPRAGraph;
-}
-}
 
-class LiveIntervals;
-class MachineBlockFrequencyInfo;
-class MachineFunction;
-class TargetRegisterInfo;
+} // end namespace RegAlloc
+} // end namespace PBQP
 
-typedef PBQP::RegAlloc::PBQPRAGraph PBQPRAGraph;
+using PBQPRAGraph = PBQP::RegAlloc::PBQPRAGraph;
 
 /// @brief Abstract base for classes implementing PBQP register allocation
 /// constraints (e.g. Spill-costs, interference, coalescing).
@@ -40,6 +39,7 @@ class PBQPRAConstraint {
 public:
   virtual ~PBQPRAConstraint() = 0;
   virtual void apply(PBQPRAGraph &G) = 0;
+
 private:
   virtual void anchor();
 };
@@ -59,11 +59,13 @@ public:
     if (C)
       Constraints.push_back(std::move(C));
   }
+
 private:
   std::vector<std::unique_ptr<PBQPRAConstraint>> Constraints;
+
   void anchor() override;
 };
 
-}
+} // end namespace llvm
 
-#endif /* LLVM_CODEGEN_PBQPRACONSTRAINT_H */
+#endif // LLVM_CODEGEN_PBQPRACONSTRAINT_H
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 3bcfc1c4254b..96cfce5b84df 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -140,6 +140,9 @@ namespace llvm {
   /// Greedy register allocator.
   extern char &RAGreedyID;
 
+  /// Basic register allocator.
+  extern char &RABasicID;
+
   /// VirtRegRewriter pass. Rewrite virtual registers to physical registers as
   /// assigned in VirtRegMap.
   extern char &VirtRegRewriterID;
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index 8872a5dc54a1..5b342863eb50 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -130,10 +130,10 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) {
 /// \brief Holds graph-level metadata relevant to PBQP RA problems.
 class GraphMetadata {
 private:
-  typedef ValuePool<AllowedRegVector> AllowedRegVecPool;
+  using AllowedRegVecPool = ValuePool<AllowedRegVector>;
 
 public:
-  typedef AllowedRegVecPool::PoolRef AllowedRegVecRef;
+  using AllowedRegVecRef = AllowedRegVecPool::PoolRef;
 
   GraphMetadata(MachineFunction &MF,
                 LiveIntervals &LIS,
@@ -167,17 +167,17 @@ private:
 /// \brief Holds solver state and other metadata relevant to each PBQP RA node.
 class NodeMetadata {
 public:
-  typedef RegAlloc::AllowedRegVector AllowedRegVector;
+  using AllowedRegVector = RegAlloc::AllowedRegVector;
 
   // The node's reduction state. The order in this enum is important,
   // as it is assumed nodes can only progress up (i.e. towards being
   // optimally reducible) when reducing the graph.
-  typedef enum {
+  using ReductionState = enum {
     Unprocessed,
     NotProvablyAllocatable,
     ConservativelyAllocatable,
     OptimallyReducible
-  } ReductionState;
+  };
 
   NodeMetadata() = default;
 
@@ -267,23 +267,23 @@ private:
 
 class RegAllocSolverImpl {
 private:
-  typedef MDMatrix<MatrixMetadata> RAMatrix;
+  using RAMatrix = MDMatrix<MatrixMetadata>;
 
 public:
-  typedef PBQP::Vector RawVector;
-  typedef PBQP::Matrix RawMatrix;
-  typedef PBQP::Vector Vector;
-  typedef RAMatrix Matrix;
-  typedef PBQP::PoolCostAllocator<Vector, Matrix> CostAllocator;
+  using RawVector = PBQP::Vector;
+  using RawMatrix = PBQP::Matrix;
+  using Vector = PBQP::Vector;
+  using Matrix = RAMatrix;
+  using CostAllocator = PBQP::PoolCostAllocator<Vector, Matrix>;
 
-  typedef GraphBase::NodeId NodeId;
-  typedef GraphBase::EdgeId EdgeId;
+  using NodeId = GraphBase::NodeId;
+  using EdgeId = GraphBase::EdgeId;
 
-  typedef RegAlloc::NodeMetadata NodeMetadata;
-  struct EdgeMetadata { };
-  typedef RegAlloc::GraphMetadata GraphMetadata;
+  using NodeMetadata = RegAlloc::NodeMetadata;
+  struct EdgeMetadata {};
+  using GraphMetadata = RegAlloc::GraphMetadata;
 
-  typedef PBQP::Graph<RegAllocSolverImpl> Graph;
+  using Graph = PBQP::Graph<RegAllocSolverImpl>;
 
   RegAllocSolverImpl(Graph &G) : G(G) {}
 
@@ -426,7 +426,7 @@ private:
   std::vector<GraphBase::NodeId> reduce() {
     assert(!G.empty() && "Cannot reduce empty graph.");
 
-    typedef GraphBase::NodeId NodeId;
+    using NodeId = GraphBase::NodeId;
     std::vector<NodeId> NodeStack;
 
     // Consume worklists.
@@ -459,7 +459,6 @@ private:
         ConservativelyAllocatableNodes.erase(NItr);
         NodeStack.push_back(NId);
         G.disconnectAllNeighborsFromNode(NId);
-
       } else if (!NotProvablyAllocatableNodes.empty()) {
         NodeSet::iterator NItr =
           std::min_element(NotProvablyAllocatableNodes.begin(),
@@ -493,7 +492,7 @@ private:
   };
 
   Graph& G;
-  typedef std::set<NodeId> NodeSet;
+  using NodeSet = std::set<NodeId>;
   NodeSet OptimallyReducibleNodes;
   NodeSet ConservativelyAllocatableNodes;
   NodeSet NotProvablyAllocatableNodes;
@@ -501,7 +500,7 @@ private:
 
 class PBQPRAGraph : public PBQP::Graph<RegAllocSolverImpl> {
 private:
-  typedef PBQP::Graph<RegAllocSolverImpl> BaseT;
+  using BaseT = PBQP::Graph<RegAllocSolverImpl>;
 
 public:
   PBQPRAGraph(GraphMetadata Metadata) : BaseT(std::move(Metadata)) {}
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 1f939e72e139..ad1efe18c72d 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -204,6 +204,10 @@ private:
   void setLiveInsUsed(const MachineBasicBlock &MBB);
 };
 
+/// Replaces all frame index virtual registers with physical registers. Uses the
+/// register scavenger to find an appropriate register to use.
+void scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS);
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_REGISTERSCAVENGING_H
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index f5f5bfd45e79..d62bb9bf0b75 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -1,4 +1,4 @@
-//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,22 +15,38 @@
 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 
-#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseMultiSet.h"
 #include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/MC/LaneBitmask.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
 #include <list>
+#include <utility>
+#include <vector>
 
 namespace llvm {
+
   class LiveIntervals;
   class MachineFrameInfo;
+  class MachineFunction;
+  class MachineInstr;
   class MachineLoopInfo;
-  class MachineDominatorTree;
-  class RegPressureTracker;
+  class MachineOperand;
+  struct MCSchedClassDesc;
   class PressureDiffs;
+  class PseudoSourceValue;
+  class RegPressureTracker;
+  class UndefValue;
+  class Value;
 
   /// An individual mapping from virtual register number to SUnit.
   struct VReg2SUnit {
@@ -70,31 +86,34 @@ namespace llvm {
   /// Use a SparseMultiSet to track physical registers. Storage is only
   /// allocated once for the pass. It can be cleared in constant time and reused
   /// without any frees.
-  typedef SparseMultiSet<PhysRegSUOper, llvm::identity<unsigned>, uint16_t>
-      Reg2SUnitsMap;
+  using Reg2SUnitsMap =
+      SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
 
   /// Use SparseSet as a SparseMap by relying on the fact that it never
   /// compares ValueT's, only unsigned keys. This allows the set to be cleared
   /// between scheduling regions in constant time as long as ValueT does not
   /// require a destructor.
-  typedef SparseSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMap;
+  using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>;
 
   /// Track local uses of virtual registers. These uses are gathered by the DAG
   /// builder and may be consulted by the scheduler to avoid iterating an entire
   /// vreg use list.
-  typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMultiMap;
+  using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
+
+  using VReg2SUnitOperIdxMultiMap =
+      SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
 
-  typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
-      VReg2SUnitOperIdxMultiMap;
+  using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
 
-  typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
   struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
     UnderlyingObject(ValueType V, bool MayAlias)
         : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
+
     ValueType getValue() const { return getPointer(); }
     bool mayAlias() const { return getInt(); }
   };
-  typedef SmallVector<UnderlyingObject, 4> UnderlyingObjectsVector;
+
+  using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
 
   /// A ScheduleDAG for scheduling lists of MachineInstr.
   class ScheduleDAGInstrs : public ScheduleDAG {
@@ -114,10 +133,10 @@ namespace llvm {
     /// reordering. A specialized scheduler can override
     /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
     /// it has taken responsibility for scheduling the terminator correctly.
-    bool CanHandleTerminators;
+    bool CanHandleTerminators = false;
 
     /// Whether lane masks should get tracked.
-    bool TrackLaneMasks;
+    bool TrackLaneMasks = false;
 
     // State specific to the current scheduling region.
     // ------------------------------------------------
@@ -155,12 +174,12 @@ namespace llvm {
     /// Tracks the last instructions in this region using each virtual register.
     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
 
-    AliasAnalysis *AAForDep;
+    AliasAnalysis *AAForDep = nullptr;
 
     /// Remember a generic side-effecting instruction as we proceed.
     /// No other SU ever gets scheduled around it (except in the special
     /// case of a huge region that gets reduced).
-    SUnit *BarrierChain;
+    SUnit *BarrierChain = nullptr;
 
   public:
     /// A list of SUnits, used in Value2SUsMap, during DAG construction.
@@ -168,7 +187,7 @@ namespace llvm {
     /// implementation of this data structure, such as a singly linked list
     /// with a memory pool (SmallVector was tried but slow and SparseSet is not
     /// applicable).
-    typedef std::list<SUnit *> SUList;
+    using SUList = std::list<SUnit *>;
 
   protected:
     /// \brief A map from ValueType to SUList, used during DAG construction, as
@@ -216,13 +235,13 @@ namespace llvm {
     /// For an unanalyzable memory access, this Value is used in maps.
     UndefValue *UnknownValue;
 
-    typedef std::vector<std::pair<MachineInstr *, MachineInstr *>>
-      DbgValueVector;
+    using DbgValueVector =
+        std::vector<std::pair<MachineInstr *, MachineInstr *>>;
     /// Remember instruction that precedes DBG_VALUE.
     /// These are generated by buildSchedGraph but persist so they can be
     /// referenced when emitting the final schedule.
     DbgValueVector DbgValues;
-    MachineInstr *FirstDbgValue;
+    MachineInstr *FirstDbgValue = nullptr;
 
     /// Set of live physical registers for updating kill flags.
     LivePhysRegs LiveRegs;
 
@@ -232,7 +251,7 @@ namespace llvm {
                       const MachineLoopInfo *mli,
                       bool RemoveKillFlags = false);
 
-    ~ScheduleDAGInstrs() override {}
+    ~ScheduleDAGInstrs() override = default;
 
     /// Gets the machine model for instruction scheduling.
     const TargetSchedModel *getSchedModel() const { return &SchedModel; }
@@ -354,6 +373,7 @@ namespace llvm {
         return nullptr;
       return I->second;
     }
+
 } // end namespace llvm
 
-#endif
+#endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 493122b15704..4b1a375abd57 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ---------*- C++ -*-===//
+//===- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,35 +15,72 @@
 #ifndef LLVM_CODEGEN_SELECTIONDAG_H
 #define LLVM_CODEGEN_SELECTIONDAG_H
 
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/ilist.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
 #include <cassert>
+#include <cstdint>
+#include <functional>
 #include <map>
 #include <string>
+#include <tuple>
+#include <utility>
 #include <vector>
 
 namespace llvm {
 
+class BlockAddress;
+class Constant;
+class ConstantFP;
+class ConstantInt;
+class DataLayout;
+struct fltSemantics;
+class GlobalValue;
 struct KnownBits;
+class LLVMContext;
+class MachineBasicBlock;
 class MachineConstantPoolValue;
-class MachineFunction;
-class MDNode;
+class MCSymbol;
 class OptimizationRemarkEmitter;
 class SDDbgValue;
-class TargetLowering;
+class SelectionDAG;
 class SelectionDAGTargetInfo;
+class TargetLowering;
+class TargetMachine;
+class TargetSubtargetInfo;
+class Value;
 
 class SDVTListNode : public FoldingSetNode {
   friend struct FoldingSetTrait<SDVTListNode>;
+
   /// A reference to an Interned FoldingSetNodeID for this node.
   /// The Allocator in SelectionDAG holds the data.
   /// SDVTList contains all types which are frequently accessed in SelectionDAG.
@@ -55,11 +92,13 @@ class SDVTListNode : public FoldingSetNode {
   /// The hash value for SDVTList is fixed, so cache it to avoid
   /// hash calculation.
   unsigned HashValue;
+
 public:
   SDVTListNode(const FoldingSetNodeIDRef ID, const EVT *VT, unsigned int Num) :
       FastID(ID), VTs(VT), NumVTs(Num) {
     HashValue = ID.ComputeHash();
   }
+
   SDVTList getSDVTList() {
     SDVTList result = {VTs, NumVTs};
     return result;
@@ -72,12 +111,14 @@ template<> struct FoldingSetTrait<SDVTListNode> : DefaultFoldingSetTrait<SDVTLis
   static void Profile(const SDVTListNode &X, FoldingSetNodeID& ID) {
     ID = X.FastID;
   }
+
   static bool Equals(const SDVTListNode &X, const FoldingSetNodeID &ID,
                      unsigned IDHash, FoldingSetNodeID &TempID) {
     if (X.HashValue != IDHash)
       return false;
     return ID == X.FastID;
   }
+
   static unsigned ComputeHash(const SDVTListNode &X, FoldingSetNodeID &TempID) {
     return X.HashValue;
   }
@@ -104,13 +145,13 @@ class SDDbgInfo {
   BumpPtrAllocator Alloc;
   SmallVector<SDDbgValue*, 32> DbgValues;
   SmallVector<SDDbgValue*, 32> ByvalParmDbgValues;
-  typedef DenseMap<const SDNode*, SmallVector<SDDbgValue*, 2> > DbgValMapType;
+  using DbgValMapType = DenseMap<const SDNode *, SmallVector<SDDbgValue *, 2>>;
   DbgValMapType DbgValMap;
 
-  void operator=(const SDDbgInfo&) = delete;
-  SDDbgInfo(const SDDbgInfo&) = delete;
 public:
-  SDDbgInfo() {}
+  SDDbgInfo() = default;
+  SDDbgInfo(const SDDbgInfo &) = delete;
+  SDDbgInfo &operator=(const SDDbgInfo &) = delete;
 
   void add(SDDbgValue *V, const SDNode *Node, bool isParameter) {
     if (isParameter) {
@@ -144,14 +185,14 @@ public:
     return ArrayRef<SDDbgValue*>();
   }
 
-  typedef SmallVectorImpl<SDDbgValue*>::iterator DbgIterator;
+  using DbgIterator = SmallVectorImpl<SDDbgValue*>::iterator;
+
   DbgIterator DbgBegin() { return DbgValues.begin(); }
   DbgIterator DbgEnd()   { return DbgValues.end(); }
   DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); }
   DbgIterator ByvalParmDbgEnd()   { return ByvalParmDbgValues.end(); }
 };
 
-class SelectionDAG;
 void checkForCycles(const SelectionDAG *DAG, bool force = false);
 
 /// This is used to represent a portion of an LLVM function in a low-level
@@ -167,8 +208,8 @@ void checkForCycles(const SelectionDAG *DAG, bool force = false);
 ///
 class SelectionDAG {
   const TargetMachine &TM;
-  const SelectionDAGTargetInfo *TSI;
-  const TargetLowering *TLI;
+  const SelectionDAGTargetInfo *TSI = nullptr;
+  const TargetLowering *TLI = nullptr;
   MachineFunction *MF;
   LLVMContext *Context;
   CodeGenOpt::Level OptLevel;
@@ -188,9 +229,9 @@ class SelectionDAG {
 
   /// The AllocatorType for allocating SDNodes. We use
   /// pool allocation with recycling.
-  typedef RecyclingAllocator<BumpPtrAllocator, SDNode, sizeof(LargestSDNode),
-                             alignof(MostAlignedSDNode)>
-      NodeAllocatorType;
+  using NodeAllocatorType = RecyclingAllocator<BumpPtrAllocator, SDNode,
+                                               sizeof(LargestSDNode),
+                                               alignof(MostAlignedSDNode)>;
 
   /// Pool allocation for nodes.
   NodeAllocatorType NodeAllocator;
@@ -243,9 +284,11 @@ public:
   struct DAGNodeDeletedListener : public DAGUpdateListener {
     std::function<void(SDNode *, SDNode *)> Callback;
+
     DAGNodeDeletedListener(SelectionDAG &DAG,
                            std::function<void(SDNode *, SDNode *)> Callback)
         : DAGUpdateListener(DAG), Callback(std::move(Callback)) {}
+
     void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); }
   };
 
@@ -254,7 +297,7 @@ public:
   /// have legal types. This is important after type legalization since
   /// any illegally typed nodes generated after this point will not experience
   /// type legalization.
-  bool NewNodesMustHaveLegalTypes;
+  bool NewNodesMustHaveLegalTypes = false;
 
 private:
   /// DAGUpdateListener is a friend so it can manipulate the listener stack.
@@ -262,7 +305,7 @@ private: /// Linked list of registered DAGUpdateListener instances. /// This stack is maintained by DAGUpdateListener RAII. - DAGUpdateListener *UpdateListeners; + DAGUpdateListener *UpdateListeners = nullptr; /// Implementation of setSubgraphColor. /// Return whether we had to truncate the search. @@ -316,11 +359,10 @@ private: Node->OperandList = nullptr; } - void operator=(const SelectionDAG&) = delete; - SelectionDAG(const SelectionDAG&) = delete; - public: - explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level); + explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); + SelectionDAG(const SelectionDAG &) = delete; + SelectionDAG &operator=(const SelectionDAG &) = delete; ~SelectionDAG(); /// Prepare this SelectionDAG to process code in the given MachineFunction. @@ -364,12 +406,16 @@ public: /// Convenience for setting subgraph color attribute. void setSubgraphColor(SDNode *N, const char *Color); - typedef ilist<SDNode>::const_iterator allnodes_const_iterator; + using allnodes_const_iterator = ilist<SDNode>::const_iterator; + allnodes_const_iterator allnodes_begin() const { return AllNodes.begin(); } allnodes_const_iterator allnodes_end() const { return AllNodes.end(); } - typedef ilist<SDNode>::iterator allnodes_iterator; + + using allnodes_iterator = ilist<SDNode>::iterator; + allnodes_iterator allnodes_begin() { return AllNodes.begin(); } allnodes_iterator allnodes_end() { return AllNodes.end(); } + ilist<SDNode>::size_type allnodes_size() const { return AllNodes.size(); } @@ -475,7 +521,6 @@ public: //===--------------------------------------------------------------------===// // Node creation methods. - // /// \brief Create a ConstantSDNode wrapping a constant value. /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. @@ -1251,9 +1296,11 @@ public: SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); } SDDbgInfo::DbgIterator DbgEnd() { return DbgInfo->DbgEnd(); } + SDDbgInfo::DbgIterator ByvalParmDbgBegin() { return DbgInfo->ByvalParmDbgBegin(); } + SDDbgInfo::DbgIterator ByvalParmDbgEnd() { return DbgInfo->ByvalParmDbgEnd(); } @@ -1479,10 +1526,12 @@ private: }; template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> { - typedef pointer_iterator<SelectionDAG::allnodes_iterator> nodes_iterator; + using nodes_iterator = pointer_iterator<SelectionDAG::allnodes_iterator>; + static nodes_iterator nodes_begin(SelectionDAG *G) { return nodes_iterator(G->allnodes_begin()); } + static nodes_iterator nodes_end(SelectionDAG *G) { return nodes_iterator(G->allnodes_end()); } @@ -1493,7 +1542,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO) { - /// Compose node ID and try to find an existing node. 
FoldingSetNodeID ID; unsigned Opcode = @@ -1524,6 +1572,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs, return SDValue(N, 0); } -} // end namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SELECTIONDAG_H diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 973c5aac5281..3a4feb322092 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -37,6 +37,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -53,14 +54,18 @@ namespace llvm { -class SelectionDAG; +class APInt; +class Constant; +template <typename T> struct DenseMapInfo; class GlobalValue; class MachineBasicBlock; class MachineConstantPoolValue; +class MCSymbol; +class raw_ostream; class SDNode; +class SelectionDAG; +class Type; class Value; -class MCSymbol; -template <typename T> struct DenseMapInfo; void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, bool force = false); @@ -229,13 +234,15 @@ template <> struct isPodLike<SDValue> { static const bool value = true; }; /// Allow casting operators to work directly on /// SDValues as if they were SDNode*'s. template<> struct simplify_type<SDValue> { - typedef SDNode* SimpleType; + using SimpleType = SDNode *; + static SimpleType getSimplifiedValue(SDValue &Val) { return Val.getNode(); } }; template<> struct simplify_type<const SDValue> { - typedef /*const*/ SDNode* SimpleType; + using SimpleType = /*const*/ SDNode *; + static SimpleType getSimplifiedValue(const SDValue &Val) { return Val.getNode(); } @@ -330,7 +337,8 @@ private: /// simplify_type specializations - Allow casting operators to work directly on /// SDValues as if they were SDNode*'s. 
template<> struct simplify_type<SDUse> { - typedef SDNode* SimpleType; + using SimpleType = SDNode *; + static SimpleType getSimplifiedValue(SDUse &Val) { return Val.getNode(); } @@ -695,10 +703,10 @@ public: explicit use_iterator(SDUse *op) : Op(op) {} public: - typedef std::iterator<std::forward_iterator_tag, - SDUse, ptrdiff_t>::reference reference; - typedef std::iterator<std::forward_iterator_tag, - SDUse, ptrdiff_t>::pointer pointer; + using reference = std::iterator<std::forward_iterator_tag, + SDUse, ptrdiff_t>::reference; + using pointer = std::iterator<std::forward_iterator_tag, + SDUse, ptrdiff_t>::pointer; use_iterator() = default; use_iterator(const use_iterator &I) : Op(I.Op) {} @@ -824,7 +832,7 @@ public: return OperandList[Num]; } - typedef SDUse* op_iterator; + using op_iterator = SDUse *; op_iterator op_begin() const { return OperandList; } op_iterator op_end() const { return OperandList+NumOperands; } @@ -896,7 +904,8 @@ public: return getValueType(ResNo).getSizeInBits(); } - typedef const EVT* value_iterator; + using value_iterator = const EVT *; + value_iterator value_begin() const { return ValueList; } value_iterator value_end() const { return ValueList+NumValues; } @@ -1822,8 +1831,7 @@ class BlockAddressSDNode : public SDNode { BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, int64_t o, unsigned char Flags) : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), - BA(ba), Offset(o), TargetFlags(Flags) { - } + BA(ba), Offset(o), TargetFlags(Flags) {} public: const BlockAddress *getBlockAddress() const { return BA; } @@ -2154,7 +2162,7 @@ public: /// instruction selection proper phase. class MachineSDNode : public SDNode { public: - typedef MachineMemOperand **mmo_iterator; + using mmo_iterator = MachineMemOperand **; private: friend class SelectionDAG; @@ -2226,8 +2234,8 @@ public: }; template <> struct GraphTraits<SDNode*> { - typedef SDNode *NodeRef; - typedef SDNodeIterator ChildIteratorType; + using NodeRef = SDNode *; + using ChildIteratorType = SDNodeIterator; static NodeRef getEntryNode(SDNode *N) { return N; } @@ -2244,12 +2252,12 @@ template <> struct GraphTraits<SDNode*> { /// /// This needs to be a union because the largest node differs on 32 bit systems /// with 4 and 8 byte pointer alignment, respectively. -typedef AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode, - BlockAddressSDNode, GlobalAddressSDNode> - LargestSDNode; +using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode, + BlockAddressSDNode, + GlobalAddressSDNode>; /// The SDNode class with the greatest alignment requirement. 
-typedef GlobalAddressSDNode MostAlignedSDNode; +using MostAlignedSDNode = GlobalAddressSDNode; namespace ISD { diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 14fc3a499a08..a275b2721b44 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -20,17 +20,26 @@ #define LLVM_CODEGEN_SLOTINDEXES_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ilist.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/ilist.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> namespace llvm { +class raw_ostream; + /// This class represents an entry in the slot index list held in the /// SlotIndexes pass. It should not be used directly. See the /// SlotIndex & SlotIndexes classes for the public interface to this @@ -40,7 +49,6 @@ namespace llvm { unsigned index; public: - IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {} MachineInstr* getInstr() const { return mi; } @@ -301,7 +309,7 @@ namespace llvm { return os; } - typedef std::pair<SlotIndex, MachineBasicBlock*> IdxMBBPair; + using IdxMBBPair = std::pair<SlotIndex, MachineBasicBlock *>; inline bool operator<(SlotIndex V, const IdxMBBPair &IM) { return V < IM.first; @@ -325,7 +333,7 @@ namespace llvm { // IndexListEntry allocator. BumpPtrAllocator ileAllocator; - typedef ilist<IndexListEntry> IndexList; + using IndexList = ilist<IndexListEntry>; IndexList indexList; #ifdef EXPENSIVE_CHECKS @@ -334,7 +342,7 @@ namespace llvm { MachineFunction *mf; - typedef DenseMap<const MachineInstr*, SlotIndex> Mi2IndexMap; + using Mi2IndexMap = DenseMap<const MachineInstr *, SlotIndex>; Mi2IndexMap mi2iMap; /// MBBRanges - Map MBB number to (start, stop) indexes. @@ -436,7 +444,7 @@ namespace llvm { const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inside a basic block"); MachineBasicBlock::const_iterator I = MI, B = MBB->begin(); - for (;;) { + while (true) { if (I == B) return getMBBStartIdx(MBB); --I; @@ -453,7 +461,7 @@ namespace llvm { const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inside a basic block"); MachineBasicBlock::const_iterator I = MI, E = MBB->end(); - for (;;) { + while (true) { ++I; if (I == E) return getMBBEndIdx(MBB); @@ -497,21 +505,25 @@ namespace llvm { /// Iterator over the idx2MBBMap (sorted pairs of slot index of basic block /// begin and basic block) - typedef SmallVectorImpl<IdxMBBPair>::const_iterator MBBIndexIterator; + using MBBIndexIterator = SmallVectorImpl<IdxMBBPair>::const_iterator; + /// Move iterator to the next IdxMBBPair where the SlotIndex is greater or /// equal to \p To. MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const { return std::lower_bound(I, idx2MBBMap.end(), To); } + /// Get an iterator pointing to the IdxMBBPair with the smallest SlotIndex /// that is greater or equal to \p Idx. MBBIndexIterator findMBBIndex(SlotIndex Idx) const { return advanceMBBIndex(idx2MBBMap.begin(), Idx); } + /// Returns an iterator for the begin of the idx2MBBMap.
MBBIndexIterator MBBIndexBegin() const { return idx2MBBMap.begin(); } + /// Return an iterator for the end of the idx2MBBMap. MBBIndexIterator MBBIndexEnd() const { return idx2MBBMap.end(); diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index a18936feea7b..8263946ed928 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -145,21 +145,27 @@ public: /// /// Statepoint operands take the form: /// <id>, <num patch bytes>, <num call arguments>, <call target>, -/// [call arguments], <StackMaps::ConstantOp>, <calling convention>, +/// [call arguments...], +/// <StackMaps::ConstantOp>, <calling convention>, /// <StackMaps::ConstantOp>, <statepoint flags>, -/// <StackMaps::ConstantOp>, <num other args>, [other args], -/// [gc values] +/// <StackMaps::ConstantOp>, <num deopt args>, [deopt args...], +/// <gc base/derived pairs...> <gc allocas...> +/// Note that the last two sets of arguments are not currently length +/// prefixed. class StatepointOpers { -private: + // TODO: we should change the STATEPOINT representation so that CC and + // Flags should be part of meta operands, with args and deopt operands, and + // gc operands all prefixed by their length and a type code. This would be + // much more consistent. +public: // These values are absolute offsets into the operands of the statepoint // instruction. enum { IDPos, NBytesPos, NCallArgsPos, CallTargetPos, MetaEnd }; // These values are relative offsets from the start of the statepoint meta // arguments (i.e. the end of the call arguments). - enum { CCOffset = 1, FlagsOffset = 3, NumVMSArgsOffset = 5 }; + enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 }; -public: explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {} /// Get starting index of non call related arguments @@ -220,7 +226,7 @@ public: // OpTypes are used to encode information about the following logical // operand (which may consist of several MachineOperands) for the // OpParser. - typedef enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp } OpType; + using OpType = enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp }; StackMaps(AsmPrinter &AP); @@ -248,9 +254,10 @@ public: private: static const char *WSMP; - typedef SmallVector<Location, 8> LocationVec; - typedef SmallVector<LiveOutReg, 8> LiveOutVec; - typedef MapVector<uint64_t, uint64_t> ConstantPool; + + using LocationVec = SmallVector<Location, 8>; + using LiveOutVec = SmallVector<LiveOutReg, 8>; + using ConstantPool = MapVector<uint64_t, uint64_t>; struct FunctionInfo { uint64_t StackSize = 0; @@ -273,8 +280,8 @@ private: LiveOuts(std::move(LiveOuts)) {} }; - typedef MapVector<const MCSymbol *, FunctionInfo> FnInfoMap; - typedef std::vector<CallsiteInfo> CallsiteInfoList; + using FnInfoMap = MapVector<const MCSymbol *, FunctionInfo>; + using CallsiteInfoList = std::vector<CallsiteInfo>; AsmPrinter &AP; CallsiteInfoList CSInfos; diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 1992412120aa..4365fca74bf1 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -55,6 +55,9 @@ public: /// Return the MCSchedClassDesc for this instruction. const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const; + /// \brief TargetSubtargetInfo getter. + const TargetSubtargetInfo *getSubtargetInfo() const { return STI; } + /// \brief TargetInstrInfo getter.
const TargetInstrInfo *getInstrInfo() const { return TII; } diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h index dd730495a5f6..8043024626a0 100644 --- a/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/include/llvm/CodeGen/WinEHFuncInfo.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/WinEHFuncInfo.h ----------------------------*- C++ -*-===// +//===- llvm/CodeGen/WinEHFuncInfo.h -----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,28 +17,26 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/IR/Instructions.h" +#include <cstdint> +#include <limits> +#include <utility> namespace llvm { + class AllocaInst; class BasicBlock; -class CatchReturnInst; -class Constant; +class FuncletPadInst; class Function; class GlobalVariable; +class Instruction; class InvokeInst; -class IntrinsicInst; -class LandingPadInst; -class MCExpr; -class MCSymbol; class MachineBasicBlock; -class Value; +class MCSymbol; // The following structs represent the .xdata tables for various // Windows-related EH personalities. -typedef PointerUnion<const BasicBlock *, MachineBasicBlock *> MBBOrBasicBlock; +using MBBOrBasicBlock = PointerUnion<const BasicBlock *, MachineBasicBlock *>; struct CxxUnwindMapEntry { int ToState; @@ -99,18 +97,18 @@ struct WinEHFuncInfo { SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap; SmallVector<SEHUnwindMapEntry, 4> SEHUnwindMap; SmallVector<ClrEHUnwindMapEntry, 4> ClrEHUnwindMap; - int UnwindHelpFrameIdx = INT_MAX; - int PSPSymFrameIdx = INT_MAX; + int UnwindHelpFrameIdx = std::numeric_limits<int>::max(); + int PSPSymFrameIdx = std::numeric_limits<int>::max(); int getLastStateNumber() const { return CxxUnwindMap.size() - 1; } void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin, MCSymbol *InvokeEnd); - int EHRegNodeFrameIndex = INT_MAX; - int EHRegNodeEndOffset = INT_MAX; - int EHGuardFrameIndex = INT_MAX; - int SEHSetFrameOffset = INT_MAX; + int EHRegNodeFrameIndex = std::numeric_limits<int>::max(); + int EHRegNodeEndOffset = std::numeric_limits<int>::max(); + int EHGuardFrameIndex = std::numeric_limits<int>::max(); + int SEHSetFrameOffset = std::numeric_limits<int>::max(); WinEHFuncInfo(); }; @@ -125,5 +123,7 @@ void calculateSEHStateNumbers(const Function *ParentFn, WinEHFuncInfo &FuncInfo); void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo); -} + +} // end namespace llvm + #endif // LLVM_CODEGEN_WINEHFUNCINFO_H diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h index 4e8c8feb7a12..9890263ae2d2 100644 --- a/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/include/llvm/DebugInfo/CodeView/CodeView.h @@ -574,6 +574,14 @@ struct FrameData { IsFunctionStart = 1 << 2, }; }; + +enum class CodeViewContainer { ObjectFile, Pdb }; + +inline uint32_t alignOf(CodeViewContainer Container) { + if (Container == CodeViewContainer::ObjectFile) + return 1; + return 4; +} } } diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index b3976826a316..db944c7057f7 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -136,6 +136,7 @@ public: Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes); Error mapByteVectorTail(std::vector<uint8_t> &Bytes); + Error padToAlignment(uint32_t Align); Error skipPadding(); private: diff --git
a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h index e7036033d2d9..c958a95ee6de 100644 --- a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h +++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h @@ -60,8 +60,8 @@ public: Error initialize(BinaryStreamReader Reader); Error initialize(BinaryStreamRef Stream); - Iterator begin() { return Checksums.begin(); } - Iterator end() { return Checksums.end(); } + Iterator begin() const { return Checksums.begin(); } + Iterator end() const { return Checksums.end(); } const FileChecksumArray &getArray() const { return Checksums; } diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h index e2cfc3c99233..60440700c265 100644 --- a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h +++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h @@ -74,8 +74,13 @@ private: class DebugInlineeLinesSubsection final : public DebugSubsection { public: + struct Entry { + std::vector<support::ulittle32_t> ExtraFiles; + InlineeSourceLineHeader Header; + }; + DebugInlineeLinesSubsection(DebugChecksumsSubsection &Checksums, - bool HasExtraFiles); + bool HasExtraFiles = false); static bool classof(const DebugSubsection *S) { return S->kind() == DebugSubsectionKind::InlineeLines; @@ -87,16 +92,18 @@ public: void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine); void addExtraFile(StringRef FileName); + bool hasExtraFiles() const { return HasExtraFiles; } + void setHasExtraFiles(bool Has) { HasExtraFiles = Has; } + + std::vector<Entry>::const_iterator begin() const { return Entries.begin(); } + std::vector<Entry>::const_iterator end() const { return Entries.end(); } + private: DebugChecksumsSubsection &Checksums; bool HasExtraFiles = false; uint32_t ExtraFileCount = 0; - struct Entry { - std::vector<support::ulittle32_t> ExtraFiles; - InlineeSourceLineHeader Header; - }; std::vector<Entry> Entries; }; } diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h index b2e1131e5968..847259c5ceac 100644 --- a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h +++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h @@ -31,28 +31,32 @@ struct DebugSubsectionHeader { class DebugSubsectionRecord { public: DebugSubsectionRecord(); - DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data); + DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data, + CodeViewContainer Container); - static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info); + static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info, + CodeViewContainer Container); uint32_t getRecordLength() const; DebugSubsectionKind kind() const; BinaryStreamRef getRecordData() const; private: + CodeViewContainer Container; DebugSubsectionKind Kind; BinaryStreamRef Data; }; class DebugSubsectionRecordBuilder { public: - DebugSubsectionRecordBuilder(DebugSubsectionKind Kind, DebugSubsection &Frag); + DebugSubsectionRecordBuilder(std::unique_ptr<DebugSubsection> Subsection, + CodeViewContainer Container); uint32_t calculateSerializedLength(); Error commit(BinaryStreamWriter &Writer); private: - DebugSubsectionKind Kind; - DebugSubsection &Frag; + std::unique_ptr<DebugSubsection> Subsection; + CodeViewContainer Container; }; } // namespace codeview @@ -62,7 +66,12 @@ template 
<> struct VarStreamArrayExtractor<codeview::DebugSubsectionRecord> { static Error extract(BinaryStreamRef Stream, uint32_t &Length, codeview::DebugSubsectionRecord &Info) { - if (auto EC = codeview::DebugSubsectionRecord::initialize(Stream, Info)) + // FIXME: We need to pass the container type through to this function, but + // VarStreamArray doesn't easily support stateful contexts. In practice + // this isn't super important since the subsection header describes its + // length and we can just skip it. It's more important when writing. + if (auto EC = codeview::DebugSubsectionRecord::initialize( + Stream, Info, codeview::CodeViewContainer::Pdb)) return EC; Length = Info.getRecordLength(); return Error::success(); diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h index 428ff153d5d1..7080b0480757 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h @@ -24,9 +24,9 @@ namespace codeview { class SymbolVisitorDelegate; class SymbolDeserializer : public SymbolVisitorCallbacks { struct MappingInfo { - explicit MappingInfo(ArrayRef<uint8_t> RecordData) + MappingInfo(ArrayRef<uint8_t> RecordData, CodeViewContainer Container) : Stream(RecordData, llvm::support::little), Reader(Stream), - Mapping(Reader) {} + Mapping(Reader, Container) {} BinaryByteStream Stream; BinaryStreamReader Reader; @@ -35,7 +35,9 @@ class SymbolDeserializer : public SymbolVisitorCallbacks { public: template <typename T> static Error deserializeAs(CVSymbol Symbol, T &Record) { - SymbolDeserializer S(nullptr); + // If we're just deserializing one record, then don't worry about alignment + // as there's nothing that comes after. + SymbolDeserializer S(nullptr, CodeViewContainer::ObjectFile); if (auto EC = S.visitSymbolBegin(Symbol)) return EC; if (auto EC = S.visitKnownRecord(Symbol, Record)) @@ -45,12 +47,13 @@ public: return Error::success(); } - explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate) - : Delegate(Delegate) {} + explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate, + CodeViewContainer Container) + : Delegate(Delegate), Container(Container) {} Error visitSymbolBegin(CVSymbol &Record) override { assert(!Mapping && "Already in a symbol mapping!"); - Mapping = llvm::make_unique<MappingInfo>(Record.content()); + Mapping = llvm::make_unique<MappingInfo>(Record.content(), Container); return Mapping->Mapping.visitSymbolBegin(Record); } Error visitSymbolEnd(CVSymbol &Record) override { @@ -78,6 +81,7 @@ private: } SymbolVisitorDelegate *Delegate; + CodeViewContainer Container; std::unique_ptr<MappingInfo> Mapping; }; } diff --git a/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/include/llvm/DebugInfo/CodeView/SymbolDumper.h index e91065dcf87e..293daa851bdd 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolDumper.h +++ b/include/llvm/DebugInfo/CodeView/SymbolDumper.h @@ -26,9 +26,11 @@ class TypeCollection; class CVSymbolDumper { public: CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types, + CodeViewContainer Container, std::unique_ptr<SymbolDumpDelegate> ObjDelegate, bool PrintRecordBytes) - : W(W), Types(Types), ObjDelegate(std::move(ObjDelegate)), + : W(W), Types(Types), Container(Container), + ObjDelegate(std::move(ObjDelegate)), PrintRecordBytes(PrintRecordBytes) {} /// Dumps one type record. 
Returns false if there was a type parsing error, @@ -44,6 +46,7 @@ public: private: ScopedPrinter &W; TypeCollection &Types; + CodeViewContainer Container; std::unique_ptr<SymbolDumpDelegate> ObjDelegate; bool PrintRecordBytes; diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h index 5d072a3b2723..391e8f127665 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h +++ b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h @@ -20,8 +20,12 @@ class BinaryStreamWriter; namespace codeview { class SymbolRecordMapping : public SymbolVisitorCallbacks { public: - explicit SymbolRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {} - explicit SymbolRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {} + explicit SymbolRecordMapping(BinaryStreamReader &Reader, + CodeViewContainer Container) + : IO(Reader), Container(Container) {} + explicit SymbolRecordMapping(BinaryStreamWriter &Writer, + CodeViewContainer Container) + : IO(Writer), Container(Container) {} Error visitSymbolBegin(CVSymbol &Record) override; Error visitSymbolEnd(CVSymbol &Record) override; @@ -35,6 +39,7 @@ private: Optional<SymbolKind> Kind; CodeViewRecordIO IO; + CodeViewContainer Container; }; } } diff --git a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h index a8fe1a3ae1d0..42adbdb4e20f 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h +++ b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h @@ -46,17 +46,18 @@ class SymbolSerializer : public SymbolVisitorCallbacks { public: template <typename SymType> - static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage) { + static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage, + CodeViewContainer Container) { CVSymbol Result; Result.Type = static_cast<SymbolKind>(Sym.Kind); - SymbolSerializer Serializer(Storage); + SymbolSerializer Serializer(Storage, Container); consumeError(Serializer.visitSymbolBegin(Result)); consumeError(Serializer.visitKnownRecord(Result, Sym)); consumeError(Serializer.visitSymbolEnd(Result)); return Result; } - explicit SymbolSerializer(BumpPtrAllocator &Storage); + SymbolSerializer(BumpPtrAllocator &Storage, CodeViewContainer Container); virtual Error visitSymbolBegin(CVSymbol &Record) override; virtual Error visitSymbolEnd(CVSymbol &Record) override; diff --git a/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/include/llvm/DebugInfo/MSF/MappedBlockStream.h index d68f5f70c83e..36dce393fc66 100644 --- a/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -44,17 +44,19 @@ class MappedBlockStream : public BinaryStream { public: static std::unique_ptr<MappedBlockStream> createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - BinaryStreamRef MsfData); + BinaryStreamRef MsfData, BumpPtrAllocator &Allocator); static std::unique_ptr<MappedBlockStream> createIndexedStream(const MSFLayout &Layout, BinaryStreamRef MsfData, - uint32_t StreamIndex); + uint32_t StreamIndex, BumpPtrAllocator &Allocator); static std::unique_ptr<MappedBlockStream> - createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData); + createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); static std::unique_ptr<MappedBlockStream> - createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData); + createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator 
&Allocator); llvm::support::endianness getEndian() const override { return llvm::support::little; @@ -67,9 +69,7 @@ public: uint32_t getLength() override; - uint32_t getNumBytesCopied() const; - - llvm::BumpPtrAllocator &getAllocator() { return Pool; } + llvm::BumpPtrAllocator &getAllocator() { return Allocator; } void invalidateCache(); @@ -79,7 +79,7 @@ public: protected: MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, - BinaryStreamRef MsfData); + BinaryStreamRef MsfData, BumpPtrAllocator &Allocator); private: const MSFStreamLayout &getStreamLayout() const { return StreamLayout; } @@ -94,7 +94,15 @@ private: BinaryStreamRef MsfData; typedef MutableArrayRef<uint8_t> CacheEntry; - llvm::BumpPtrAllocator Pool; + + // We just store the allocator by reference. We use this to allocate + // contiguous memory for things like arrays or strings that cross a block + // boundary, and this memory is expected to outlive the stream. For example, + // someone could create a stream, read some stuff, then close the stream, and + // we would like outstanding references to fields to remain valid since the + // entire file is mapped anyway. Because of that, the user must supply the + // allocator to allocate broken records from. + BumpPtrAllocator &Allocator; DenseMap<uint32_t, std::vector<CacheEntry>> CacheMap; }; @@ -102,18 +110,20 @@ class WritableMappedBlockStream : public WritableBinaryStream { public: static std::unique_ptr<WritableMappedBlockStream> createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - WritableBinaryStreamRef MsfData); + WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator); static std::unique_ptr<WritableMappedBlockStream> createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, - uint32_t StreamIndex); + uint32_t StreamIndex, BumpPtrAllocator &Allocator); static std::unique_ptr<WritableMappedBlockStream> createDirectoryStream(const MSFLayout &Layout, - WritableBinaryStreamRef MsfData); + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); static std::unique_ptr<WritableMappedBlockStream> - createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData); + createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); llvm::support::endianness getEndian() const override { return llvm::support::little; @@ -139,7 +149,8 @@ public: protected: WritableMappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, - WritableBinaryStreamRef MsfData); + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); private: MappedBlockStream ReadInterface; diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h index e5858d0f45e3..2ff166b24e68 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -49,11 +49,8 @@ public: void setObjFileName(StringRef Name); void addSymbol(codeview::CVSymbol Symbol); - void addC13Fragment(std::unique_ptr<codeview::DebugLinesSubsection> Lines); - void addC13Fragment( - std::unique_ptr<codeview::DebugInlineeLinesSubsection> Inlinees); - void setC13FileChecksums( - std::unique_ptr<codeview::DebugChecksumsSubsection> Checksums); + void + addDebugSubsection(std::unique_ptr<codeview::DebugSubsection> Subsection); uint16_t getStreamIndex() const; StringRef getModuleName() const { return ModuleName; } @@ -83,10 +80,6 @@ private: 
std::vector<std::string> SourceFiles; std::vector<codeview::CVSymbol> Symbols; - std::unique_ptr<codeview::DebugChecksumsSubsection> ChecksumInfo; - std::vector<std::unique_ptr<codeview::DebugLinesSubsection>> LineInfo; - std::vector<std::unique_ptr<codeview::DebugInlineeLinesSubsection>> Inlinees; - std::vector<std::unique_ptr<codeview::DebugSubsectionRecordBuilder>> C13Builders; diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h index 822ce3ce13d3..a8121978d882 100644 --- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h +++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h @@ -12,6 +12,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" @@ -25,7 +26,7 @@ class PDBFile; class DbiModuleDescriptor; class ModuleDebugStreamRef { - typedef codeview::DebugSubsectionArray::Iterator LinesAndChecksumsIterator; + typedef codeview::DebugSubsectionArray::Iterator DebugSubsectionIterator; public: ModuleDebugStreamRef(const DbiModuleDescriptor &Module, @@ -39,12 +40,15 @@ public: iterator_range<codeview::CVSymbolArray::Iterator> symbols(bool *HadError) const; - llvm::iterator_range<LinesAndChecksumsIterator> linesAndChecksums() const; + llvm::iterator_range<DebugSubsectionIterator> subsections() const; - bool hasLineInfo() const; + bool hasDebugSubsections() const; Error commit(); + Expected<codeview::DebugChecksumsSubsectionRef> + findChecksumsSubsection() const; + private: const DbiModuleDescriptor &Mod; @@ -57,7 +61,7 @@ private: BinaryStreamRef C13LinesSubstream; BinaryStreamRef GlobalRefsSubstream; - codeview::DebugSubsectionArray LinesAndChecksums; + codeview::DebugSubsectionArray Subsections; }; } } diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h index 6aeb0a5479cb..28a14d7356d2 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h @@ -45,6 +45,8 @@ public: FixedStreamArray<support::ulittle32_t> name_ids() const; + codeview::DebugStringTableSubsectionRef getStringTable() const; + private: Error readHeader(BinaryStreamReader &Reader); Error readStrings(BinaryStreamReader &Reader); diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/include/llvm/DebugInfo/PDB/Native/TpiStream.h index 17fba9991c2e..0ee697696ca5 100644 --- a/include/llvm/DebugInfo/PDB/Native/TpiStream.h +++ b/include/llvm/DebugInfo/PDB/Native/TpiStream.h @@ -34,8 +34,7 @@ class TpiStream { friend class TpiStreamBuilder; public: - TpiStream(const PDBFile &File, - std::unique_ptr<msf::MappedBlockStream> Stream); + TpiStream(PDBFile &File, std::unique_ptr<msf::MappedBlockStream> Stream); ~TpiStream(); Error reload(); @@ -61,7 +60,7 @@ public: Error commit(); private: - const PDBFile &Pdb; + PDBFile &Pdb; std::unique_ptr<msf::MappedBlockStream> Stream; std::unique_ptr<codeview::LazyRandomTypeCollection> Types; diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h index 4afb5d9d63b2..8e6bb4baccaf 100644 --- a/include/llvm/IR/DIBuilder.h +++ b/include/llvm/IR/DIBuilder.h @@ -86,6 +86,10 @@ namespace llvm { /// Construct any deferred debug info descriptors. 
void finalize(); + /// Finalize a specific subprogram - no new variables may be added to this + /// subprogram afterwards. + void finalizeSubprogram(DISubprogram *SP); + /// A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. /// \param Lang Source programming language, eg. dwarf::DW_LANG_C99 diff --git a/include/llvm/IR/DebugLoc.h b/include/llvm/IR/DebugLoc.h index aa74f361cda2..eef1212abc4b 100644 --- a/include/llvm/IR/DebugLoc.h +++ b/include/llvm/IR/DebugLoc.h @@ -90,12 +90,6 @@ namespace llvm { DenseMap<const MDNode *, MDNode *> &Cache, bool ReplaceLast = false); - /// Reparent all debug locations referenced by \c I that belong to \c OrigSP - /// to become (possibly indirect) children of \c NewSP. - static void reparentDebugInfo(Instruction &I, DISubprogram *OrigSP, - DISubprogram *NewSP, - DenseMap<const MDNode *, MDNode *> &Cache); - unsigned getLine() const; unsigned getCol() const; MDNode *getScope() const; diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index c46c609609e2..757ddf6cf46b 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -134,16 +134,18 @@ public: /// be renamed or references something that can't be renamed). unsigned NotEligibleToImport : 1; - /// Indicate that the global value must be considered a live root for - /// index-based liveness analysis. Used for special LLVM values such as - /// llvm.global_ctors that the linker does not know about. - unsigned LiveRoot : 1; + /// In per-module summary, indicate that the global value must be considered + /// a live root for index-based liveness analysis. Used for special LLVM + /// values such as llvm.global_ctors that the linker does not know about. + /// + /// In combined summary, indicate that the global value is live. + unsigned Live : 1; /// Convenience Constructors explicit GVFlags(GlobalValue::LinkageTypes Linkage, - bool NotEligibleToImport, bool LiveRoot) + bool NotEligibleToImport, bool Live) : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport), - LiveRoot(LiveRoot) {} + Live(Live) {} }; private: @@ -172,6 +174,8 @@ private: /// are listed in the derived FunctionSummary object. std::vector<ValueInfo> RefEdgeList; + bool isLive() const { return Flags.Live; } + protected: GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs) : Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {} @@ -213,19 +217,17 @@ public: /// Return true if this global value can't be imported. bool notEligibleToImport() const { return Flags.NotEligibleToImport; } - /// Return true if this global value must be considered a root for live - /// value analysis on the index. - bool liveRoot() const { return Flags.LiveRoot; } - - /// Flag that this global value must be considered a root for live - /// value analysis on the index. - void setLiveRoot() { Flags.LiveRoot = true; } + void setLive(bool Live) { Flags.Live = Live; } /// Flag that this global value cannot be imported. void setNotEligibleToImport() { Flags.NotEligibleToImport = true; } /// Return the list of values referenced by this global value definition. ArrayRef<ValueInfo> refs() const { return RefEdgeList; } + + friend class ModuleSummaryIndex; + friend void computeDeadSymbols(class ModuleSummaryIndex &, + const DenseSet<GlobalValue::GUID> &); }; /// \brief Alias summary information. @@ -535,6 +537,11 @@ private: /// GUIDs, it will be mapped to 0. 
std::map<GlobalValue::GUID, GlobalValue::GUID> OidGuidMap; + /// Indicates that summary-based GlobalValue GC has run, and values with + /// GVFlags::Live==false are really dead. Otherwise, all values must be + /// considered live. + bool WithGlobalValueDeadStripping = false; + // YAML I/O support. friend yaml::MappingTraits<ModuleSummaryIndex>; @@ -550,6 +557,17 @@ public: const_gvsummary_iterator end() const { return GlobalValueMap.end(); } size_t size() const { return GlobalValueMap.size(); } + bool withGlobalValueDeadStripping() const { + return WithGlobalValueDeadStripping; + } + void setWithGlobalValueDeadStripping() { + WithGlobalValueDeadStripping = true; + } + + bool isGlobalValueLive(const GlobalValueSummary *GVS) const { + return !WithGlobalValueDeadStripping || GVS->isLive(); + } + /// Return a ValueInfo for GUID if it exists, otherwise return ValueInfo(). ValueInfo getValueInfo(GlobalValue::GUID GUID) const { auto I = GlobalValueMap.find(GUID); diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h index 78fdb602027d..891d84c2dbca 100644 --- a/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -128,6 +128,8 @@ template <> struct MappingTraits<TypeIdSummary> { }; struct FunctionSummaryYaml { + unsigned Linkage; + bool NotEligibleToImport, Live; std::vector<uint64_t> TypeTests; std::vector<FunctionSummary::VFuncId> TypeTestAssumeVCalls, TypeCheckedLoadVCalls; @@ -168,6 +170,9 @@ namespace yaml { template <> struct MappingTraits<FunctionSummaryYaml> { static void mapping(IO &io, FunctionSummaryYaml& summary) { + io.mapOptional("Linkage", summary.Linkage); + io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); + io.mapOptional("Live", summary.Live); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); io.mapOptional("TypeCheckedLoadVCalls", summary.TypeCheckedLoadVCalls); @@ -199,12 +204,12 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> { } auto &Elem = V[KeyInt]; for (auto &FSum : FSums) { - GlobalValueSummary::GVFlags GVFlags(GlobalValue::ExternalLinkage, false, - false); Elem.SummaryList.push_back(llvm::make_unique<FunctionSummary>( - GVFlags, 0, ArrayRef<ValueInfo>{}, - ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests), - std::move(FSum.TypeTestAssumeVCalls), + GlobalValueSummary::GVFlags( + static_cast<GlobalValue::LinkageTypes>(FSum.Linkage), + FSum.NotEligibleToImport, FSum.Live), + 0, ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{}, + std::move(FSum.TypeTests), std::move(FSum.TypeTestAssumeVCalls), std::move(FSum.TypeCheckedLoadVCalls), std::move(FSum.TypeTestAssumeConstVCalls), std::move(FSum.TypeCheckedLoadConstVCalls))); @@ -216,8 +221,10 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> { for (auto &Sum : P.second.SummaryList) { if (auto *FSum = dyn_cast<FunctionSummary>(Sum.get())) FSums.push_back(FunctionSummaryYaml{ - FSum->type_tests(), FSum->type_test_assume_vcalls(), - FSum->type_checked_load_vcalls(), + FSum->flags().Linkage, + static_cast<bool>(FSum->flags().NotEligibleToImport), + static_cast<bool>(FSum->flags().Live), FSum->type_tests(), + FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); } @@ -231,6 +238,8 @@ template <> struct MappingTraits<ModuleSummaryIndex> { static void mapping(IO &io, ModuleSummaryIndex& index) { 
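// Round-trip the dead-stripping flag (mapped below in this hunk) as well: // without it a reader of the YAML could not tell whether Live == false means // the value is actually dead or merely that the summary GC never ran.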
io.mapOptional("GlobalValueMap", index.GlobalValueMap); io.mapOptional("TypeIdMap", index.TypeIdMap); + io.mapOptional("WithGlobalValueDeadStripping", + index.WithGlobalValueDeadStripping); } }; diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index f01607614a0c..a5f0130f79f4 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -228,24 +228,24 @@ public: return cast<ConstantInt>(NumVMSArgs)->getZExtValue(); } - typename CallSiteTy::arg_iterator vm_state_begin() const { + typename CallSiteTy::arg_iterator deopt_begin() const { auto I = gc_transition_args_end() + 1; assert((getCallSite().arg_end() - I) >= 0); return I; } - typename CallSiteTy::arg_iterator vm_state_end() const { - auto I = vm_state_begin() + getNumTotalVMSArgs(); + typename CallSiteTy::arg_iterator deopt_end() const { + auto I = deopt_begin() + getNumTotalVMSArgs(); assert((getCallSite().arg_end() - I) >= 0); return I; } /// range adapter for vm state arguments - iterator_range<arg_iterator> vm_state_args() const { - return make_range(vm_state_begin(), vm_state_end()); + iterator_range<arg_iterator> deopt_operands() const { + return make_range(deopt_begin(), deopt_end()); } typename CallSiteTy::arg_iterator gc_args_begin() const { - return vm_state_end(); + return deopt_end(); } typename CallSiteTy::arg_iterator gc_args_end() const { return getCallSite().arg_end(); @@ -289,8 +289,8 @@ public: (void)arg_end(); (void)gc_transition_args_begin(); (void)gc_transition_args_end(); - (void)vm_state_begin(); - (void)vm_state_end(); + (void)deopt_begin(); + (void)deopt_end(); (void)gc_args_begin(); (void)gc_args_end(); } diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 5b9796d4fba6..abb0aa3e3caf 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -86,7 +86,6 @@ void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); -void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeCFGViewerLegacyPassPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); @@ -144,8 +143,8 @@ void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGCOVProfilerLegacyPassPass(PassRegistry&); void initializeGVNHoistLegacyPassPass(PassRegistry&); -void initializeGVNSinkLegacyPassPass(PassRegistry&); void initializeGVNLegacyPassPass(PassRegistry&); +void initializeGVNSinkLegacyPassPass(PassRegistry&); void initializeGlobalDCELegacyPassPass(PassRegistry&); void initializeGlobalMergePass(PassRegistry&); void initializeGlobalOptLegacyPassPass(PassRegistry&); @@ -175,13 +174,14 @@ void initializeIntervalPartitionPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAVerificationPassPass(PassRegistry&); void initializeLCSSAWrapperPassPass(PassRegistry&); +void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&); void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&); void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&); +void initializeLazyValueInfoPrinterPass(PassRegistry&); void initializeLazyValueInfoWrapperPassPass(PassRegistry&); void initializeLegacyLICMPassPass(PassRegistry&); void 
initializeLegacyLoopSinkPassPass(PassRegistry&); -void initializeLazyValueInfoPrinterPass(PassRegistry&); void initializeLegalizerPass(PassRegistry&); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); void initializeLintPass(PassRegistry&); @@ -195,8 +195,8 @@ void initializeLiveVariablesPass(PassRegistry&); void initializeLoadCombinePass(PassRegistry&); void initializeLoadStoreVectorizerPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); -void initializeLocalizerPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); +void initializeLocalizerPass(PassRegistry&); void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&); void initializeLoopDeletionLegacyPassPass(PassRegistry&); @@ -304,6 +304,7 @@ void initializeProcessImplicitDefsPass(PassRegistry&); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); +void initializeRABasicPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); void initializeReassociateLegacyPassPass(PassRegistry&); void initializeRegBankSelectPass(PassRegistry&); @@ -327,8 +328,9 @@ void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeSanitizerCoverageModulePass(PassRegistry&); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); -void initializeScalarizerPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); +void initializeScalarizerPass(PassRegistry&); +void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h index 5ba8492db8f5..73106f77ca55 100644 --- a/include/llvm/LTO/Config.h +++ b/include/llvm/LTO/Config.h @@ -46,6 +46,9 @@ struct Config { unsigned OptLevel = 2; bool DisableVerify = false; + /// Use the new pass manager + bool UseNewPM = false; + /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h index 6ddae2e2b41c..a6d4d404415f 100644 --- a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h +++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h @@ -17,12 +17,20 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/ObjectYAML/YAML.h" namespace llvm { + +namespace codeview { +class DebugStringTableSubsection; +class DebugStringTableSubsectionRef; +class DebugChecksumsSubsectionRef; +} namespace CodeViewYAML { + namespace detail { -struct C13FragmentBase; +struct YAMLSubsectionBase; } struct SourceLineEntry { @@ -74,18 +82,24 @@ struct InlineeInfo { std::vector<InlineeSite> Sites; }; -struct SourceFileInfo { - std::vector<SourceFileChecksumEntry> FileChecksums; - std::vector<SourceLineInfo> LineFragments; - std::vector<InlineeInfo> Inlinees; -}; +struct YAMLDebugSubsection { + static Expected<YAMLDebugSubsection> + fromCodeViewSubection(const codeview::DebugStringTableSubsectionRef &Strings, + const codeview::DebugChecksumsSubsectionRef &Checksums, + const codeview::DebugSubsectionRecord &SS); -struct 
C13DebugSection { - std::vector<detail::C13FragmentBase> Fragments; + std::shared_ptr<detail::YAMLSubsectionBase> Subsection; }; + +Expected<std::vector<std::unique_ptr<codeview::DebugSubsection>>> +convertSubsectionList(ArrayRef<YAMLDebugSubsection> Subsections, + codeview::DebugStringTableSubsection &Strings); + } // namespace CodeViewYAML } // namespace llvm -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceFileInfo) +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::YAMLDebugSubsection) + +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::YAMLDebugSubsection) #endif diff --git a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h index ee4e2ac9d404..9b411e8b074f 100644 --- a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h +++ b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h @@ -28,7 +28,9 @@ struct SymbolRecordBase; struct SymbolRecord { std::shared_ptr<detail::SymbolRecordBase> Symbol; - codeview::CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator) const; + codeview::CVSymbol + toCodeViewSymbol(BumpPtrAllocator &Allocator, + codeview::CodeViewContainer Container) const; static Expected<SymbolRecord> fromCodeViewSymbol(codeview::CVSymbol Symbol); }; diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index 3c181f0e511b..5c3bf88fbbfa 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -1361,10 +1361,6 @@ public: return false; } - bool isTemplateArg(StringRef Name) const { - return isTemplateArg(StringInit::get(Name)); - } - const RecordVal *getValue(const Init *Name) const { for (const RecordVal &Val : Values) if (Val.Name == Name) return &Val; @@ -1388,10 +1384,6 @@ public: TemplateArgs.push_back(Name); } - void addTemplateArg(StringRef Name) { - addTemplateArg(StringInit::get(Name)); - } - void addValue(const RecordVal &RV) { assert(getValue(RV.getNameInit()) == nullptr && "Value already added!"); Values.push_back(RV); diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h index d66b6edc7a4f..de35cdf052e1 100644 --- a/include/llvm/Transforms/IPO/FunctionImport.h +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -81,15 +81,11 @@ public: /// \p ExportLists contains for each Module the set of globals (GUID) that will /// be imported by another module, or referenced by such a function. I.e. this /// is the set of globals that need to be promoted/renamed appropriately. -/// -/// \p DeadSymbols (optional) contains a list of GUID that are deemed "dead" and -/// will be ignored for the purpose of importing. void ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, StringMap<FunctionImporter::ImportMapTy> &ImportLists, - StringMap<FunctionImporter::ExportSetTy> &ExportLists, - const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr); + StringMap<FunctionImporter::ExportSetTy> &ExportLists); /// Compute all the imports for the given module using the Index. /// @@ -102,9 +98,9 @@ void ComputeCrossModuleImportForModule( /// Compute all the symbols that are "dead": i.e. those that can't be reached /// in the graph from any of the given symbols listed in /// \p GUIDPreservedSymbols.
-DenseSet<GlobalValue::GUID> -computeDeadSymbols(const ModuleSummaryIndex &Index, - const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols); +void computeDeadSymbols( + ModuleSummaryIndex &Index, + const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols); /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 023d7af7f729..b6c6c091631d 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -177,6 +177,7 @@ struct SanitizerCoverageOptions { bool Use8bitCounters = false; bool TracePC = false; bool TracePCGuard = false; + bool Inline8bitCounters = false; bool NoPrune = false; SanitizerCoverageOptions() = default; diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 91c9d255302f..2a8b89d86282 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -36,6 +36,7 @@ class BasicBlock; class BlockFrequencyInfo; class CallInst; class CallGraph; +class DebugInfoFinder; class DominatorTree; class Function; class Instruction; @@ -110,7 +111,8 @@ struct ClonedCodeInfo { /// BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, - ClonedCodeInfo *CodeInfo = nullptr); + ClonedCodeInfo *CodeInfo = nullptr, + DebugInfoFinder *DIFinder = nullptr); /// CloneFunction - Return a copy of the specified function and add it to that /// function's module. Also, any references specified in the VMap are changed diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 6a1af87450c9..a906770dbb34 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1170,7 +1170,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, const DataLayout &DL, const TargetLibraryInfo *TLI) { // fold: icmp (inttoptr x), null -> icmp x, 0 + // fold: icmp null, (inttoptr x) -> icmp 0, x // fold: icmp (ptrtoint x), 0 -> icmp x, null + // fold: icmp 0, (ptrtoint x) -> icmp null, x // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y // @@ -1240,6 +1242,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL); } + } else if (isa<ConstantExpr>(Ops1)) { + // If RHS is a constant expression, but the left side isn't, swap the + // operands and try again. + Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate); + return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } return ConstantExpr::getCompare(Predicate, Ops0, Ops1); diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 3da33ac71421..ed233d201537 100644 --- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -43,7 +43,7 @@ static cl::opt<unsigned> // The percent threshold for the direct-call target (this call site vs the // total call count) for it to be considered as the promotion target. 
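// For example, with the new default of 30 below, an indirect call site whose // hottest target receives 45 of 100 profiled calls (45%) is eligible for // promotion, while one at 25% is not. (Illustrative numbers only.)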
static cl::opt<unsigned> - ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden, + ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the promotion")); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 4702569126c6..77c87928728a 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -54,11 +54,6 @@ static cl::opt<int> cl::init(45), cl::desc("Threshold for inlining cold callsites")); -static cl::opt<bool> - EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden, - cl::init(false), - cl::desc("Enable generic switch cost model")); - // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. @@ -1015,83 +1010,68 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { if (isa<ConstantInt>(V)) return true; - if (EnableGenericSwitchCost) { - // Assume the most general case where the switch is lowered into - // either a jump table, bit test, or a balanced binary tree consisting of - // case clusters without merging adjacent clusters with the same - // destination. We do not consider the switches that are lowered with a mix - // of jump table/bit test/binary search tree. The cost of the switch is - // proportional to the size of the tree or the size of jump table range. - - // Exit early for a large switch, assuming one case needs at least one - // instruction. - // FIXME: This is not true for a bit test, but ignore such case for now to - // save compile-time. - int64_t CostLowerBound = - std::min((int64_t)INT_MAX, - (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - - if (CostLowerBound > Threshold) { - Cost = CostLowerBound; - return false; - } + // Assume the most general case where the switch is lowered into + // either a jump table, bit test, or a balanced binary tree consisting of + // case clusters without merging adjacent clusters with the same + // destination. We do not consider the switches that are lowered with a mix + // of jump table/bit test/binary search tree. The cost of the switch is + // proportional to the size of the tree or the size of jump table range. + // + // NB: We convert large switches which are just used to initialize large phi + // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent + // inlining those. It will prevent inlining in cases where the optimization + // does not (yet) fire. - unsigned JumpTableSize = 0; - unsigned NumCaseCluster = - TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + // Exit early for a large switch, assuming one case needs at least one + // instruction. + // FIXME: This is not true for a bit test, but ignore such case for now to + // save compile-time. + int64_t CostLowerBound = + std::min((int64_t)INT_MAX, + (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - // If suitable for a jump table, consider the cost for the table size and - // branch to destination. - if (JumpTableSize) { - int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + - 4 * InlineConstants::InstrCost; - Cost = std::min((int64_t)INT_MAX, JTCost + Cost); - return false; - } + if (CostLowerBound > Threshold) { + Cost = CostLowerBound; + return false; + } - // Considering forming a binary search, we should find the number of nodes - // which is the same as the number of comparisons when lowered.
For a given - // number of clusters, n, we can define a recursive function, f(n), to find - // the number of nodes in the tree. The recursion is : - // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, - // and f(n) = n, when n <= 3. - // This will lead a binary tree where the leaf should be either f(2) or f(3) - // when n > 3. So, the number of comparisons from leaves should be n, while - // the number of non-leaf should be : - // 2^(log2(n) - 1) - 1 - // = 2^log2(n) * 2^-1 - 1 - // = n / 2 - 1. - // Considering comparisons from leaf and non-leaf nodes, we can estimate the - // number of comparisons in a simple closed form : - // n + n / 2 - 1 = n * 3 / 2 - 1 - if (NumCaseCluster <= 3) { - // Suppose a comparison includes one compare and one conditional branch. - Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; - return false; - } - int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1; - uint64_t SwitchCost = - ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost); + unsigned JumpTableSize = 0; + unsigned NumCaseCluster = + TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + + // If suitable for a jump table, consider the cost for the table size and + // branch to destination. + if (JumpTableSize) { + int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + + 4 * InlineConstants::InstrCost; + Cost = std::min((int64_t)INT_MAX, JTCost + Cost); return false; } - // Use a simple switch cost model where we accumulate a cost proportional to - // the number of distinct successor blocks. This fan-out in the CFG cannot - // be represented for free even if we can represent the core switch as a - // jumptable that takes a single instruction. - /// - // NB: We convert large switches which are just used to initialize large phi - // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent - // inlining those. It will prevent inlining in cases where the optimization - // does not (yet) fire. - SmallPtrSet<BasicBlock *, 8> SuccessorBlocks; - SuccessorBlocks.insert(SI.getDefaultDest()); - for (auto Case : SI.cases()) - SuccessorBlocks.insert(Case.getCaseSuccessor()); - // Add cost corresponding to the number of distinct destinations. The first - // we model as free because of fallthrough. - Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + // Considering forming a binary search, we should find the number of nodes + // which is same as the number of comparisons when lowered. For a given + // number of clusters, n, we can define a recursive function, f(n), to find + // the number of nodes in the tree. The recursion is : + // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, + // and f(n) = n, when n <= 3. + // This will lead a binary tree where the leaf should be either f(2) or f(3) + // when n > 3. So, the number of comparisons from leaves should be n, while + // the number of non-leaf should be : + // 2^(log2(n) - 1) - 1 + // = 2^log2(n) * 2^-1 - 1 + // = n / 2 - 1. + // Considering comparisons from leaf and non-leaf nodes, we can estimate the + // number of comparisons in a simple closed form : + // n + n / 2 - 1 = n * 3 / 2 - 1 + if (NumCaseCluster <= 3) { + // Suppose a comparison includes one compare and one conditional branch. 
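Condensing the now-unconditional cost model: jump-table lowering charges for the table plus a small fixed overhead, tiny cluster counts are charged as a short compare chain, and everything else uses the binary-tree closed form 3n/2 - 1 derived in the comment above. A standalone sketch, assuming InlineConstants::InstrCost is 5 as in this revision:

#include <algorithm>
#include <climits>
#include <cstdint>

constexpr int64_t InstrCost = 5; // stand-in for InlineConstants::InstrCost

int64_t switchCost(unsigned NumCaseClusters, unsigned JumpTableSize) {
  // Jump table: pay for the table entries plus roughly four
  // instructions of overhead for the indirect-branch sequence.
  if (JumpTableSize)
    return std::min<int64_t>(
        INT_MAX, (int64_t)JumpTableSize * InstrCost + 4 * InstrCost);

  // Up to three clusters are lowered as a compare chain; each
  // comparison is one compare plus one conditional branch.
  if (NumCaseClusters <= 3)
    return (int64_t)NumCaseClusters * 2 * InstrCost;

  // Balanced binary search tree: n leaf comparisons plus n/2 - 1
  // non-leaf comparisons gives the closed form 3n/2 - 1.
  int64_t ExpectedCompares = 3 * (int64_t)NumCaseClusters / 2 - 1;
  return std::min<int64_t>(INT_MAX, ExpectedCompares * 2 * InstrCost);
}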
+ Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; + return false; + } + int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1; + uint64_t SwitchCost = + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost); return false; } diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index a2b9015a8a1d..6a9ae6440ace 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -662,13 +662,13 @@ namespace { bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB); bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S, BasicBlock *BB); - bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI, + bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI, BasicBlock *BB); - bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI, + bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV, - Instruction *BBI); + Instruction *BBI); void solve(); @@ -849,12 +849,12 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, return true; } if (BBI->getType()->isIntegerTy()) { - if (isa<CastInst>(BBI)) - return solveBlockValueCast(Res, BBI, BB); - + if (auto *CI = dyn_cast<CastInst>(BBI)) + return solveBlockValueCast(Res, CI, BB); + BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI); if (BO && isa<ConstantInt>(BO->getOperand(1))) - return solveBlockValueBinaryOp(Res, BBI, BB); + return solveBlockValueBinaryOp(Res, BO, BB); } DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -1168,9 +1168,9 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, } bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, - Instruction *BBI, - BasicBlock *BB) { - if (!BBI->getOperand(0)->getType()->isSized()) { + CastInst *CI, + BasicBlock *BB) { + if (!CI->getOperand(0)->getType()->isSized()) { // Without knowing how wide the input is, we can't analyze it in any useful // way. BBLV = LVILatticeVal::getOverdefined(); @@ -1180,7 +1180,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // Filter out casts we don't know how to reason about before attempting to // recurse on our operand. This can cut a long search short if we know we're // not going to be able to get any useful information anways. - switch (BBI->getOpcode()) { + switch (CI->getOpcode()) { case Instruction::Trunc: case Instruction::SExt: case Instruction::ZExt: @@ -1197,44 +1197,43 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // Figure out the range of the LHS. If that fails, we still apply the // transfer rule on the full set since we may be able to locally infer // interesting facts. - if (!hasBlockValue(BBI->getOperand(0), BB)) - if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + if (!hasBlockValue(CI->getOperand(0), BB)) + if (pushBlockValue(std::make_pair(BB, CI->getOperand(0)))) // More work to do before applying this transfer rule. 
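solveBlockValueCast now receives the CastInst directly and funnels everything through ConstantRange::castOp. A toy illustration of two of those transfer rules on simple inclusive unsigned ranges; the real ConstantRange also models wrapped ranges and signedness:

#include <cstdint>

// Toy inclusive [Lo, Hi] range over unsigned values, non-wrapped only.
struct URange {
  uint64_t Lo, Hi;
};

// Zero-extension preserves unsigned bounds: each value in [Lo, Hi] at
// the narrow width maps to the same value at the wider width.
URange zextRange(URange In) { return In; }

// Truncation is exact when the whole range fits in the narrower type;
// otherwise it can wrap, so fall back to the full (overdefined) range.
URange truncRange(URange In, unsigned ToBits) {
  uint64_t Max = ToBits >= 64 ? ~0ULL : (1ULL << ToBits) - 1;
  return In.Hi <= Max ? In : URange{0, Max};
}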
return false; const unsigned OperandBitWidth = - DL.getTypeSizeInBits(BBI->getOperand(0)->getType()); + DL.getTypeSizeInBits(CI->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); - if (hasBlockValue(BBI->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, - BBI); + if (hasBlockValue(CI->getOperand(0), BB)) { + LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB); + intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal, + CI); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } - const unsigned ResultBitWidth = - cast<IntegerType>(BBI->getType())->getBitWidth(); + const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth(); // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - auto CastOp = (Instruction::CastOps) BBI->getOpcode(); - BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth)); + BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(), + ResultBitWidth)); return true; } bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, - Instruction *BBI, + BinaryOperator *BO, BasicBlock *BB) { - assert(BBI->getOperand(0)->getType()->isSized() && + assert(BO->getOperand(0)->getType()->isSized() && "all operands to binary operators are sized"); // Filter out operators we don't know how to reason about before attempting to // recurse on our operand(s). This can cut a long search short if we know - // we're not going to be able to get any useful information anways. - switch (BBI->getOpcode()) { + // we're not going to be able to get any useful information anyways. + switch (BO->getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1256,29 +1255,29 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, // Figure out the range of the LHS. If that fails, use a conservative range, // but apply the transfer rule anyways. This lets us pick up facts from // expressions like "and i32 (call i32 @foo()), 32" - if (!hasBlockValue(BBI->getOperand(0), BB)) - if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + if (!hasBlockValue(BO->getOperand(0), BB)) + if (pushBlockValue(std::make_pair(BB, BO->getOperand(0)))) // More work to do before applying this transfer rule. return false; const unsigned OperandBitWidth = - DL.getTypeSizeInBits(BBI->getOperand(0)->getType()); + DL.getTypeSizeInBits(BO->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); - if (hasBlockValue(BBI->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, - BBI); + if (hasBlockValue(BO->getOperand(0), BB)) { + LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB); + intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal, + BO); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } - ConstantInt *RHS = cast<ConstantInt>(BBI->getOperand(1)); + ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1)); ConstantRange RHSRange = ConstantRange(RHS->getValue()); // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. 
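The binary-operator path is analogous: the known range of the left operand is combined with the constant right operand via ConstantRange::binaryOp. The add case on the same toy range type, with the wrap check made explicit:

#include <cstdint>
#include <limits>

struct URange {
  uint64_t Lo, Hi; // inclusive bounds, Lo <= Hi
};

URange addConstant(URange L, uint64_t C) {
  // If the upper bound would wrap, the result is no longer a single
  // contiguous unsigned interval; go to the full (overdefined) range.
  if (L.Hi > std::numeric_limits<uint64_t>::max() - C)
    return {0, std::numeric_limits<uint64_t>::max()};
  return {L.Lo + C, L.Hi + C}; // Lo cannot wrap if Hi did not
}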
- auto BinOp = (Instruction::BinaryOps) BBI->getOpcode(); + Instruction::BinaryOps BinOp = BO->getOpcode(); BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange)); return true; } diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 26706f5509ba..3253f27c010d 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -275,7 +275,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // FIXME: refactor this to use the same code that inliner is using. F.isVarArg(); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, - /* LiveRoot = */ false); + /* Live = */ false); auto FuncSummary = llvm::make_unique<FunctionSummary>( Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), @@ -295,7 +295,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, findRefEdges(Index, &V, RefEdges, Visited); bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, - /* LiveRoot = */ false); + /* Live = */ false); auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector()); if (NonRenamableLocal) @@ -308,7 +308,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, DenseSet<GlobalValue::GUID> &CantBePromoted) { bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, - /* LiveRoot = */ false); + /* Live = */ false); auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{}); auto *Aliasee = A.getBaseObject(); auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee); @@ -323,7 +323,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name))) for (auto &Summary : VI.getSummaryList()) - Summary->setLiveRoot(); + Summary->setLive(true); } ModuleSummaryIndex llvm::buildModuleSummaryIndex( @@ -423,8 +423,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( return; assert(GV->isDeclaration() && "Def in module asm already has definition"); GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, - /* NotEligibleToImport */ true, - /* LiveRoot */ true); + /* NotEligibleToImport = */ true, + /* Live = */ true); CantBePromoted.insert(GlobalValue::getGUID(Name)); // Create the appropriate summary type. 
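The LiveRoot flag becomes a general Live flag throughout these summaries. Its wire format can be read off the bitcode reader and writer hunks elsewhere in this patch: linkage in the low four bits, then NotEligibleToImport and Live in the next two. A self-contained round-trip sketch of that packing:

#include <cstdint>

struct GVFlagsSketch {
  unsigned Linkage : 4;
  unsigned NotEligibleToImport : 1;
  unsigned Live : 1;
};

uint64_t encodeFlags(GVFlagsSketch F) {
  uint64_t Raw = 0;
  Raw |= F.NotEligibleToImport;  // bool, bit 0 of the flag area
  Raw |= (uint64_t)F.Live << 1;  // bit 1 of the flag area
  return (Raw << 4) | F.Linkage; // linkage occupies the low 4 bits
}

GVFlagsSketch decodeFlags(uint64_t Raw, unsigned Version) {
  GVFlagsSketch F;
  F.Linkage = Raw & 0xF;
  Raw >>= 4;
  F.NotEligibleToImport = (Raw & 0x1) || Version < 3;
  // Live only exists from summary version 3 on; older summaries are
  // conservatively treated as live so dead stripping stays correct.
  F.Live = (Raw & 0x2) || Version < 3;
  return F;
}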
if (isa<Function>(GV)) { diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp index 0f0016f22cc0..a04c0aef04be 100644 --- a/lib/Analysis/OrderedBasicBlock.cpp +++ b/lib/Analysis/OrderedBasicBlock.cpp @@ -55,7 +55,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A, assert(II != IE && "Instruction not found?"); assert((Inst == A || Inst == B) && "Should find A or B"); LastInstFound = II; - return Inst == A; + return Inst != B; } /// \brief Find out whether \p A dominates \p B, meaning whether \p A diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 82107cb18025..b38e6225c840 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionPass.h" #include "llvm/Analysis/RegionIterator.h" +#include "llvm/IR/OptBisect.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" @@ -280,3 +281,18 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { return new PrintRegionPass(Banner, O); } + +bool RegionPass::skipRegion(Region &R) const { + Function &F = *R.getEntry()->getParent(); + if (!F.getContext().getOptBisect().shouldRunPass(this, R)) + return true; + + if (F.hasFnAttribute(Attribute::OptimizeNone)) { + // Report this only once per function. + if (R.getEntry() == &F.getEntryBlock()) + DEBUG(dbgs() << "Skipping pass '" << getPassName() + << "' on function " << F.getName() << "\n"); + return true; + } + return false; +} diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 686c94687669..fffa9045b2fd 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -865,11 +865,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits RawFlags = RawFlags >> 4; bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3; - // The LiveRoot flag wasn't introduced until version 3. For dead stripping + // The Live flag wasn't introduced until version 3. For dead stripping // to work correctly on earlier versions, we must conservatively treat all // values as live. - bool LiveRoot = (RawFlags & 0x2) || Version < 3; - return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot); + bool Live = (RawFlags & 0x2) || Version < 3; + return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live); } static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index a402b4ddd462..9043b8c12d25 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -351,7 +351,8 @@ public: /// Calls the callback for each value GUID and summary to be written to /// bitcode. This hides the details of whether they are being pulled from the /// entire index or just those in a provided ModuleToSummariesForIndex map. - void forEachSummary(std::function<void(GVInfo)> Callback) { + template<typename Functor> + void forEachSummary(Functor Callback) { if (ModuleToSummariesForIndex) { for (auto &M : *ModuleToSummariesForIndex) for (auto &Summary : M.second) @@ -363,6 +364,29 @@ public: } } + /// Calls the callback for each entry in the modulePaths StringMap that + /// should be written to the module path string table. 
This hides the details + /// of whether they are being pulled from the entire index or just those in a + /// provided ModuleToSummariesForIndex map. + template <typename Functor> void forEachModule(Functor Callback) { + if (ModuleToSummariesForIndex) { + for (const auto &M : *ModuleToSummariesForIndex) { + const auto &MPI = Index.modulePaths().find(M.first); + if (MPI == Index.modulePaths().end()) { + // This should only happen if the bitcode file was empty, in which + // case we shouldn't be importing (the ModuleToSummariesForIndex + // would only include the module we are writing and index for). + assert(ModuleToSummariesForIndex->size() == 1); + continue; + } + Callback(*MPI); + } + } else { + for (const auto &MPSE : Index.modulePaths()) + Callback(MPSE); + } + } + /// Main entry point for writing a combined index to bitcode. void write(); @@ -370,14 +394,6 @@ private: void writeModStrings(); void writeCombinedGlobalValueSummary(); - /// Indicates whether the provided \p ModulePath should be written into - /// the module string table, e.g. if full index written or if it is in - /// the provided subset. - bool doIncludeModule(StringRef ModulePath) { - return !ModuleToSummariesForIndex || - ModuleToSummariesForIndex->count(ModulePath); - } - Optional<unsigned> getValueId(GlobalValue::GUID ValGUID) { auto VMI = GUIDToValueIdMap.find(ValGUID); if (VMI == GUIDToValueIdMap.end()) @@ -864,7 +880,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) { uint64_t RawFlags = 0; RawFlags |= Flags.NotEligibleToImport; // bool - RawFlags |= (Flags.LiveRoot << 1); + RawFlags |= (Flags.Live << 1); // Linkage don't need to be remapped at that time for the summary. Any future // change to the getEncodedLinkage() function will need to be taken into // account here as well. @@ -968,19 +984,18 @@ void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() { enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; /// Determine the encoding to use for the given string name and length. -static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { +static StringEncoding getStringEncoding(StringRef Str) { bool isChar6 = true; - for (const char *C = Str, *E = C + StrLen; C != E; ++C) { + for (char C : Str) { if (isChar6) - isChar6 = BitCodeAbbrevOp::isChar6(*C); - if ((unsigned char)*C & 128) + isChar6 = BitCodeAbbrevOp::isChar6(C); + if ((unsigned char)C & 128) // don't bother scanning the rest. return SE_Fixed8; } if (isChar6) return SE_Char6; - else - return SE_Fixed7; + return SE_Fixed7; } /// Emit top-level description of module, including target triple, inline asm, @@ -1073,8 +1088,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { SmallVector<unsigned, 64> Vals; // Emit the module's source file name. { - StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(), - M.getSourceFileName().size()); + StringEncoding Bits = getStringEncoding(M.getSourceFileName()); BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); if (Bits == SE_Char6) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); @@ -2790,8 +2804,7 @@ void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable( for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. 
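getStringEncoding now takes a StringRef and classifies the string in one pass. The classification itself, in standalone form: any byte with the high bit set forces 8-bit immediately, otherwise the string is 6-bit if every character is in [a-zA-Z0-9._], else 7-bit:

#include <string>

enum StringEncodingSketch { Char6, Fixed7, Fixed8 };

// Char6 covers exactly [a-zA-Z0-9._], six bits per character.
static bool isChar6(char C) {
  return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
         (C >= '0' && C <= '9') || C == '.' || C == '_';
}

StringEncodingSketch pickEncoding(const std::string &Str) {
  bool AllChar6 = true;
  for (char C : Str) {
    if (AllChar6)
      AllChar6 = isChar6(C);
    if ((unsigned char)C & 128)
      return Fixed8; // a high bit forces 8-bit; stop scanning
  }
  return AllChar6 ? Char6 : Fixed7;
}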
- StringEncoding Bits = - getStringEncoding(Name.getKeyData(), Name.getKeyLength()); + StringEncoding Bits = getStringEncoding(Name.getKey()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); @@ -3149,41 +3162,33 @@ void IndexBitcodeWriter::writeModStrings() { unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv)); SmallVector<unsigned, 64> Vals; - for (const auto &MPSE : Index.modulePaths()) { - if (!doIncludeModule(MPSE.getKey())) - continue; - StringEncoding Bits = - getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); - unsigned AbbrevToUse = Abbrev8Bit; - if (Bits == SE_Char6) - AbbrevToUse = Abbrev6Bit; - else if (Bits == SE_Fixed7) - AbbrevToUse = Abbrev7Bit; - - Vals.push_back(MPSE.getValue().first); - - for (const auto P : MPSE.getKey()) - Vals.push_back((unsigned char)P); - - // Emit the finished record. - Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); - - Vals.clear(); - // Emit an optional hash for the module now - auto &Hash = MPSE.getValue().second; - bool AllZero = true; // Detect if the hash is empty, and do not generate it - for (auto Val : Hash) { - if (Val) - AllZero = false; - Vals.push_back(Val); - } - if (!AllZero) { - // Emit the hash record. - Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); - } + forEachModule( + [&](const StringMapEntry<std::pair<uint64_t, ModuleHash>> &MPSE) { + StringRef Key = MPSE.getKey(); + const auto &Value = MPSE.getValue(); + StringEncoding Bits = getStringEncoding(Key); + unsigned AbbrevToUse = Abbrev8Bit; + if (Bits == SE_Char6) + AbbrevToUse = Abbrev6Bit; + else if (Bits == SE_Fixed7) + AbbrevToUse = Abbrev7Bit; + + Vals.push_back(Value.first); + Vals.append(Key.begin(), Key.end()); + + // Emit the finished record. + Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); + + // Emit an optional hash for the module now + const auto &Hash = Value.second; + if (llvm::any_of(Hash, [](uint32_t H) { return H; })) { + Vals.assign(Hash.begin(), Hash.end()); + // Emit the hash record. + Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); + } - Vals.clear(); - } + Vals.clear(); + }); Stream.ExitBlock(); } diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 20e1467b30c3..c2ad9db81cfd 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -194,6 +194,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // some variables. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg()) { + // Ignore call instructions that claim to clobber SP. The AArch64 + // backend does this for aggregate function arguments. + if (MI.isCall() && MO.getReg() == SP) + continue; // If this is a virtual register, only clobber it since it doesn't // have aliases. 
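The DbgValueHistoryCalculator hunk just above filters out stack-pointer clobbers on calls. The decision in isolation, with simplified stand-in types; SP is the target's stack pointer register, obtained earlier in the real function:

// Stand-in types: only the fields this decision needs.
struct OperandSketch { bool IsRegDef; unsigned Reg; };
struct InstrSketch { bool IsCall; };

bool clobbersForDebugInfo(const InstrSketch &MI, const OperandSketch &MO,
                          unsigned SP) {
  if (!MO.IsRegDef || MO.Reg == 0)
    return false;
  // AArch64 marks SP as clobbered on calls taking aggregate arguments;
  // honoring that would end every variable location range at each call
  // site, so such defs are ignored for location tracking.
  if (MI.IsCall && MO.Reg == SP)
    return false;
  return true;
}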
if (TRI->isVirtualRegister(MO.getReg())) diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 4d30c6574b12..256a0c95d365 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -77,6 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRASchedulerPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); + initializeRABasicPass(Registry); initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 23812a2a2344..3603f9b7ed93 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -556,6 +556,10 @@ bool GlobalMerge::doInitialization(Module &M) { if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) continue; + // It's not safe to merge globals that may be preempted + if (TM && !TM->shouldAssumeDSOLocal(M, &GV)) + continue; + if (!(MergeExternalGlobals && GV.hasExternalLinkage()) && !GV.hasInternalLinkage()) continue; diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index 0dc1079b2ad4..cde6ccd29dfd 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -198,13 +198,12 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { } void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); if (!MBB.succ_empty()) { - const MachineFunction &MF = *MBB.getParent(); addPristines(*this, MF); addLiveOutsNoPristines(MBB); } else if (MBB.isReturnBlock()) { // For the return block: Add all callee saved registers. - const MachineFunction &MF = *MBB.getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.isCalleeSavedInfoValid()) addCalleeSavedRegs(*this, MF); diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index dff555f49565..3746b74e0528 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveRegUnits.h" + #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -81,46 +83,50 @@ void LiveRegUnits::accumulateBackward(const MachineInstr &MI) { } /// Add live-in registers of basic block \p MBB to \p LiveUnits. -static void addLiveIns(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) { +static void addBlockLiveIns(LiveRegUnits &LiveUnits, + const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask); } -static void addLiveOuts(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) { - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addLiveIns(LiveUnits, *Succ); +/// Adds all callee saved registers to \p LiveUnits. 
+static void addCalleeSavedRegs(LiveRegUnits &LiveUnits, + const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) + LiveUnits.addReg(*CSR); } -/// Add pristine registers to the given \p LiveUnits. This function removes -/// actually saved callee save registers when \p InPrologueEpilogue is false. -static void removeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF, - const MachineFrameInfo &MFI, - const TargetRegisterInfo &TRI) { +/// Adds pristine registers to the given \p LiveUnits. Pristine registers are +/// callee saved registers that are unused in the function. +static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + /// Add all callee saved regs, then remove the ones that are saved+restored. + addCalleeSavedRegs(LiveUnits, MF); + /// Remove the ones that are not saved/restored; they are pristine. for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveUnits.removeReg(Info.getReg()); } void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) - addReg(*I); - if (!MBB.isReturnBlock()) - removeSavedRegs(*this, MF, MFI, *TRI); + if (!MBB.succ_empty()) { + addPristines(*this, MF); + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*this, *Succ); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) + addCalleeSavedRegs(*this, MF); } - ::addLiveOuts(*this, MBB); } void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) - addReg(*I); - if (&MBB != &MF.front()) - removeSavedRegs(*this, MF, MFI, *TRI); - } - ::addLiveIns(*this, MBB); + addPristines(*this, MF); + addBlockLiveIns(*this, MBB); } diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp index 71ad4e6aa7f5..2402ffdbbcb1 100644 --- a/lib/CodeGen/MachineRegionInfo.cpp +++ b/lib/CodeGen/MachineRegionInfo.cpp @@ -1,7 +1,19 @@ -#include "llvm/CodeGen/MachineRegionInfo.h" +//===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "machine-region-info" @@ -11,36 +23,29 @@ STATISTIC(numMachineRegions, "The # of machine regions"); STATISTIC(numMachineSimpleRegions, "The # of simple machine regions"); namespace llvm { + template class RegionBase<RegionTraits<MachineFunction>>; template class RegionNodeBase<RegionTraits<MachineFunction>>; template class RegionInfoBase<RegionTraits<MachineFunction>>; -} + +} // end namespace llvm //===----------------------------------------------------------------------===// // MachineRegion implementation -// MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, MachineRegionInfo* RI, MachineDominatorTree *DT, MachineRegion *Parent) : - RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) { + RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {} -} - -MachineRegion::~MachineRegion() { } +MachineRegion::~MachineRegion() = default; //===----------------------------------------------------------------------===// // MachineRegionInfo implementation -// - -MachineRegionInfo::MachineRegionInfo() : - RegionInfoBase<RegionTraits<MachineFunction>>() { - -} -MachineRegionInfo::~MachineRegionInfo() { +MachineRegionInfo::MachineRegionInfo() = default; -} +MachineRegionInfo::~MachineRegionInfo() = default; void MachineRegionInfo::updateStatistics(MachineRegion *R) { ++numMachineRegions; @@ -73,9 +78,7 @@ MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) { initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry()); } -MachineRegionInfoPass::~MachineRegionInfoPass() { - -} +MachineRegionInfoPass::~MachineRegionInfoPass() = default; bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) { releaseMemory(); @@ -137,8 +140,9 @@ INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE, // the link time optimization. namespace llvm { - FunctionPass *createMachineRegionInfoPass() { - return new MachineRegionInfoPass(); - } + +FunctionPass *createMachineRegionInfoPass() { + return new MachineRegionInfoPass(); } +} // end namespace llvm diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 265f93c363ca..f6dbf667cf02 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -909,17 +910,43 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } - // Generic loads and stores must have a single MachineMemOperand - // describing that access. 
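Stepping back to the LiveRegUnits refactoring above: a pristine register is a callee-saved register the function never actually saves and restores, so it stays live through the whole function. The set computation addPristines performs, in standalone form:

#include <set>
#include <vector>

using RegSet = std::set<unsigned>;

// Start from all callee-saved registers, then drop the ones the
// function really saves/restores; what remains is pristine.
RegSet computePristines(const std::vector<unsigned> &CalleeSaved,
                        const std::vector<unsigned> &ActuallySaved) {
  RegSet Pristine(CalleeSaved.begin(), CalleeSaved.end());
  for (unsigned R : ActuallySaved)
    Pristine.erase(R); // saved+restored regs are tracked normally
  return Pristine;
}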
- if ((MI->getOpcode() == TargetOpcode::G_LOAD || - MI->getOpcode() == TargetOpcode::G_STORE) && - !MI->hasOneMemOperand()) - report("Generic instruction accessing memory must have one mem operand", - MI); - StringRef ErrorInfo; if (!TII->verifyInstruction(*MI, ErrorInfo)) report(ErrorInfo.data(), MI); + + // Verify properties of various specific instruction types + switch(MI->getOpcode()) { + default: + break; + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: + // Generic loads and stores must have a single MachineMemOperand + // describing that access. + if (!MI->hasOneMemOperand()) + report("Generic instruction accessing memory must have one mem operand", + MI); + break; + case TargetOpcode::STATEPOINT: + if (!MI->getOperand(StatepointOpers::IDPos).isImm() || + !MI->getOperand(StatepointOpers::NBytesPos).isImm() || + !MI->getOperand(StatepointOpers::NCallArgsPos).isImm()) + report("meta operands to STATEPOINT not constant!", MI); + break; + + auto VerifyStackMapConstant = [&](unsigned Offset) { + if (!MI->getOperand(Offset).isImm() || + MI->getOperand(Offset).getImm() != StackMaps::ConstantOp || + !MI->getOperand(Offset + 1).isImm()) + report("stack map constant to STATEPOINT not well formed!", MI); + }; + const unsigned VarStart = StatepointOpers(MI).getVarIdx(); + VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset); + VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset); + VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset); + + // TODO: verify we have properly encoded deopt arguments + + }; } void diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index a9813e534c5f..e9f8d43fe643 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -54,8 +54,6 @@ static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks); -static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS); - namespace { class PEI : public MachineFunctionPass { public: @@ -84,7 +82,7 @@ private: const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks)> SpillCalleeSavedRegisters; - std::function<void(MachineFunction &MF, RegScavenger *RS)> + std::function<void(MachineFunction &MF, RegScavenger &RS)> ScavengeFrameVirtualRegs; bool UsesCalleeSaves = false; @@ -142,7 +140,6 @@ MachineFunctionPass *llvm::createPrologEpilogInserterPass() { return new PEI(); } -STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); @@ -168,10 +165,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, unsigned &, unsigned &, const MBBVector &, const MBBVector &) {}; - ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {}; + ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {}; } else { SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; - ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs; + ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs; UsesCalleeSaves = true; } } @@ -222,7 +219,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // post-pass, scavenge the virtual registers that frame index elimination // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) { - ScavengeFrameVirtualRegs(Fn, RS); + ScavengeFrameVirtualRegs(Fn, *RS); // Clear any vregs created by virtual scavenging. 
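PEI holds its optional phases as std::function members, bound either to a no-op lambda or to the real implementation; this change also passes the RegScavenger by reference, making the non-null requirement explicit in the type. The shape of that pattern with stand-in types:

#include <functional>

struct MachineFunctionStub {};
struct RegScavengerStub {};

void realScavenge(MachineFunctionStub &, RegScavengerStub &) {
  // ... the actual work, e.g. scavengeFrameVirtualRegs ...
}

struct PassSketch {
  std::function<void(MachineFunctionStub &, RegScavengerStub &)> Scavenge;

  void configure(bool NeedsScavenging) {
    if (NeedsScavenging)
      Scavenge = realScavenge; // bind the real phase
    else
      Scavenge = [](MachineFunctionStub &, RegScavengerStub &) {}; // no-op
  }
};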
Fn.getRegInfo().clearVirtRegs(); @@ -1153,92 +1150,3 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, RS->forward(MI); } } - -/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers -/// with physical registers. Use the register scavenger to find an -/// appropriate register to use. -/// -/// FIXME: Iterating over the instruction stream is unnecessary. We can simply -/// iterate over the vreg use list, which at this point only contains machine -/// operands for which eliminateFrameIndex need a new scratch reg. -static void -doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) { - // Run through the instructions and find any virtual registers. - MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) { - RS->enterBasicBlock(MBB); - - int SPAdj = 0; - - // The instruction stream may change in the loop, so check MBB.end() - // directly. - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { - // We might end up here again with a NULL iterator if we scavenged a - // register for which we inserted spill code for definition by what was - // originally the first instruction in MBB. - if (I == MachineBasicBlock::iterator(nullptr)) - I = MBB.begin(); - - const MachineInstr &MI = *I; - MachineBasicBlock::iterator J = std::next(I); - MachineBasicBlock::iterator P = - I == MBB.begin() ? MachineBasicBlock::iterator(nullptr) - : std::prev(I); - - // RS should process this instruction before we might scavenge at this - // location. This is because we might be replacing a virtual register - // defined by this instruction, and if so, registers killed by this - // instruction are available, and defined registers are not. - RS->forward(I); - - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // When we first encounter a new virtual register, it - // must be a definition. - assert(MO.isDef() && "frame index virtual missing def!"); - // Scavenge a new scratch register - const TargetRegisterClass *RC = MRI.getRegClass(Reg); - unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); - - ++NumScavengedRegs; - - // Replace this reference to the virtual register with the - // scratch register. - assert(ScratchReg && "Missing scratch register!"); - MRI.replaceRegWith(Reg, ScratchReg); - - // Because this instruction was processed by the RS before this - // register was allocated, make sure that the RS now records the - // register as being used. - RS->setRegUsed(ScratchReg); - } - - // If the scavenger needed to use one of its spill slots, the - // spill code will have been inserted in between I and J. This is a - // problem because we need the spill code before I: Move I to just - // prior to J. - if (I != std::prev(J)) { - MBB.splice(J, &MBB, I); - - // Before we move I, we need to prepare the RS to visit I again. - // Specifically, RS will assert if it sees uses of registers that - // it believes are undefined. Because we have already processed - // register kills in I, when it visits I again, it will believe that - // those registers are undefined. To avoid this situation, unprocess - // the instruction I. 
- assert(RS->getCurrentPosition() == I && - "The register scavenger has an unexpected position"); - I = P; - RS->unprocess(P); - } else - ++I; - } - } - - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); -} diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index a87fed3a687e..24be7ea98d82 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -58,8 +58,9 @@ namespace { /// whenever a register is unavailable. This is not practical in production but /// provides a useful baseline both for measuring other allocators and comparing /// the speed of the basic algorithm against other styles of allocators. -class RABasic : public MachineFunctionPass, public RegAllocBase -{ +class RABasic : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { // context MachineFunction *MF; @@ -72,6 +73,9 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // selectOrSplit(). BitVector UsableRegs; + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + public: RABasic(); @@ -121,17 +125,46 @@ char RABasic::ID = 0; } // end anonymous namespace +char &llvm::RABasicID = RABasic::ID; + +INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) +INITIALIZE_PASS_DEPENDENCY(MachineScheduler) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) +INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, + false) + +bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { + if (VRM->hasPhys(VirtReg)) { + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); + return true; + } + // Unassigned virtreg is probably in the priority queue. + // RegAllocBase will erase it after dequeueing. + return false; +} + +void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) { + if (!VRM->hasPhys(VirtReg)) + return; + + // Register is assigned, put it back on the queue for reassignment. + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + enqueue(&LI); +} + RABasic::RABasic(): MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); - initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); } void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { @@ -200,7 +233,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Matrix->unassign(Spill); // Spill the extracted interval. 
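RABasic now participates in live-range editing through the delegate interface it privately inherits, handing `this` to each LiveRangeEdit below. The shape of that callback wiring, with hypothetical names standing in for LiveRangeEdit::Delegate and the allocator state:

class DelegateSketch {
public:
  virtual ~DelegateSketch() = default;
  virtual bool canEraseVirtReg(unsigned VirtReg) = 0;
  virtual void willShrinkVirtReg(unsigned VirtReg) = 0;
};

class AllocatorSketch : private DelegateSketch {
  bool hasPhys(unsigned) const { return true; } // stand-in query
  void unassignAndRequeue(unsigned) {}          // stand-in action

  bool canEraseVirtReg(unsigned VirtReg) override {
    // Erasable only if currently assigned; an unassigned vreg is still
    // queued and will be dropped when it is dequeued.
    return hasPhys(VirtReg);
  }
  void willShrinkVirtReg(unsigned VirtReg) override {
    if (hasPhys(VirtReg))
      unassignAndRequeue(VirtReg); // shrinking may change its priority
  }

public:
  DelegateSketch *editDelegate() { return this; } // passed as `this`
};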
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); } return true; @@ -259,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 3b5964eef55e..b2dfef91add5 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -49,9 +49,11 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PBQP/Graph.h" +#include "llvm/CodeGen/PBQP/Math.h" #include "llvm/CodeGen/PBQP/Solution.h" #include "llvm/CodeGen/PBQPRAConstraint.h" #include "llvm/CodeGen/RegAllocPBQP.h" @@ -139,13 +141,13 @@ public: } private: - typedef std::map<const LiveInterval*, unsigned> LI2NodeMap; - typedef std::vector<const LiveInterval*> Node2LIMap; - typedef std::vector<unsigned> AllowedSet; - typedef std::vector<AllowedSet> AllowedSetMap; - typedef std::pair<unsigned, unsigned> RegPair; - typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; - typedef std::set<unsigned> RegSet; + using LI2NodeMap = std::map<const LiveInterval *, unsigned>; + using Node2LIMap = std::vector<const LiveInterval *>; + using AllowedSet = std::vector<unsigned>; + using AllowedSetMap = std::vector<AllowedSet>; + using RegPair = std::pair<unsigned, unsigned>; + using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>; + using RegSet = std::set<unsigned>; char *customPassID; @@ -212,12 +214,12 @@ public: /// @brief Add interference edges between overlapping vregs. class Interference : public PBQPRAConstraint { private: - typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; - typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey; - typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache; - typedef DenseSet<IKey> DisjointAllowedRegsCache; - typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey; - typedef DenseSet<IEdgeKey> IEdgeCache; + using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *; + using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>; + using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>; + using DisjointAllowedRegsCache = DenseSet<IKey>; + using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>; + using IEdgeCache = DenseSet<IEdgeKey>; bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId, @@ -252,8 +254,8 @@ private: // for the fast interference graph construction algorithm. The last is there // to save us from looking up node ids via the VRegToNode map in the graph // metadata. 
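The typedef-to-using conversion running through this file is purely syntactic for these cases, but alias declarations read left-to-right and, unlike typedefs, can themselves be templated:

#include <map>
#include <utility>
#include <vector>

using RegPair = std::pair<unsigned, unsigned>;    // alias declaration
typedef std::pair<unsigned, unsigned> RegPairOld; // the same type

// Alias templates have no typedef equivalent:
template <typename T>
using IdMap = std::map<unsigned, std::vector<T>>;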
- typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId> - IntervalInfo; + using IntervalInfo = + std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>; static SlotIndex getStartPoint(const IntervalInfo &I) { return std::get<0>(I)->segments[std::get<1>(I)].start; @@ -320,9 +322,10 @@ public: // Cache known disjoint allowed registers pairs DisjointAllowedRegsCache D; - typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet; - typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>, - decltype(&lowestStartPoint)> IntervalQueue; + using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>; + using IntervalQueue = + std::priority_queue<IntervalInfo, std::vector<IntervalInfo>, + decltype(&lowestStartPoint)>; IntervalSet Active(lowestEndPoint); IntervalQueue Inactive(lowestStartPoint); @@ -658,7 +661,6 @@ void RegAllocPBQP::spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals, MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, Spiller &VRegSpiller) { - VRegsToAlloc.erase(VReg); LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM, nullptr, &DeadRemats); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 0635e5c0a63c..1aed58c36e17 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -15,18 +15,23 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterScavenging.h" + #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -39,6 +44,8 @@ using namespace llvm; #define DEBUG_TYPE "reg-scavenging" +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); + void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { LiveUnits.addRegMasked(Reg, LaneMask); } @@ -469,3 +476,120 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } + +void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) { + // FIXME: Iterating over the instruction stream is unnecessary. We can simply + // iterate over the vreg use list, which at this point only contains machine + // operands for which eliminateFrameIndex need a new scratch reg. + + // Run through the instructions and find any virtual registers. + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineBasicBlock &MBB : MF) { + RS.enterBasicBlock(MBB); + + int SPAdj = 0; + + // The instruction stream may change in the loop, so check MBB.end() + // directly. + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + // We might end up here again with a NULL iterator if we scavenged a + // register for which we inserted spill code for definition by what was + // originally the first instruction in MBB. 
+ if (I == MachineBasicBlock::iterator(nullptr)) + I = MBB.begin(); + + const MachineInstr &MI = *I; + MachineBasicBlock::iterator J = std::next(I); + MachineBasicBlock::iterator P = + I == MBB.begin() ? MachineBasicBlock::iterator(nullptr) + : std::prev(I); + + // RS should process this instruction before we might scavenge at this + // location. This is because we might be replacing a virtual register + // defined by this instruction, and if so, registers killed by this + // instruction are available, and defined registers are not. + RS.forward(I); + + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // When we first encounter a new virtual register, it + // must be a definition. + assert(MO.isDef() && "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + unsigned ScratchReg = RS.scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + + // Replace this reference to the virtual register with the + // scratch register. + assert(ScratchReg && "Missing scratch register!"); + MRI.replaceRegWith(Reg, ScratchReg); + + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. + RS.setRegUsed(ScratchReg); + } + + // If the scavenger needed to use one of its spill slots, the + // spill code will have been inserted in between I and J. This is a + // problem because we need the spill code before I: Move I to just + // prior to J. + if (I != std::prev(J)) { + MBB.splice(J, &MBB, I); + + // Before we move I, we need to prepare the RS to visit I again. + // Specifically, RS will assert if it sees uses of registers that + // it believes are undefined. Because we have already processed + // register kills in I, when it visits I again, it will believe that + // those registers are undefined. To avoid this situation, unprocess + // the instruction I. + assert(RS.getCurrentPosition() == I && + "The register scavenger has an unexpected position"); + I = P; + RS.unprocess(P); + } else + ++I; + } + } + + MRI.clearVirtRegs(); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); +} + +namespace { +/// This class runs register scavenging independ of the PrologEpilogInserter. +/// This is used in for testing. +class ScavengerTest : public MachineFunctionPass { +public: + static char ID; + ScavengerTest() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetFrameLowering &TFL = *STI.getFrameLowering(); + + RegScavenger RS; + // Let's hope that calling those outside of PrologEpilogueInserter works + // well enough to initialize the scavenger with some emergency spillslots + // for the target. 
+ BitVector SavedRegs; + TFL.determineCalleeSaves(MF, SavedRegs, &RS); + TFL.processFunctionBeforeFrameFinalized(MF, &RS); + + // Let's scavenge the current function + scavengeFrameVirtualRegs(MF, RS); + return true; + } +}; +char ScavengerTest::ID; + +} // end anonymous namespace + +INITIALIZE_PASS(ScavengerTest, "scavenger-test", + "Scavenge virtual registers inside basic blocks", false, false) diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 8035ea80364b..3fdbd2459361 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -12,32 +12,54 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <string> +#include <utility> +#include <vector> using namespace llvm; @@ -90,11 +112,9 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags) : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), - RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), - TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr), + RemoveKillFlags(RemoveKillFlags), UnknownValue(UndefValue::get( - Type::getVoidTy(mf.getFunction()->getContext()))), - FirstDbgValue(nullptr) { + Type::getVoidTy(mf.getFunction()->getContext()))) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -126,7 +146,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { return V; } assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); - } while (1); + } while (true); } /// This is a wrapper around 
GetUnderlyingObjects and adds support for basic @@ -563,7 +583,7 @@ void ScheduleDAGInstrs::initSUnits() { // which is contained within a basic block. SUnits.reserve(NumRegionInstrs); - for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) { + for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { if (MI.isDebugValue()) continue; @@ -606,13 +626,13 @@ void ScheduleDAGInstrs::initSUnits() { class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { /// Current total number of SUs in map. - unsigned NumNodes; + unsigned NumNodes = 0; /// 1 for loads, 0 for stores. (see comment in SUList) unsigned TrueMemOrderLatency; public: - Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {} + Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {} /// To keep NumNodes up to date, insert() is used instead of /// this operator w/ push_back(). @@ -630,7 +650,7 @@ public: void inline clearList(ValueType V) { iterator Itr = find(V); if (Itr != end()) { - assert (NumNodes >= Itr->second.size()); + assert(NumNodes >= Itr->second.size()); NumNodes -= Itr->second.size(); Itr->second.clear(); @@ -646,7 +666,7 @@ public: unsigned inline size() const { return NumNodes; } /// Counts the number of SUs in this map after a reduction. - void reComputeSize(void) { + void reComputeSize() { NumNodes = 0; for (auto &I : *this) NumNodes += I.second.size(); @@ -676,7 +696,7 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, } void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { - assert (BarrierChain != nullptr); + assert(BarrierChain != nullptr); for (auto &I : map) { SUList &sus = I.second; @@ -687,7 +707,7 @@ void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { } void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { - assert (BarrierChain != nullptr); + assert(BarrierChain != nullptr); // Go through all lists of SUs. for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) { @@ -1028,7 +1048,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, // The N last elements in NodeNums will be removed, and the SU with // the lowest NodeNum of them will become the new BarrierChain to // let the not yet seen SUs have a dependency to the removed SUs. - assert (N <= NodeNums.size()); + assert(N <= NodeNums.size()); SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)]; if (BarrierChain) { // The aliasing and non-aliasing maps reduce independently of each @@ -1156,6 +1176,7 @@ std::string ScheduleDAGInstrs::getDAGName() const { //===----------------------------------------------------------------------===// namespace llvm { + /// Internal state used to compute SchedDFSResult. class SchedDFSImpl { SchedDFSResult &R; @@ -1163,16 +1184,16 @@ class SchedDFSImpl { /// Join DAG nodes into equivalence classes by their subtree. IntEqClasses SubtreeClasses; /// List PredSU, SuccSU pairs that represent data edges between subtrees. - std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; + std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs; struct RootData { unsigned NodeID; unsigned ParentNodeID; ///< Parent node (member of the parent subtree). - unsigned SubInstrCount; ///< Instr count in this tree only, not children. + unsigned SubInstrCount = 0; ///< Instr count in this tree only, not + /// children. 
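Several of these hunks (Value2SUsMap's NumNodes above, RootData's SubInstrCount here) move constant initial values out of constructor init-lists into default member initializers, so every constructor picks them up without restating them:

struct CounterSketch {
  unsigned NumNodes = 0; // default member initializer
  unsigned TrueMemOrderLatency;

  // The constructor no longer needs to mention NumNodes at all.
  CounterSketch(unsigned Lat = 0) : TrueMemOrderLatency(Lat) {}
};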
RootData(unsigned id): NodeID(id), - ParentNodeID(SchedDFSResult::InvalidSubtreeID), - SubInstrCount(0) {} + ParentNodeID(SchedDFSResult::InvalidSubtreeID) {} unsigned getSparseSetIndex() const { return NodeID; } }; @@ -1340,12 +1361,15 @@ protected: } while (FromTree != SchedDFSResult::InvalidSubtreeID); } }; + } // end namespace llvm namespace { + /// Manage the stack used by a reverse depth-first search over the DAG. class SchedDAGReverseDFS { - std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; + std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack; + public: bool isComplete() const { return DFSStack.empty(); } @@ -1367,7 +1391,8 @@ public: return getCurr()->Preds.end(); } }; -} // anonymous + +} // end anonymous namespace static bool hasDataSucc(const SUnit *SU) { for (const SDep &SuccDep : SU->Succs) { @@ -1392,7 +1417,7 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { SchedDAGReverseDFS DFS; Impl.visitPreorder(&SU); DFS.follow(&SU); - for (;;) { + while (true) { // Traverse the leftmost path as far as possible. while (DFS.getPred() != DFS.getPredEnd()) { const SDep &PredDep = *DFS.getPred(); @@ -1457,4 +1482,5 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { } } // end namespace llvm + #endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 5f167f8de1cf..9355dbe77f94 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -225,6 +225,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { } return TranslateLegalizeResults(Op, Lowered); } + LLVM_FALLTHROUGH; case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandLoad(Op)); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 177898e1e950..80a03ea4eea0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// // // The LLVM Compiler Infrastructure // @@ -11,29 +11,46 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/IR/CallingConv.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Constant.h" #include 
"llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -41,16 +58,20 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> -#include <cmath> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <limits> +#include <set> +#include <string> #include <utility> +#include <vector> using namespace llvm; @@ -269,7 +290,6 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { return ISD::CondCode(Operation); } - /// For an integer comparison, return 1 if the comparison is a signed operation /// and 2 if the result is an unsigned comparison. Return zero if the operation /// does not depend on the sign of the input (setne and seteq). @@ -338,7 +358,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, //===----------------------------------------------------------------------===// /// AddNodeIDOpcode - Add the node opcode to the NodeID data. -/// static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { ID.AddInteger(OpC); } @@ -350,7 +369,6 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. -/// static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDValue> Ops) { for (auto& Op : Ops) { @@ -360,7 +378,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. -/// static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDUse> Ops) { for (auto& Op : Ops) { @@ -392,10 +409,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } case ISD::TargetConstantFP: - case ISD::ConstantFP: { + case ISD::ConstantFP: ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); break; - } case ISD::TargetGlobalAddress: case ISD::GlobalAddress: case ISD::TargetGlobalTLSAddress: @@ -770,7 +786,6 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { /// maps and modified in place. Add it back to the CSE maps, unless an identical /// node already exists, in which case transfer all its users to the existing /// node. This transfer can potentially trigger recursive merging. -/// void SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { // For node types that aren't CSE'd, just act as if no identical node @@ -835,7 +850,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, return Node; } - /// FindModifiedNodeSlot - Find a slot for the specified node if its operands /// were replaced with those specified. 
If this node is never memoized, /// return null, otherwise return a pointer to the slot it would take. If a @@ -864,10 +878,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL), + : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(nullptr) { + Root(getEntryNode()) { InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } @@ -1038,7 +1051,6 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, } /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). -/// SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = @@ -1317,7 +1329,6 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, return SDValue(N, 0); } - SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, @@ -1451,7 +1462,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, // Validate that all indices in Mask are within the range of the elements // input to the shuffle. int NElts = Mask.size(); - assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) && "Index out of range"); // Copy the mask so we can do any needed cleanup. @@ -2918,7 +2929,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, else DemandedRHS.setBit((unsigned)M % NumElts); } - Tmp = UINT_MAX; + Tmp = std::numeric_limits<unsigned>::max(); if (!!DemandedLHS) Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); if (!!DemandedRHS) { @@ -3122,7 +3133,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned EltIdx = CEltNo->getZExtValue(); // If we demand the inserted element then get its sign bits. - Tmp = UINT_MAX; + Tmp = std::numeric_limits<unsigned>::max(); if (DemandedElts[EltIdx]) { // TODO - handle implicit truncation of inserted elements. if (InVal.getScalarValueSizeInBits() != VTBits) @@ -3188,7 +3199,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::CONCAT_VECTORS: // Determine the minimum number of sign bits across all demanded // elts of the input vectors. Early out if the result is already 1. - Tmp = UINT_MAX; + Tmp = std::numeric_limits<unsigned>::max(); EVT SubVectorVT = Op.getOperand(0).getValueType(); unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); unsigned NumSubVectors = Op.getNumOperands(); @@ -3327,7 +3338,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, - llvm::SelectionDAG &DAG) { + SelectionDAG &DAG) { assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); assert(llvm::all_of(Ops, [Ops](SDValue Op) { @@ -3836,8 +3847,9 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { return true; return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && - any_of(Divisor->op_values(), - [](SDValue V) { return V.isUndef() || isNullConstant(V); }); + llvm::any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || + isNullConstant(V); }); // TODO: Handle signed overflow. 
} // TODO: Handle oversized shifts. @@ -3948,8 +3960,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // All operands must be vector types with the same number of elements as // the result type and must be either UNDEF or a build vector of constant // or UNDEF scalars. - if (!all_of(Ops, IsConstantBuildVectorOrUndef) || - !all_of(Ops, IsScalarOrSameVectorSize)) + if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) || + !llvm::all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); // If we are comparing vectors, then the result needs to be a i1 boolean @@ -5550,7 +5562,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, Opcode == ISD::PREFETCH || Opcode == ISD::LIFETIME_START || Opcode == ISD::LIFETIME_END || - (Opcode <= INT_MAX && + ((int)Opcode <= std::numeric_limits<int>::max() && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -5884,7 +5896,6 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; FoldingSetNodeID ID; @@ -6038,13 +6049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, switch (Opcode) { default: break; - case ISD::CONCAT_VECTORS: { + case ISD::CONCAT_VECTORS: // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) return V; break; - } - case ISD::SELECT_CC: { + case ISD::SELECT_CC: assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); assert(Ops[0].getValueType() == Ops[1].getValueType() && "LHS and RHS of condition must have same type!"); @@ -6053,14 +6063,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Ops[2].getValueType() == VT && "select_cc node must be of same type as true and false value!"); break; - } - case ISD::BR_CC: { + case ISD::BR_CC: assert(NumOps == 5 && "BR_CC takes 5 operands!"); assert(Ops[2].getValueType() == Ops[3].getValueType() && "LHS/RHS of comparison should match types!"); break; } - } // Memoize nodes. SDNode *N; @@ -6599,7 +6607,6 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { return Res; } - /// getMachineNode - These are used for target selectors to create a new node /// with specified return type(s), MachineInstr opcode, and operands. /// @@ -6812,7 +6819,7 @@ public: : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} }; -} +} // end anonymous namespace /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. @@ -6858,7 +6865,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { AddModifiedNodeToCSEMaps(User); } - // If we just RAUW'd the root, take note. if (FromN == getRoot()) setRoot(To); @@ -7028,6 +7034,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ } namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith /// to record information about a use. struct UseMemo { @@ -7040,7 +7047,8 @@ namespace { bool operator<(const UseMemo &L, const UseMemo &R) { return (intptr_t)L.User < (intptr_t)R.User; } -} + +} // end anonymous namespace /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. 
The same value @@ -7106,7 +7114,6 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, /// based on their topological order. It returns the maximum id and a vector /// of the SDNodes* in assigned order by reference. unsigned SelectionDAG::AssignTopologicalOrder() { - unsigned DAGSize = 0; // SortedPos tracks the progress of the algorithm. Nodes before it are @@ -7333,6 +7340,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { } namespace { + struct EVTArray { std::vector<EVT> VTs; @@ -7342,11 +7350,12 @@ namespace { VTs.push_back(MVT((MVT::SimpleValueType)i)); } }; -} -static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; +} // end anonymous namespace + +static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs; static ManagedStatic<EVTArray> SimpleVTArray; -static ManagedStatic<sys::SmartMutex<true> > VTMutex; +static ManagedStatic<sys::SmartMutex<true>> VTMutex; /// getValueTypeList - Return a pointer to the specified value type. /// @@ -7380,7 +7389,6 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { return NUses == 0; } - /// hasAnyUseOfValue - Return true if there are any use of the indicated /// value. This method ignores uses of other values defined by this operation. bool SDNode::hasAnyUseOfValue(unsigned Value) const { @@ -7393,9 +7401,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { return false; } - /// isOnlyUserOf - Return true if this node is the only use of N. -/// bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { @@ -7425,7 +7431,6 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { } /// isOperand - Return true if this node is an operand of N. -/// bool SDValue::isOperandOf(const SDNode *N) const { for (const SDValue &Op : N->op_values()) if (*this == Op) @@ -7475,7 +7480,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, } // Next, try a deep search: check whether every operand of the TokenFactor // reaches Dest. - return all_of((*this)->ops(), [=](SDValue Op) { + return llvm::all_of((*this)->ops(), [=](SDValue Op) { return Op.reachesChainWithoutSideEffects(Dest, Depth - 1); }); } @@ -7627,7 +7632,6 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } - /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { @@ -7718,7 +7722,6 @@ unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); } - Type *ConstantPoolSDNode::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 687b882c5e4d..b5ccd64ee76c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2022,7 +2022,7 @@ static SDNode *findGlueUse(SDNode *N) { } /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". -/// This function recursively traverses up the operand chain, ignoring +/// This function iteratively traverses up the operand chain, ignoring /// certain nodes. 
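The hunk that follows rewrites findNonImmUse from a recursive walk into a loop over an explicit worklist, trading call-stack depth for heap storage so that very deep operand chains cannot overflow the stack. A self-contained sketch of the same transformation on a hypothetical Node type (not the SelectionDAG classes):

#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
};

// Returns true if Target is reachable from From through operand edges.
// The explicit stack replaces recursion; Visited ensures each node is
// expanded at most once, exactly like the Visited set in the hunk below.
bool reaches(Node *From, Node *Target) {
  std::vector<Node *> WorkList;
  WorkList.push_back(From);
  std::unordered_set<Node *> Visited;
  while (!WorkList.empty()) {
    Node *N = WorkList.back();
    WorkList.pop_back();
    if (!Visited.insert(N).second)
      continue; // Already scanned without success; rescanning cannot help.
    for (Node *Op : N->Operands) {
      if (Op == Target)
        return true;
      WorkList.push_back(Op);
    }
  }
  return false;
}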
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited, @@ -2035,30 +2035,36 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // The Use may be -1 (unassigned) if it is a newly allocated node. This can // happen because we scan down to newly selected nodes in the case of glue // uses. - if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) - return false; + std::vector<SDNode *> WorkList; + WorkList.push_back(Use); - // Don't revisit nodes if we already scanned it and didn't fail, we know we - // won't fail if we scan it again. - if (!Visited.insert(Use).second) - return false; + while (!WorkList.empty()) { + Use = WorkList.back(); + WorkList.pop_back(); + if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) + continue; - for (const SDValue &Op : Use->op_values()) { - // Ignore chain uses, they are validated by HandleMergeInputChains. - if (Op.getValueType() == MVT::Other && IgnoreChains) + // Don't revisit nodes if we already scanned it and didn't fail, we know we + // won't fail if we scan it again. + if (!Visited.insert(Use).second) continue; - SDNode *N = Op.getNode(); - if (N == Def) { - if (Use == ImmedUse || Use == Root) - continue; // We are not looking for immediate use. - assert(N != Root); - return true; - } + for (const SDValue &Op : Use->op_values()) { + // Ignore chain uses, they are validated by HandleMergeInputChains. + if (Op.getValueType() == MVT::Other && IgnoreChains) + continue; - // Traverse up the operand chain. - if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains)) - return true; + SDNode *N = Op.getNode(); + if (N == Def) { + if (Use == ImmedUse || Use == Root) + continue; // We are not looking for immediate use. + assert(N != Root); + return true; + } + + // Traverse up the operand chain. + WorkList.push_back(N); + } } return false; } diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 1c66649cae01..eed667dbe7e0 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -818,7 +818,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, SI.GCTransitionArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); SI.ID = ISP.getID(); - SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end()); + SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end()); SI.StatepointFlags = ISP.getFlags(); SI.NumPatchBytes = ISP.getNumPatchBytes(); SI.EHPadBB = EHPadBB; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0dffffee9976..adb2b188265b 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1493,8 +1493,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS and fold constant comparisons. 
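The SimplifySetCC comment just above describes a standard canonicalization, carried out by the getSetCCSwappedOperands call that follows: when only the left operand is a constant, swap the operands and mirror the condition code so that later folds only ever have to match constants on the RHS. A toy version of the idea, with a hypothetical Cmp enum standing in for ISD::CondCode:

#include <utility>

enum class Cmp { LT, LE, GT, GE, EQ, NE };

// Swapping the operands of a comparison mirrors the predicate: C < X holds
// exactly when X > C; EQ and NE are symmetric and stay unchanged.
Cmp swapOperands(Cmp C) {
  switch (C) {
  case Cmp::LT: return Cmp::GT;
  case Cmp::LE: return Cmp::GE;
  case Cmp::GT: return Cmp::LT;
  case Cmp::GE: return Cmp::LE;
  default:      return C;
  }
}

struct Compare {
  long LHS, RHS;
  bool LHSIsConst, RHSIsConst;
  Cmp Pred;
};

// Put a lone constant on the RHS so later folds match a single form.
void canonicalize(Compare &C) {
  if (C.LHSIsConst && !C.RHSIsConst) {
    std::swap(C.LHS, C.RHS);
    std::swap(C.LHSIsConst, C.RHSIsConst);
    C.Pred = swapOperands(C.Pred);
  }
}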
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); if (isa<ConstantSDNode>(N0.getNode()) && (DCI.isBeforeLegalizeOps() || @@ -1638,14 +1637,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), TopSetCC.getOperand(1), InvCond); - } } } - // If the LHS is '(and load, const)', the RHS is 0, - // the test is for equality or unsigned, and all 1 bits of the const are - // in the same partial word, see if we can shorten the load. + // If the LHS is '(and load, const)', the RHS is 0, the test is for + // equality or unsigned, and all 1 bits of the const are in the same + // partial word, see if we can shorten the load. if (DCI.isBeforeLegalize() && !ISD::isSignedIntSetCC(Cond) && N0.getOpcode() == ISD::AND && C1 == 0 && @@ -1669,10 +1667,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if ((newMask & Mask) == Mask) { - if (!DAG.getDataLayout().isLittleEndian()) - bestOffset = (origWidth/width - offset - 1) * (width/8); - else + if (DAG.getDataLayout().isLittleEndian()) bestOffset = (uint64_t)offset * (width/8); + else + bestOffset = (origWidth/width - offset - 1) * (width/8); bestMask = Mask.lshr(offset * (width/8) * 8); bestWidth = width; break; @@ -1713,10 +1711,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (Cond) { case ISD::SETUGT: case ISD::SETUGE: - case ISD::SETEQ: return DAG.getConstant(0, dl, VT); + case ISD::SETEQ: + return DAG.getConstant(0, dl, VT); case ISD::SETULT: case ISD::SETULE: - case ISD::SETNE: return DAG.getConstant(1, dl, VT); + case ISD::SETNE: + return DAG.getConstant(1, dl, VT); case ISD::SETGT: case ISD::SETGE: // True if the sign bit of C1 is set. @@ -1816,9 +1816,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, BitWidth-1))) { // Okay, get the un-inverted input value. SDValue Val; - if (N0.getOpcode() == ISD::XOR) + if (N0.getOpcode() == ISD::XOR) { Val = N0.getOperand(0); - else { + } else { assert(N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR); // ((X^1)&1)^1 -> X & 1 @@ -1883,7 +1883,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true + // X >= MIN --> true + if (C1 == MinVal) + return DAG.getConstant(1, dl, VT); + // X >= C0 --> X > (C0 - 1) APInt C = C1 - 1; ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; @@ -1898,7 +1901,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true + // X <= MAX --> true + if (C1 == MaxVal) + return DAG.getConstant(1, dl, VT); + // X <= C0 --> X < (C0 + 1) APInt C = C1 + 1; ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 900c0318b179..c43a5e18ad23 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1456,6 +1456,7 @@ void TargetLoweringBase::computeRegisterProperties( } if (IsLegalWiderType) break; + LLVM_FALLTHROUGH; } case TypeWidenVector: { // Try to widen the vector. 
@@ -1473,6 +1474,7 @@ void TargetLoweringBase::computeRegisterProperties( } if (IsLegalWiderType) break; + LLVM_FALLTHROUGH; } case TypeSplitVector: case TypeScalarizeVector: { diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 282e3103adc9..711144fc2faa 100644 --- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -27,6 +27,14 @@ Error CodeViewRecordIO::beginRecord(Optional<uint32_t> MaxLength) { Error CodeViewRecordIO::endRecord() { assert(!Limits.empty() && "Not in a record!"); Limits.pop_back(); + // We would like to assert that we actually read / wrote all the bytes that we + // expected to for this record, but unfortunately we can't do this. Some + // producers such as MASM over-allocate for certain types of records and + // commit the extraneous data, so when reading we can't be sure every byte + // will have been read. And when writing we over-allocate temporarily since + // we don't know how big the record is until we're finished writing it, so + // even though we don't commit the extraneous data, we still can't guarantee + // we're at the end of the allocated data. return Error::success(); } @@ -49,6 +57,12 @@ uint32_t CodeViewRecordIO::maxFieldLength() const { return *Min; } +Error CodeViewRecordIO::padToAlignment(uint32_t Align) { + if (isReading()) + return Reader->padToAlignment(Align); + return Writer->padToAlignment(Align); +} + Error CodeViewRecordIO::skipPadding() { assert(!isWriting() && "Cannot skip padding while writing!"); diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp index b8741eb0b675..2e72242181b0 100644 --- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp +++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp @@ -72,7 +72,7 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const { uint32_t DebugStringTableSubsection::size() const { return Strings.size(); } uint32_t DebugStringTableSubsection::getStringId(StringRef S) const { - auto P = Strings.find(S); - assert(P != Strings.end()); - return P->second; + auto Iter = Strings.find(S); + assert(Iter != Strings.end()); + return Iter->second; } diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp index 511f36d0020a..cfd1c5d3ab0c 100644 --- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp +++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -16,14 +16,17 @@ using namespace llvm; using namespace llvm::codeview; DebugSubsectionRecord::DebugSubsectionRecord() - : Kind(DebugSubsectionKind::None) {} + : Container(CodeViewContainer::ObjectFile), + Kind(DebugSubsectionKind::None) {} DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind, - BinaryStreamRef Data) - : Kind(Kind), Data(Data) {} + BinaryStreamRef Data, + CodeViewContainer Container) + : Container(Container), Kind(Kind), Data(Data) {} Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, - DebugSubsectionRecord &Info) { + DebugSubsectionRecord &Info, + CodeViewContainer Container) { const DebugSubsectionHeader *Header; BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Header)) @@ -41,13 +44,14 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, } if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) return EC; + Info.Container = Container; Info.Kind = Kind; return Error::success(); } uint32_t 
DebugSubsectionRecord::getRecordLength() const { uint32_t Result = sizeof(DebugSubsectionHeader) + Data.getLength(); - assert(Result % 4 == 0); + assert(Result % alignOf(Container) == 0); return Result; } @@ -56,25 +60,29 @@ DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; } BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; } DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( - DebugSubsectionKind Kind, DebugSubsection &Frag) - : Kind(Kind), Frag(Frag) {} + std::unique_ptr<DebugSubsection> Subsection, CodeViewContainer Container) + : Subsection(std::move(Subsection)), Container(Container) {} uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { - uint32_t Size = sizeof(DebugSubsectionHeader) + - alignTo(Frag.calculateSerializedSize(), 4); + uint32_t Size = + sizeof(DebugSubsectionHeader) + + alignTo(Subsection->calculateSerializedSize(), alignOf(Container)); return Size; } Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) { + assert(Writer.getOffset() % alignOf(Container) == 0 && + "Debug Subsection not properly aligned"); + DebugSubsectionHeader Header; - Header.Kind = uint32_t(Kind); + Header.Kind = uint32_t(Subsection->kind()); Header.Length = calculateSerializedLength() - sizeof(DebugSubsectionHeader); if (auto EC = Writer.writeObject(Header)) return EC; - if (auto EC = Frag.commit(Writer)) + if (auto EC = Subsection->commit(Writer)) return EC; - if (auto EC = Writer.padToAlignment(4)) + if (auto EC = Writer.padToAlignment(alignOf(Container))) return EC; return Error::success(); diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 3d49a7198d1a..66045933ce9b 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -668,7 +668,7 @@ Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) { Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); + SymbolDeserializer Deserializer(ObjDelegate.get(), Container); CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); @@ -679,7 +679,7 @@ Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) { Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); + SymbolDeserializer Deserializer(ObjDelegate.get(), Container); CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index bb1731465495..ea46841a70f6 100644 --- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -40,6 +40,7 @@ Error SymbolRecordMapping::visitSymbolBegin(CVSymbol &Record) { } Error SymbolRecordMapping::visitSymbolEnd(CVSymbol &Record) { + error(IO.padToAlignment(alignOf(Container))); error(IO.endRecord()); return Error::success(); } diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp index 251cc431f52b..9f2d619d1a1c 100644 --- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp +++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp @@ -12,9 +12,11 @@ using namespace llvm; using namespace llvm::codeview; -SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator) - : 
Storage(Allocator), RecordBuffer(MaxRecordLength), Stream(RecordBuffer, llvm::support::little), - Writer(Stream), Mapping(Writer) { } +SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator, + CodeViewContainer Container) + : Storage(Allocator), RecordBuffer(MaxRecordLength), + Stream(RecordBuffer, llvm::support::little), Writer(Stream), + Mapping(Writer, Container) {} Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) { assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!"); diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp index dfdeb8414212..faf2442bc94b 100644 --- a/lib/DebugInfo/MSF/MappedBlockStream.cpp +++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp @@ -47,42 +47,46 @@ static Interval intersect(const Interval &I1, const Interval &I2) { MappedBlockStream::MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - BinaryStreamRef MsfData) - : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData) {} - -std::unique_ptr<MappedBlockStream> -MappedBlockStream::createStream(uint32_t BlockSize, - const MSFStreamLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) + : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData), + Allocator(Allocator) {} + +std::unique_ptr<MappedBlockStream> MappedBlockStream::createStream( + uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>( - BlockSize, Layout, MsfData); + BlockSize, Layout, MsfData, Allocator); } std::unique_ptr<MappedBlockStream> MappedBlockStream::createIndexedStream( - const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex) { + const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex, + BumpPtrAllocator &Allocator) { assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index"); MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>( - Layout.SB->BlockSize, SL, MsfData); + Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr<MappedBlockStream> MappedBlockStream::createDirectoryStream(const MSFLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr<MappedBlockStream> MappedBlockStream::createFpmStream(const MSFLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, @@ -148,7 +152,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, // into it, and return an ArrayRef to that. Do not touch existing pool // allocations, as existing clients may be holding a pointer which must // not be invalidated. 
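The MappedBlockStream changes running through these hunks all serve one refactor: the stream-private pool is replaced by a BumpPtrAllocator passed in by whoever owns the PDB data, so every stream over the same file shares one arena and, as the comment above notes, pointers handed out to clients are never invalidated. The shape of the change, as a hedged sketch with invented names:

#include "llvm/Support/Allocator.h"
#include <cstdint>
#include <cstring>

// The caller-owned arena must outlive the stream and everything that holds
// pointers returned by it; nothing is freed until the arena is destroyed.
class BlockStreamSketch {
  llvm::BumpPtrAllocator &Allocator; // was: a private, per-stream pool

public:
  explicit BlockStreamSketch(llvm::BumpPtrAllocator &Alloc)
      : Allocator(Alloc) {}

  // Stitch a discontiguous read into arena-backed contiguous storage,
  // mirroring the WriteBuffer allocation in the hunk below.
  uint8_t *copyOut(const uint8_t *Src, size_t Size) {
    uint8_t *Buf = static_cast<uint8_t *>(Allocator.Allocate(Size, 8));
    std::memcpy(Buf, Src, Size);
    return Buf; // valid for the arena's lifetime, not the stream's
  }
};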
- uint8_t *WriteBuffer = static_cast<uint8_t *>(Pool.Allocate(Size, 8)); + uint8_t *WriteBuffer = static_cast<uint8_t *>(Allocator.Allocate(Size, 8)); if (auto EC = readBytes(Offset, MutableArrayRef<uint8_t>(WriteBuffer, Size))) return EC; @@ -269,10 +273,6 @@ Error MappedBlockStream::readBytes(uint32_t Offset, return Error::success(); } -uint32_t MappedBlockStream::getNumBytesCopied() const { - return static_cast<uint32_t>(Pool.getBytesAllocated()); -} - void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); } void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset, @@ -313,43 +313,48 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset, WritableMappedBlockStream::WritableMappedBlockStream( uint32_t BlockSize, const MSFStreamLayout &Layout, - WritableBinaryStreamRef MsfData) - : ReadInterface(BlockSize, Layout, MsfData), WriteInterface(MsfData) {} + WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator) + : ReadInterface(BlockSize, Layout, MsfData, Allocator), + WriteInterface(MsfData) {} std::unique_ptr<WritableMappedBlockStream> WritableMappedBlockStream::createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - WritableBinaryStreamRef MsfData) { + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { return llvm::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>( - BlockSize, Layout, MsfData); + BlockSize, Layout, MsfData, Allocator); } std::unique_ptr<WritableMappedBlockStream> WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, - uint32_t StreamIndex) { + uint32_t StreamIndex, + BumpPtrAllocator &Allocator) { assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index"); MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; - return createStream(Layout.SB->BlockSize, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr<WritableMappedBlockStream> WritableMappedBlockStream::createDirectoryStream( - const MSFLayout &Layout, WritableBinaryStreamRef MsfData) { + const MSFLayout &Layout, WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr<WritableMappedBlockStream> WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout, - WritableBinaryStreamRef MsfData) { + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index b28ec2ff33ac..22c2ef31bd71 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -66,7 +66,11 @@ void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) { void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) { Symbols.push_back(Symbol); - SymbolByteSize += Symbol.data().size(); + // Symbols written to a PDB file are required to be 4 byte aligned. The same + // is not true of object files. 
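alignOf(Container), which the new asserts and padToAlignment calls rely on, keys record alignment off where the CodeView data lives: per the comment above, PDB streams pad symbol records to 4 bytes while object files pack them. A tiny self-contained model of that dispatch (the names mirror, but are not, the llvm::codeview API):

#include <cstdint>

enum class Container { ObjectFile, Pdb };

// PDB symbol records are 4-byte aligned; CodeView embedded in object
// files is packed, which is the same as an alignment of 1.
constexpr uint32_t alignOf(Container C) {
  return C == Container::Pdb ? 4 : 1;
}

// Round an offset up to the container's alignment, as a serializer does
// before closing out a record (compare the assert that follows).
constexpr uint32_t padTo(uint32_t Offset, Container C) {
  return (Offset + alignOf(C) - 1) / alignOf(C) * alignOf(C);
}

static_assert(padTo(10, Container::Pdb) == 12, "pad to next multiple of 4");
static_assert(padTo(10, Container::ObjectFile) == 10, "no padding");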
+ assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 && + "Invalid Symbol alignment!"); + SymbolByteSize += Symbol.length(); } void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { @@ -140,7 +144,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, if (Layout.ModDiStream != kInvalidStreamIndex) { auto NS = WritableMappedBlockStream::createIndexedStream( - MsfLayout, MsfBuffer, Layout.ModDiStream); + MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator()); WritableBinaryStreamRef Ref(*NS); BinaryStreamWriter SymbolWriter(Ref); // Write the symbols. @@ -153,7 +157,8 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, if (auto EC = SymbolWriter.writeStreamRef(RecordsRef)) return EC; // TODO: Write C11 Line data - + assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 && + "Invalid debug section alignment!"); for (const auto &Builder : C13Builders) { assert(Builder && "Empty C13 Fragment Builder!"); if (auto EC = Builder->commit(SymbolWriter)) @@ -169,42 +174,9 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, return Error::success(); } -void DbiModuleDescriptorBuilder::addC13Fragment( - std::unique_ptr<DebugLinesSubsection> Lines) { - DebugLinesSubsection &Frag = *Lines; - - // File Checksums have to come first, so push an empty entry on if this - // is the first. - if (C13Builders.empty()) - C13Builders.push_back(nullptr); - - this->LineInfo.push_back(std::move(Lines)); - C13Builders.push_back( - llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag)); -} - -void DbiModuleDescriptorBuilder::addC13Fragment( - std::unique_ptr<codeview::DebugInlineeLinesSubsection> Inlinees) { - DebugInlineeLinesSubsection &Frag = *Inlinees; - - // File Checksums have to come first, so push an empty entry on if this - // is the first. 
- if (C13Builders.empty()) - C13Builders.push_back(nullptr); - - this->Inlinees.push_back(std::move(Inlinees)); - C13Builders.push_back( - llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag)); -} - -void DbiModuleDescriptorBuilder::setC13FileChecksums( - std::unique_ptr<DebugChecksumsSubsection> Checksums) { - assert(!ChecksumInfo && "Can't have more than one checksum info!"); - - if (C13Builders.empty()) - C13Builders.push_back(nullptr); - - ChecksumInfo = std::move(Checksums); - C13Builders[0] = llvm::make_unique<DebugSubsectionRecordBuilder>( - ChecksumInfo->kind(), *ChecksumInfo); +void DbiModuleDescriptorBuilder::addDebugSubsection( + std::unique_ptr<DebugSubsection> Subsection) { + assert(Subsection); + C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>( + std::move(Subsection), CodeViewContainer::Pdb)); } diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index 2f4fb6cc295d..320b11dc5cab 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -252,7 +252,7 @@ Error DbiStream::initializeSectionHeadersData() { return make_error<RawError>(raw_error_code::no_stream); auto SHS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator()); size_t StreamLen = SHS->getLength(); if (StreamLen % sizeof(object::coff_section)) @@ -284,7 +284,7 @@ Error DbiStream::initializeFpoRecords() { return make_error<RawError>(raw_error_code::no_stream); auto FS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator()); size_t StreamLen = FS->getLength(); if (StreamLen % sizeof(object::FpoData)) diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index 23c7456d7772..55c20fdb9af6 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -357,8 +357,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = finalize()) return EC; - auto DbiS = WritableMappedBlockStream::createIndexedStream(Layout, MsfBuffer, - StreamDBI); + auto DbiS = WritableMappedBlockStream::createIndexedStream( + Layout, MsfBuffer, StreamDBI, Allocator); BinaryStreamWriter Writer(*DbiS); if (auto EC = Writer.writeObject(*Header)) @@ -396,7 +396,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (Stream.StreamNumber == kInvalidStreamIndex) continue; auto WritableStream = WritableMappedBlockStream::createIndexedStream( - Layout, MsfBuffer, Stream.StreamNumber); + Layout, MsfBuffer, Stream.StreamNumber, Allocator); BinaryStreamWriter DbgStreamWriter(*WritableStream); if (auto EC = DbgStreamWriter.writeArray(Stream.Data)) return EC; diff --git a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp index f019d410328a..707128f7efd4 100644 --- a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp @@ -50,8 +50,8 @@ Error InfoStreamBuilder::finalizeMsfLayout() { Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer) const { - auto InfoS = - WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB); + auto InfoS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, StreamPDB, Msf.getAllocator()); 
BinaryStreamWriter Writer(*InfoS); InfoStreamHeader H; diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp index d7a203746a0d..c4ff30011a17 100644 --- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp +++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp @@ -55,9 +55,9 @@ Error ModuleDebugStreamRef::reload() { if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size)) return EC; - BinaryStreamReader LineReader(C13LinesSubstream); - if (auto EC = - LineReader.readArray(LinesAndChecksums, LineReader.bytesRemaining())) + BinaryStreamReader SubsectionsReader(C13LinesSubstream); + if (auto EC = SubsectionsReader.readArray(Subsections, + SubsectionsReader.bytesRemaining())) return EC; uint32_t GlobalRefsSize; @@ -77,13 +77,27 @@ ModuleDebugStreamRef::symbols(bool *HadError) const { return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end()); } -llvm::iterator_range<ModuleDebugStreamRef::LinesAndChecksumsIterator> -ModuleDebugStreamRef::linesAndChecksums() const { - return make_range(LinesAndChecksums.begin(), LinesAndChecksums.end()); +llvm::iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator> +ModuleDebugStreamRef::subsections() const { + return make_range(Subsections.begin(), Subsections.end()); } -bool ModuleDebugStreamRef::hasLineInfo() const { +bool ModuleDebugStreamRef::hasDebugSubsections() const { return C13LinesSubstream.getLength() > 0; } Error ModuleDebugStreamRef::commit() { return Error::success(); } + +Expected<codeview::DebugChecksumsSubsectionRef> +ModuleDebugStreamRef::findChecksumsSubsection() const { + for (const auto &SS : subsections()) { + if (SS.kind() != DebugSubsectionKind::FileChecksums) + continue; + + codeview::DebugChecksumsSubsectionRef Result; + if (auto EC = Result.initialize(SS.getRecordData())) + return std::move(EC); + return Result; + } + return make_error<RawError>(raw_error_code::no_entry); +} diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 859295d2c7d3..1254e23c73eb 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -146,7 +146,8 @@ Error PDBFile::parseFileHeaders() { // at getBlockSize() intervals, so we have to be compatible. // See the function fpmPn() for more information: // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 - auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer); + auto FpmStream = + MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); BinaryStreamReader FpmReader(*FpmStream); ArrayRef<uint8_t> FpmBytes; if (auto EC = FpmReader.readBytes(FpmBytes, @@ -184,7 +185,8 @@ Error PDBFile::parseStreamData() { // is exactly what we are attempting to parse. By specifying a custom // subclass of IPDBStreamData which only accesses the fields that have already // been parsed, we can avoid this and reuse MappedBlockStream. 
- auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer); + auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, + Allocator); BinaryStreamReader Reader(*DS); if (auto EC = Reader.readInteger(NumStreams)) return EC; @@ -407,5 +409,6 @@ PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout, uint32_t StreamIndex) const { if (StreamIndex >= getNumStreams()) return make_error<RawError>(raw_error_code::no_stream); - return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex); + return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex, + Allocator); } diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index c6568029ec55..2c6465e6fb2a 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -140,8 +140,8 @@ Error PDBFileBuilder::commit(StringRef Filename) { if (auto EC = Writer.writeArray(Layout.DirectoryBlocks)) return EC; - auto DirStream = - WritableMappedBlockStream::createDirectoryStream(Layout, Buffer); + auto DirStream = WritableMappedBlockStream::createDirectoryStream( + Layout, Buffer, Allocator); BinaryStreamWriter DW(*DirStream); if (auto EC = DW.writeInteger<uint32_t>(Layout.StreamSizes.size())) return EC; @@ -158,8 +158,8 @@ Error PDBFileBuilder::commit(StringRef Filename) { if (!ExpectedSN) return ExpectedSN.takeError(); - auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - *ExpectedSN); + auto NS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, *ExpectedSN, Allocator); BinaryStreamWriter NSWriter(*NS); if (auto EC = Strings.commit(NSWriter)) return EC; diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index e84573fe07b8..6013c342cf02 100644 --- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -56,6 +56,10 @@ Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { return Error::success(); } +codeview::DebugStringTableSubsectionRef PDBStringTable::getStringTable() const { + return Strings; +} + Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) { const support::ulittle32_t *HashCount; if (auto EC = Reader.readObject(HashCount)) diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp index 623afb371b50..67c803d3124e 100644 --- a/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -32,8 +32,7 @@ using namespace llvm::support; using namespace llvm::msf; using namespace llvm::pdb; -TpiStream::TpiStream(const PDBFile &File, - std::unique_ptr<MappedBlockStream> Stream) +TpiStream::TpiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream) : Pdb(File), Stream(std::move(Stream)) {} TpiStream::~TpiStream() = default; @@ -77,7 +76,8 @@ Error TpiStream::reload() { "Invalid TPI hash stream index."); auto HS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex, + Pdb.getAllocator()); BinaryStreamReader HSR(*HS); // There should be a hash value for every type record, or no hashes at all. 
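Taken together, the call sites above show the mechanical half of the MappedBlockStream refactor: every create*Stream factory gains a trailing allocator argument, and each caller forwards the arena of whatever owns the MSF data (Pdb.getAllocator(), MSF.getAllocator(), a builder's Allocator member). A hedged before/after for a typical caller, where File stands in for an owning PDBFile:

// Before: the stream allocated from its own private pool.
//   auto S = MappedBlockStream::createIndexedStream(Layout, MsfData, Idx);

// After: the owner's arena is shared by every stream created against it,
// and must outlive all of them.
auto S = MappedBlockStream::createIndexedStream(Layout, MsfData, Idx,
                                                File.getAllocator());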
diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 20456cc97823..9e943c7f114d 100644 --- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -147,8 +147,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = finalize()) return EC; - auto InfoS = - WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx); + auto InfoS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, + Idx, Allocator); BinaryStreamWriter Writer(*InfoS); if (auto EC = Writer.writeObject(*Header)) @@ -159,8 +159,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; if (HashStreamIndex != kInvalidStreamIndex) { - auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - HashStreamIndex); + auto HVS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, HashStreamIndex, Allocator); BinaryStreamWriter HW(*HVS); if (HashValueStream) { if (auto EC = HW.writeStreamRef(*HashValueStream)) diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 7e6f9a7804b9..7754ac03b43d 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -39,6 +39,21 @@ void DIBuilder::trackIfUnresolved(MDNode *N) { UnresolvedNodes.emplace_back(N); } +void DIBuilder::finalizeSubprogram(DISubprogram *SP) { + MDTuple *Temp = SP->getVariables().get(); + if (!Temp || !Temp->isTemporary()) + return; + + SmallVector<Metadata *, 4> Variables; + + auto PV = PreservedVariables.find(SP); + if (PV != PreservedVariables.end()) + Variables.append(PV->second.begin(), PV->second.end()); + + DINodeArray AV = getOrCreateArray(Variables); + TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); +} + void DIBuilder::finalize() { if (!CUNode) { assert(!AllowUnresolvedNodes && @@ -62,25 +77,11 @@ void DIBuilder::finalize() { CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues)); DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); - auto resolveVariables = [&](DISubprogram *SP) { - MDTuple *Temp = SP->getVariables().get(); - if (!Temp) - return; - - SmallVector<Metadata *, 4> Variables; - - auto PV = PreservedVariables.find(SP); - if (PV != PreservedVariables.end()) - Variables.append(PV->second.begin(), PV->second.end()); - - DINodeArray AV = getOrCreateArray(Variables); - TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); - }; for (auto *SP : SPs) - resolveVariables(SP); + finalizeSubprogram(SP); for (auto *N : RetainValues) if (auto *SP = dyn_cast<DISubprogram>(N)) - resolveVariables(SP); + finalizeSubprogram(SP); if (!AllGVs.empty()) CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs)); diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index b7e3f0c6779e..0485fece7c42 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -99,87 +99,6 @@ DebugLoc DebugLoc::appendInlinedAt(DebugLoc DL, DILocation *InlinedAt, return Last; } -/// Reparent \c Scope from \c OrigSP to \c NewSP. -static DIScope *reparentScope(LLVMContext &Ctx, DIScope *Scope, - DISubprogram *OrigSP, DISubprogram *NewSP, - DenseMap<const MDNode *, MDNode *> &Cache) { - SmallVector<DIScope *, 3> ScopeChain; - DIScope *Last = NewSP; - DIScope *CurScope = Scope; - do { - if (auto *SP = dyn_cast<DISubprogram>(CurScope)) { - // Don't rewrite this scope chain if it doesn't lead to the replaced SP. 
- if (SP != OrigSP) - return Scope; - Cache.insert({OrigSP, NewSP}); - break; - } - if (auto *Found = Cache[CurScope]) { - Last = cast<DIScope>(Found); - break; - } - ScopeChain.push_back(CurScope); - } while ((CurScope = CurScope->getScope().resolve())); - - // Starting from the top, rebuild the nodes to point to the new inlined-at - // location (then rebuilding the rest of the chain behind it) and update the - // map of already-constructed inlined-at nodes. - for (const DIScope *MD : reverse(ScopeChain)) { - if (auto *LB = dyn_cast<DILexicalBlock>(MD)) - Cache[MD] = Last = DILexicalBlock::getDistinct( - Ctx, Last, LB->getFile(), LB->getLine(), LB->getColumn()); - else if (auto *LB = dyn_cast<DILexicalBlockFile>(MD)) - Cache[MD] = Last = DILexicalBlockFile::getDistinct( - Ctx, Last, LB->getFile(), LB->getDiscriminator()); - else - llvm_unreachable("illegal parent scope"); - } - return Last; -} - -void DebugLoc::reparentDebugInfo(Instruction &I, DISubprogram *OrigSP, - DISubprogram *NewSP, - DenseMap<const MDNode *, MDNode *> &Cache) { - auto DL = I.getDebugLoc(); - if (!OrigSP || !NewSP || OrigSP == NewSP || !DL) - return; - - // Reparent the debug location. - auto &Ctx = I.getContext(); - DILocation *InlinedAt = DL->getInlinedAt(); - if (InlinedAt) { - while (auto *IA = InlinedAt->getInlinedAt()) - InlinedAt = IA; - auto NewScope = - reparentScope(Ctx, InlinedAt->getScope(), OrigSP, NewSP, Cache); - InlinedAt = - DebugLoc::get(InlinedAt->getLine(), InlinedAt->getColumn(), NewScope); - } - I.setDebugLoc( - DebugLoc::get(DL.getLine(), DL.getCol(), - reparentScope(Ctx, DL->getScope(), OrigSP, NewSP, Cache), - DebugLoc::appendInlinedAt(DL, InlinedAt, Ctx, Cache, - ReplaceLastInlinedAt))); - - // Fix up debug variables to point to NewSP. - auto reparentVar = [&](DILocalVariable *Var) { - return DILocalVariable::get( - Ctx, - cast<DILocalScope>( - reparentScope(Ctx, Var->getScope(), OrigSP, NewSP, Cache)), - Var->getName(), Var->getFile(), Var->getLine(), Var->getType(), - Var->getArg(), Var->getFlags(), Var->getAlignInBits()); - }; - if (auto *DbgValue = dyn_cast<DbgValueInst>(&I)) { - auto *Var = DbgValue->getVariable(); - I.setOperand(2, MetadataAsValue::get(Ctx, reparentVar(Var))); - } else if (auto *DbgDeclare = dyn_cast<DbgDeclareInst>(&I)) { - auto *Var = DbgDeclare->getVariable(); - I.setOperand(1, MetadataAsValue::get(Ctx, reparentVar(Var))); - } -} - - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void DebugLoc::dump() const { if (!Loc) diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp index b670c817569a..a03a6fb62237 100644 --- a/lib/IR/OptBisect.cpp +++ b/lib/IR/OptBisect.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" #include "llvm/IR/Module.h" #include "llvm/IR/OptBisect.h" #include "llvm/Pass.h" @@ -53,13 +54,20 @@ static std::string getDescription(const BasicBlock &BB) { } static std::string getDescription(const Loop &L) { - // FIXME: I'd like to be able to provide a better description here, but - // calling L->getHeader() would introduce a new dependency on the - // LLVMCore library. + // FIXME: Move into LoopInfo so we can get a better description + // (and avoid a circular dependency between IR and Analysis). return "loop"; } +static std::string getDescription(const Region &R) { + // FIXME: Move into RegionInfo so we can get a better description + // (and avoid a circular dependency between IR and Analysis). 
+ return "region"; +} + static std::string getDescription(const CallGraphSCC &SCC) { + // FIXME: Move into CallGraphSCCPass to avoid circular dependency between + // IR and Analysis. std::string Desc = "SCC ("; bool First = true; for (CallGraphNode *CGN : SCC) { @@ -83,6 +91,7 @@ template bool OptBisect::shouldRunPass(const Pass *, const Function &); template bool OptBisect::shouldRunPass(const Pass *, const BasicBlock &); template bool OptBisect::shouldRunPass(const Pass *, const Loop &); template bool OptBisect::shouldRunPass(const Pass *, const CallGraphSCC &); +template bool OptBisect::shouldRunPass(const Pass *, const Region &); template <class UnitT> bool OptBisect::shouldRunPass(const Pass *P, const UnitT &U) { diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 9efc095f9fcf..92145aaf667a 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -122,6 +122,7 @@ static void computeCacheKey( AddUnsigned(Conf.CGOptLevel); AddUnsigned(Conf.CGFileType); AddUnsigned(Conf.OptLevel); + AddUnsigned(Conf.UseNewPM); AddString(Conf.OptPipeline); AddString(Conf.AAPipeline); AddString(Conf.OverrideTriple); @@ -621,6 +622,19 @@ unsigned LTO::getMaxTasks() const { } Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { + // Compute "dead" symbols, we don't want to import/export these! + DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; + for (auto &Res : GlobalResolutions) { + if (Res.second.VisibleOutsideThinLTO && + // IRName will be defined if we have seen the prevailing copy of + // this value. If not, no need to preserve any ThinLTO copies. + !Res.second.IRName.empty()) + GUIDPreservedSymbols.insert(GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(Res.second.IRName))); + } + + computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); + // Save the status of having a regularLTO combined module, as // this is needed for generating the ThinLTO Task ID, and // the CombinedModule will be moved at the end of runRegularLTO. @@ -930,6 +944,17 @@ ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix, }; } +static bool IsLiveByGUID(const ModuleSummaryIndex &Index, + GlobalValue::GUID GUID) { + auto VI = Index.getValueInfo(GUID); + if (!VI) + return false; + for (auto &I : VI.getSummaryList()) + if (Index.isGlobalValueLive(I.get())) + return true; + return false; +} + Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, bool HasRegularLTO) { if (ThinLTO.ModuleMap.empty()) @@ -962,22 +987,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; if (Conf.OptLevel > 0) { - // Compute "dead" symbols, we don't want to import/export these! - DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; - for (auto &Res : GlobalResolutions) { - if (Res.second.VisibleOutsideThinLTO && - // IRName will be defined if we have seen the prevailing copy of - // this value. If not, no need to preserve any ThinLTO copies. 
- !Res.second.IRName.empty()) - GUIDPreservedSymbols.insert(GlobalValue::getGUID( - GlobalValue::dropLLVMManglingEscape(Res.second.IRName))); - } - - auto DeadSymbols = - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); - ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - ImportLists, ExportLists, &DeadSymbols); + ImportLists, ExportLists); std::set<GlobalValue::GUID> ExportedGUIDs; for (auto &Res : GlobalResolutions) { @@ -992,7 +1003,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, auto GUID = GlobalValue::getGUID( GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); // Mark exported unless index-based analysis determined it to be dead. - if (!DeadSymbols.count(GUID)) + if (IsLiveByGUID(ThinLTO.CombinedIndex, GUID)) ExportedGUIDs.insert(GUID); } diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index f9c41f5c9744..3f72e446cdf2 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -42,11 +42,6 @@ using namespace llvm; using namespace lto; -static cl::opt<bool> - LTOUseNewPM("lto-use-new-pm", - cl::desc("Run LTO passes using the new pass manager"), - cl::init(false), cl::Hidden); - LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { errs() << "failed to open " << Path << ": " << Msg << '\n'; errs().flush(); @@ -266,7 +261,7 @@ bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); - else if (LTOUseNewPM) + else if (Conf.UseNewPM) runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO); else runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index ca3fc60f9501..6b221a347c17 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -628,13 +628,13 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); // Resolve LinkOnce/Weak symbols. StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; @@ -673,13 +673,13 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! 
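// (computeDeadSymbols walks the combined summary index from the preserved
// GUIDs and the other externally visible roots, marking every reachable
// summary live in the index itself; callers then query liveness through
// ModuleSummaryIndex::isGlobalValueLive instead of receiving a set of dead
// GUIDs, which is why no return value is kept at these call sites.)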
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); @@ -750,13 +750,13 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); auto &ExportList = ExportLists[ModuleIdentifier]; // Be friendly and don't nuke totally the module when the client didn't @@ -902,14 +902,14 @@ void ThinLTOCodeGenerator::run() { computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols); + computeDeadSymbols(*Index, GUIDPreservedSymbols); // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 8c3df36cfb48..9b2031f05043 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -40,6 +40,7 @@ using namespace llvm; #define DEBUG_TYPE "reloc-info" namespace { + // For patching purposes, we need to remember where each section starts, both // for patching up the section size field, and for patching up references to // locations within the section. @@ -50,6 +51,82 @@ struct SectionBookkeeping { uint64_t ContentsOffset; }; +// The signature of a wasm function, in a struct capable of being used as a +// DenseMap key. +struct WasmFunctionType { + // Support empty and tombstone instances, needed by DenseMap. + enum { Plain, Empty, Tombstone } State; + + // The return types of the function. + SmallVector<wasm::ValType, 1> Returns; + + // The parameter types of the function. + SmallVector<wasm::ValType, 4> Params; + + WasmFunctionType() : State(Plain) {} + + bool operator==(const WasmFunctionType &Other) const { + return State == Other.State && Returns == Other.Returns && + Params == Other.Params; + } +}; + +// Traits for using WasmFunctionType in a DenseMap. 
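// A note on the traits below: a DenseMap key type must provide reserved
// "empty" and "tombstone" sentinel values plus hashing and equality, and the
// struct supplies all three. As an illustrative sketch (hypothetical member
// name, not taken from this file), a map from deduplicated signatures to
// their indices in the type section could then be declared as:
//   DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
//       SignatureIndices;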
+struct WasmFunctionTypeDenseMapInfo { + static WasmFunctionType getEmptyKey() { + WasmFunctionType FuncTy; + FuncTy.State = WasmFunctionType::Empty; + return FuncTy; + } + static WasmFunctionType getTombstoneKey() { + WasmFunctionType FuncTy; + FuncTy.State = WasmFunctionType::Tombstone; + return FuncTy; + } + static unsigned getHashValue(const WasmFunctionType &FuncTy) { + uintptr_t Value = FuncTy.State; + for (wasm::ValType Ret : FuncTy.Returns) + Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret)); + for (wasm::ValType Param : FuncTy.Params) + Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param)); + return Value; + } + static bool isEqual(const WasmFunctionType &LHS, + const WasmFunctionType &RHS) { + return LHS == RHS; + } +}; + +// A wasm import to be written into the import section. +struct WasmImport { + StringRef ModuleName; + StringRef FieldName; + unsigned Kind; + int32_t Type; +}; + +// A wasm function to be written into the function section. +struct WasmFunction { + int32_t Type; + const MCSymbolWasm *Sym; +}; + +// A wasm export to be written into the export section. +struct WasmExport { + StringRef FieldName; + unsigned Kind; + uint32_t Index; +}; + +// A wasm global to be written into the global section. +struct WasmGlobal { + wasm::ValType Type; + bool IsMutable; + bool HasImport; + uint64_t InitialValue; + uint32_t ImportIndex; +}; + class WasmObjectWriter : public MCObjectWriter { /// Helper struct for containing some precomputed information on symbols. struct WasmSymbolData { @@ -91,18 +168,10 @@ public: : MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {} private: - void reset() override { - MCObjectWriter::reset(); - } - ~WasmObjectWriter() override; void writeHeader(const MCAssembler &Asm); - void writeValueType(wasm::ValType Ty) { - encodeSLEB128(int32_t(Ty), getStream()); - } - void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, @@ -112,7 +181,37 @@ private: const MCAsmLayout &Layout) override; void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + + void writeValueType(wasm::ValType Ty) { + encodeSLEB128(int32_t(Ty), getStream()); + } + + void writeTypeSection(const SmallVector<WasmFunctionType, 4> &FunctionTypes); + void writeImportSection(const SmallVector<WasmImport, 4> &Imports); + void writeFunctionSection(const SmallVector<WasmFunction, 4> &Functions); + void writeTableSection(const SmallVector<uint32_t, 4> &TableElems); + void writeMemorySection(const SmallVector<char, 0> &DataBytes); + void writeGlobalSection(const SmallVector<WasmGlobal, 4> &Globals); + void writeExportSection(const SmallVector<WasmExport, 4> &Exports); + void writeElemSection(const SmallVector<uint32_t, 4> &TableElems); + void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices, + const SmallVector<WasmFunction, 4> &Functions); + uint64_t + writeDataSection(const SmallVector<char, 0> &DataBytes, + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices); + void writeNameSection(const SmallVector<WasmFunction, 4> &Functions, + const SmallVector<WasmImport, 4> &Imports, + uint32_t NumFuncImports); + void writeCodeRelocSection( + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices); + void writeDataRelocSection( + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices, + uint64_t DataSectionHeaderSize); + void writeLinkingMetaDataSection(bool HasStackPointer, + 
uint32_t StackPointerGlobal); }; + } // end anonymous namespace WasmObjectWriter::~WasmObjectWriter() {} @@ -278,86 +377,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, DataRelocations.push_back(Rec); } -namespace { - -// The signature of a wasm function, in a struct capable of being used as a -// DenseMap key. -struct WasmFunctionType { - // Support empty and tombstone instances, needed by DenseMap. - enum { Plain, Empty, Tombstone } State; - - // The return types of the function. - SmallVector<wasm::ValType, 1> Returns; - - // The parameter types of the function. - SmallVector<wasm::ValType, 4> Params; - - WasmFunctionType() : State(Plain) {} - - bool operator==(const WasmFunctionType &Other) const { - return State == Other.State && Returns == Other.Returns && - Params == Other.Params; - } -}; - -// Traits for using WasmFunctionType in a DenseMap. -struct WasmFunctionTypeDenseMapInfo { - static WasmFunctionType getEmptyKey() { - WasmFunctionType FuncTy; - FuncTy.State = WasmFunctionType::Empty; - return FuncTy; - } - static WasmFunctionType getTombstoneKey() { - WasmFunctionType FuncTy; - FuncTy.State = WasmFunctionType::Tombstone; - return FuncTy; - } - static unsigned getHashValue(const WasmFunctionType &FuncTy) { - uintptr_t Value = FuncTy.State; - for (wasm::ValType Ret : FuncTy.Returns) - Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret)); - for (wasm::ValType Param : FuncTy.Params) - Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param)); - return Value; - } - static bool isEqual(const WasmFunctionType &LHS, - const WasmFunctionType &RHS) { - return LHS == RHS; - } -}; - -// A wasm import to be written into the import section. -struct WasmImport { - StringRef ModuleName; - StringRef FieldName; - unsigned Kind; - int32_t Type; -}; - -// A wasm function to be written into the function section. -struct WasmFunction { - int32_t Type; - const MCSymbolWasm *Sym; -}; - -// A wasm export to be written into the export section. -struct WasmExport { - StringRef FieldName; - unsigned Kind; - uint32_t Index; -}; - -// A wasm global to be written into the global section. -struct WasmGlobal { - wasm::ValType Type; - bool IsMutable; - bool HasImport; - uint64_t InitialValue; - uint32_t ImportIndex; -}; - -} // end anonymous namespace - // Write X as an (unsigned) LEB value at offset Offset in Stream, padded // to allow patching. 
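// (To illustrate the padding: a ULEB128 is forced to a fixed width by
// setting the continuation bit on bytes that would otherwise end the
// encoding, so the value 3, minimally the single byte 0x03, becomes
// 0x83 0x80 0x80 0x80 0x00 when padded to five bytes. The field can then be
// patched in place later without shifting any section contents.)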
static void @@ -529,6 +548,367 @@ static void WriteTypeRelocations( } } +void WasmObjectWriter::writeTypeSection( + const SmallVector<WasmFunctionType, 4> &FunctionTypes) { + if (FunctionTypes.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_TYPE); + + encodeULEB128(FunctionTypes.size(), getStream()); + + for (const WasmFunctionType &FuncTy : FunctionTypes) { + encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream()); + encodeULEB128(FuncTy.Params.size(), getStream()); + for (wasm::ValType Ty : FuncTy.Params) + writeValueType(Ty); + encodeULEB128(FuncTy.Returns.size(), getStream()); + for (wasm::ValType Ty : FuncTy.Returns) + writeValueType(Ty); + } + + endSection(Section); +} + +void WasmObjectWriter::writeImportSection( + const SmallVector<WasmImport, 4> &Imports) { + if (Imports.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_IMPORT); + + encodeULEB128(Imports.size(), getStream()); + for (const WasmImport &Import : Imports) { + StringRef ModuleName = Import.ModuleName; + encodeULEB128(ModuleName.size(), getStream()); + writeBytes(ModuleName); + + StringRef FieldName = Import.FieldName; + encodeULEB128(FieldName.size(), getStream()); + writeBytes(FieldName); + + encodeULEB128(Import.Kind, getStream()); + + switch (Import.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + encodeULEB128(Import.Type, getStream()); + break; + case wasm::WASM_EXTERNAL_GLOBAL: + encodeSLEB128(int32_t(Import.Type), getStream()); + encodeULEB128(0, getStream()); // mutability + break; + default: + llvm_unreachable("unsupported import kind"); + } + } + + endSection(Section); +} + +void WasmObjectWriter::writeFunctionSection( + const SmallVector<WasmFunction, 4> &Functions) { + if (Functions.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_FUNCTION); + + encodeULEB128(Functions.size(), getStream()); + for (const WasmFunction &Func : Functions) + encodeULEB128(Func.Type, getStream()); + + endSection(Section); +} + +void WasmObjectWriter::writeTableSection( + const SmallVector<uint32_t, 4> &TableElems) { + // For now, always emit the table section, since indirect calls are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no indirect calls. + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_TABLE); + + // The number of tables, fixed to 1 for now. + encodeULEB128(1, getStream()); + + encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); + + encodeULEB128(0, getStream()); // flags + encodeULEB128(TableElems.size(), getStream()); // initial + + endSection(Section); +} + +void WasmObjectWriter::writeMemorySection( + const SmallVector<char, 0> &DataBytes) { + // For now, always emit the memory section, since loads and stores are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no loads or stores. 
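// (Wasm linear memory is sized in 64 KiB pages, which is what
// wasm::WasmPageSize denotes; the NumPages expression below is a ceiling
// division, so even a single byte of data reserves one whole page.)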
+ SectionBookkeeping Section; + uint32_t NumPages = + (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize; + + startSection(Section, wasm::WASM_SEC_MEMORY); + encodeULEB128(1, getStream()); // number of memory spaces + + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumPages, getStream()); // initial + + endSection(Section); +} + +void WasmObjectWriter::writeGlobalSection( + const SmallVector<WasmGlobal, 4> &Globals) { + if (Globals.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_GLOBAL); + + encodeULEB128(Globals.size(), getStream()); + for (const WasmGlobal &Global : Globals) { + writeValueType(Global.Type); + write8(Global.IsMutable); + + if (Global.HasImport) { + assert(Global.InitialValue == 0); + write8(wasm::WASM_OPCODE_GET_GLOBAL); + encodeULEB128(Global.ImportIndex, getStream()); + } else { + assert(Global.ImportIndex == 0); + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(Global.InitialValue, getStream()); // offset + } + write8(wasm::WASM_OPCODE_END); + } + + endSection(Section); +} + +void WasmObjectWriter::writeExportSection( + const SmallVector<WasmExport, 4> &Exports) { + if (Exports.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_EXPORT); + + encodeULEB128(Exports.size(), getStream()); + for (const WasmExport &Export : Exports) { + encodeULEB128(Export.FieldName.size(), getStream()); + writeBytes(Export.FieldName); + + encodeSLEB128(Export.Kind, getStream()); + + encodeULEB128(Export.Index, getStream()); + } + + endSection(Section); +} + +void WasmObjectWriter::writeElemSection( + const SmallVector<uint32_t, 4> &TableElems) { + if (TableElems.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_ELEM); + + encodeULEB128(1, getStream()); // number of "segments" + encodeULEB128(0, getStream()); // the table index + + // init expr for starting offset + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(0, getStream()); + write8(wasm::WASM_OPCODE_END); + + encodeULEB128(TableElems.size(), getStream()); + for (uint32_t Elem : TableElems) + encodeULEB128(Elem, getStream()); + + endSection(Section); +} + +void WasmObjectWriter::writeCodeSection( + const MCAssembler &Asm, const MCAsmLayout &Layout, + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices, + const SmallVector<WasmFunction, 4> &Functions) { + if (Functions.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CODE); + + encodeULEB128(Functions.size(), getStream()); + + for (const WasmFunction &Func : Functions) { + MCSectionWasm &FuncSection = + static_cast<MCSectionWasm &>(Func.Sym->getSection()); + + if (Func.Sym->isVariable()) + report_fatal_error("weak symbols not supported yet"); + + if (Func.Sym->getOffset() != 0) + report_fatal_error("function sections must contain one function each"); + + if (!Func.Sym->getSize()) + report_fatal_error("function symbols must have a size set with .size"); + + int64_t Size = 0; + if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) + report_fatal_error(".size expression must be evaluatable"); + + encodeULEB128(Size, getStream()); + + FuncSection.setSectionOffset(getStream().tell() - + Section.ContentsOffset); + + Asm.writeSectionData(&FuncSection, Layout); + } + + // Apply the type index fixups for call_indirect etc. instructions. 
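// (A call_indirect operand is an index into the type section, and that
// index is only final once every signature has been deduplicated, so the
// instruction encoder leaves a padded five-byte ULEB128 placeholder; the
// loop below overwrites each placeholder in place via pwrite.)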
+ for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { + uint32_t Type = TypeIndexFixupTypes[i]; + unsigned Padding = PaddingFor5ByteULEB128(Type); + + const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; + assert(Fixup.Addend == 0); + assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); + uint64_t Offset = Fixup.Offset + + Fixup.FixupSection->getSectionOffset(); + + uint8_t Buffer[16]; + unsigned SizeLen = encodeULEB128(Type, Buffer, Padding); + assert(SizeLen == 5); + getStream().pwrite((char *)Buffer, SizeLen, + Section.ContentsOffset + Offset); + } + + // Apply fixups. + ApplyRelocations(CodeRelocations, getStream(), SymbolIndices, + Section.ContentsOffset); + + endSection(Section); +} + +uint64_t WasmObjectWriter::writeDataSection( + const SmallVector<char, 0> &DataBytes, + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) { + if (DataBytes.empty()) + return 0; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_DATA); + + encodeULEB128(1, getStream()); // count + encodeULEB128(0, getStream()); // memory index + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(0, getStream()); // offset + write8(wasm::WASM_OPCODE_END); + encodeULEB128(DataBytes.size(), getStream()); // size + uint32_t HeaderSize = getStream().tell() - Section.ContentsOffset; + writeBytes(DataBytes); // data + + // Apply fixups. + ApplyRelocations(DataRelocations, getStream(), SymbolIndices, + Section.ContentsOffset + HeaderSize); + + endSection(Section); + return HeaderSize; +} + +void WasmObjectWriter::writeNameSection( + const SmallVector<WasmFunction, 4> &Functions, + const SmallVector<WasmImport, 4> &Imports, + unsigned NumFuncImports) { + uint32_t TotalFunctions = NumFuncImports + Functions.size(); + if (TotalFunctions == 0) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "name"); + SectionBookkeeping SubSection; + startSection(SubSection, wasm::WASM_NAMES_FUNCTION); + + encodeULEB128(TotalFunctions, getStream()); + uint32_t Index = 0; + for (const WasmImport &Import : Imports) { + if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { + encodeULEB128(Index, getStream()); + encodeULEB128(Import.FieldName.size(), getStream()); + writeBytes(Import.FieldName); + ++Index; + } + } + for (const WasmFunction &Func : Functions) { + encodeULEB128(Index, getStream()); + encodeULEB128(Func.Sym->getName().size(), getStream()); + writeBytes(Func.Sym->getName()); + ++Index; + } + + endSection(SubSection); + endSection(Section); +} + +void WasmObjectWriter::writeCodeRelocSection( + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) { + // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + // for descriptions of the reloc sections. + + if (CodeRelocations.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE"); + + encodeULEB128(wasm::WASM_SEC_CODE, getStream()); + encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream()); + + WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0); + WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); + + endSection(Section); +} + +void WasmObjectWriter::writeDataRelocSection( + DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices, + uint64_t DataSectionHeaderSize) { + // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + // for descriptions of the reloc sections. 
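// (Per that document, each entry records a relocation type, the offset of
// the patched field within the target section, and the index it refers to,
// with an addend for some types; LEB-encoded relocation targets such as
// R_WEBASSEMBLY_TYPE_INDEX_LEB occupy padded five-byte fields. This summary
// follows the linked conventions document rather than code shown here.)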
+ + if (DataRelocations.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA"); + + encodeULEB128(wasm::WASM_SEC_DATA, getStream()); + encodeULEB128(DataRelocations.size(), getStream()); + + WriteRelocations(DataRelocations, getStream(), SymbolIndices, + DataSectionHeaderSize); + + endSection(Section); +} + +void WasmObjectWriter::writeLinkingMetaDataSection( + bool HasStackPointer, uint32_t StackPointerGlobal) { + if (!HasStackPointer) + return; + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); + + encodeULEB128(1, getStream()); // count + + encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type + encodeULEB128(StackPointerGlobal, getStream()); // id + + endSection(Section); +} + void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { MCContext &Ctx = Asm.getContext(); @@ -730,16 +1110,21 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (IsAddressTaken.count(&WS)) TableElems.push_back(Index); } else { - if (WS.getOffset() != 0) - report_fatal_error("data sections must contain one variable each"); - if (!WS.getSize()) - report_fatal_error("data symbols must have a size set with .size"); - - int64_t Size = 0; - if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) - report_fatal_error(".size expression must be evaluatable"); + if (WS.isTemporary() && !WS.getSize()) + continue; if (WS.isDefined(false)) { + if (WS.getOffset() != 0) + report_fatal_error("data sections must contain one variable each: " + + WS.getName()); + if (!WS.getSize()) + report_fatal_error("data symbols must have a size set with .size: " + + WS.getName()); + + int64_t Size = 0; + if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) + report_fatal_error(".size expression must be evaluatable"); + MCSectionWasm &DataSection = static_cast<MCSectionWasm &>(WS.getSection()); @@ -827,322 +1212,23 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // Write out the Wasm header. 
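// (The header written below is the fixed eight-byte module preamble: the
// magic bytes 0x00 0x61 0x73 0x6D, i.e. "\0asm", followed by the
// little-endian version word 0x01 0x00 0x00 0x00 for version 1.)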
writeHeader(Asm); - SectionBookkeeping Section; - - // === Type Section ========================================================= - if (!FunctionTypes.empty()) { - startSection(Section, wasm::WASM_SEC_TYPE); - - encodeULEB128(FunctionTypes.size(), getStream()); - - for (WasmFunctionType &FuncTy : FunctionTypes) { - encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream()); - encodeULEB128(FuncTy.Params.size(), getStream()); - for (wasm::ValType Ty : FuncTy.Params) - writeValueType(Ty); - encodeULEB128(FuncTy.Returns.size(), getStream()); - for (wasm::ValType Ty : FuncTy.Returns) - writeValueType(Ty); - } - - endSection(Section); - } - - // === Import Section ======================================================== - if (!Imports.empty()) { - startSection(Section, wasm::WASM_SEC_IMPORT); - - encodeULEB128(Imports.size(), getStream()); - for (const WasmImport &Import : Imports) { - StringRef ModuleName = Import.ModuleName; - encodeULEB128(ModuleName.size(), getStream()); - writeBytes(ModuleName); - - StringRef FieldName = Import.FieldName; - encodeULEB128(FieldName.size(), getStream()); - writeBytes(FieldName); - - encodeULEB128(Import.Kind, getStream()); - - switch (Import.Kind) { - case wasm::WASM_EXTERNAL_FUNCTION: - encodeULEB128(Import.Type, getStream()); - break; - case wasm::WASM_EXTERNAL_GLOBAL: - encodeSLEB128(int32_t(Import.Type), getStream()); - encodeULEB128(0, getStream()); // mutability - break; - default: - llvm_unreachable("unsupported import kind"); - } - } - - endSection(Section); - } - - // === Function Section ====================================================== - if (!Functions.empty()) { - startSection(Section, wasm::WASM_SEC_FUNCTION); - - encodeULEB128(Functions.size(), getStream()); - for (const WasmFunction &Func : Functions) - encodeULEB128(Func.Type, getStream()); - - endSection(Section); - } - - // === Table Section ========================================================= - // For now, always emit the table section, since indirect calls are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no indirect calls. - startSection(Section, wasm::WASM_SEC_TABLE); - - // The number of tables, fixed to 1 for now. - encodeULEB128(1, getStream()); - - encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); - - encodeULEB128(0, getStream()); // flags - encodeULEB128(TableElems.size(), getStream()); // initial - - endSection(Section); - - // === Memory Section ======================================================== - // For now, always emit the memory section, since loads and stores are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no loads or stores. 
- uint32_t NumPages = - (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize; - - startSection(Section, wasm::WASM_SEC_MEMORY); - encodeULEB128(1, getStream()); // number of memory spaces - - encodeULEB128(0, getStream()); // flags - encodeULEB128(NumPages, getStream()); // initial - - endSection(Section); - - // === Global Section ======================================================== - if (!Globals.empty()) { - startSection(Section, wasm::WASM_SEC_GLOBAL); - - encodeULEB128(Globals.size(), getStream()); - for (const WasmGlobal &Global : Globals) { - writeValueType(Global.Type); - write8(Global.IsMutable); - - if (Global.HasImport) { - assert(Global.InitialValue == 0); - write8(wasm::WASM_OPCODE_GET_GLOBAL); - encodeULEB128(Global.ImportIndex, getStream()); - } else { - assert(Global.ImportIndex == 0); - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(Global.InitialValue, getStream()); // offset - } - write8(wasm::WASM_OPCODE_END); - } - - endSection(Section); - } - - // === Export Section ======================================================== - if (!Exports.empty()) { - startSection(Section, wasm::WASM_SEC_EXPORT); - - encodeULEB128(Exports.size(), getStream()); - for (const WasmExport &Export : Exports) { - encodeULEB128(Export.FieldName.size(), getStream()); - writeBytes(Export.FieldName); - - encodeSLEB128(Export.Kind, getStream()); - - encodeULEB128(Export.Index, getStream()); - } - - endSection(Section); - } - -#if 0 // TODO: Start Section - if (HaveStartFunction) { - // === Start Section ========================================================= - startSection(Section, wasm::WASM_SEC_START); - - encodeSLEB128(StartFunction, getStream()); - - endSection(Section); - } -#endif - - // === Elem Section ========================================================== - if (!TableElems.empty()) { - startSection(Section, wasm::WASM_SEC_ELEM); - - encodeULEB128(1, getStream()); // number of "segments" - encodeULEB128(0, getStream()); // the table index - - // init expr for starting offset - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(0, getStream()); - write8(wasm::WASM_OPCODE_END); - - encodeULEB128(TableElems.size(), getStream()); - for (uint32_t Elem : TableElems) - encodeULEB128(Elem, getStream()); - - endSection(Section); - } - - // === Code Section ========================================================== - if (!Functions.empty()) { - startSection(Section, wasm::WASM_SEC_CODE); - - encodeULEB128(Functions.size(), getStream()); - - for (const WasmFunction &Func : Functions) { - MCSectionWasm &FuncSection = - static_cast<MCSectionWasm &>(Func.Sym->getSection()); - - if (Func.Sym->isVariable()) - report_fatal_error("weak symbols not supported yet"); - - if (Func.Sym->getOffset() != 0) - report_fatal_error("function sections must contain one function each"); - - if (!Func.Sym->getSize()) - report_fatal_error("function symbols must have a size set with .size"); - - int64_t Size = 0; - if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) - report_fatal_error(".size expression must be evaluatable"); - - encodeULEB128(Size, getStream()); - - FuncSection.setSectionOffset(getStream().tell() - - Section.ContentsOffset); - - Asm.writeSectionData(&FuncSection, Layout); - } - - // Apply the type index fixups for call_indirect etc. instructions. 
- for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { - uint32_t Type = TypeIndexFixupTypes[i]; - unsigned Padding = PaddingFor5ByteULEB128(Type); - - const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; - assert(Fixup.Addend == 0); - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - uint64_t Offset = Fixup.Offset + - Fixup.FixupSection->getSectionOffset(); - - uint8_t Buffer[16]; - unsigned SizeLen = encodeULEB128(Type, Buffer, Padding); - assert(SizeLen == 5); - getStream().pwrite((char *)Buffer, SizeLen, - Section.ContentsOffset + Offset); - } - - // Apply fixups. - ApplyRelocations(CodeRelocations, getStream(), SymbolIndices, - Section.ContentsOffset); - - endSection(Section); - } - - // === Data Section ========================================================== - uint32_t DataSectionHeaderSize = 0; - if (!DataBytes.empty()) { - startSection(Section, wasm::WASM_SEC_DATA); - - encodeULEB128(1, getStream()); // count - encodeULEB128(0, getStream()); // memory index - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(0, getStream()); // offset - write8(wasm::WASM_OPCODE_END); - encodeULEB128(DataBytes.size(), getStream()); // size - DataSectionHeaderSize = getStream().tell() - Section.ContentsOffset; - writeBytes(DataBytes); // data - - // Apply fixups. - ApplyRelocations(DataRelocations, getStream(), SymbolIndices, - Section.ContentsOffset + DataSectionHeaderSize); - - endSection(Section); - } - - // === Name Section ========================================================== - uint32_t TotalFunctions = NumFuncImports + Functions.size(); - if (TotalFunctions != 0) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "name"); - SectionBookkeeping SubSection; - startSection(SubSection, wasm::WASM_NAMES_FUNCTION); - - encodeULEB128(TotalFunctions, getStream()); - uint32_t Index = 0; - for (const WasmImport &Import : Imports) { - if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { - encodeULEB128(Index, getStream()); - encodeULEB128(Import.FieldName.size(), getStream()); - writeBytes(Import.FieldName); - ++Index; - } - } - for (const WasmFunction &Func : Functions) { - encodeULEB128(Index, getStream()); - encodeULEB128(Func.Sym->getName().size(), getStream()); - writeBytes(Func.Sym->getName()); - ++Index; - } - - endSection(SubSection); - endSection(Section); - } - - // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md - // for descriptions of the reloc sections. 
- - // === Code Reloc Section ==================================================== - if (!CodeRelocations.empty()) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE"); - - encodeULEB128(wasm::WASM_SEC_CODE, getStream()); - - encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream()); - - WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0); - WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); - - endSection(Section); - } - - // === Data Reloc Section ==================================================== - if (!DataRelocations.empty()) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA"); - - encodeULEB128(wasm::WASM_SEC_DATA, getStream()); - - encodeULEB128(DataRelocations.size(), getStream()); - - WriteRelocations(DataRelocations, getStream(), SymbolIndices, - DataSectionHeaderSize); - - endSection(Section); - } - - // === Linking Metadata Section ============================================== - if (HasStackPointer) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); - - encodeULEB128(1, getStream()); // count - - encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type - encodeULEB128(StackPointerGlobal, getStream()); // id - - endSection(Section); - } + writeTypeSection(FunctionTypes); + writeImportSection(Imports); + writeFunctionSection(Functions); + writeTableSection(TableElems); + writeMemorySection(DataBytes); + writeGlobalSection(Globals); + writeExportSection(Exports); + // TODO: Start Section + writeElemSection(TableElems); + writeCodeSection(Asm, Layout, SymbolIndices, Functions); + uint64_t DataSectionHeaderSize = writeDataSection(DataBytes, SymbolIndices); + writeNameSection(Functions, Imports, NumFuncImports); + writeCodeRelocSection(SymbolIndices); + writeDataRelocSection(SymbolIndices, DataSectionHeaderSize); + writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal); // TODO: Translate the .comment section to the output. - // TODO: Translate debug sections to the output. 
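// (The call order above matches the section IDs that the wasm binary format
// mandates for known sections: type (1), import (2), function (3),
// table (4), memory (5), global (6), export (7), start (8), elem (9),
// code (10) and data (11); the custom sections "name", "reloc.CODE",
// "reloc.DATA" and "linking" are emitted after them.)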
} diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp index f652ff57f30d..21d29835624e 100644 --- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp +++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -17,6 +17,11 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -36,16 +41,80 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false) +LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind) LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind) LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags) -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineEntry) -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceColumnEntry) -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceFileChecksumEntry) -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineInfo) -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineBlock) -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::InlineeInfo) -LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::InlineeSite) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceColumnEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceFileChecksumEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineBlock) +LLVM_YAML_DECLARE_MAPPING_TRAITS(InlineeSite) + +namespace llvm { +namespace CodeViewYAML { +namespace detail { +struct YAMLSubsectionBase { + explicit YAMLSubsectionBase(DebugSubsectionKind Kind) : Kind(Kind) {} + DebugSubsectionKind Kind; + virtual ~YAMLSubsectionBase() {} + + virtual void map(IO &IO) = 0; + virtual std::unique_ptr<DebugSubsection> + toCodeViewSubsection(DebugStringTableSubsection *UseStrings, + DebugChecksumsSubsection *UseChecksums) const = 0; +}; +} +} +} + +namespace { +struct YAMLChecksumsSubsection : public YAMLSubsectionBase { + YAMLChecksumsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {} + + void map(IO &IO) override; + std::unique_ptr<DebugSubsection> + toCodeViewSubsection(DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected<std::shared_ptr<YAMLChecksumsSubsection>> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &FC); + + std::vector<SourceFileChecksumEntry> Checksums; +}; + +struct YAMLLinesSubsection : public YAMLSubsectionBase { + YAMLLinesSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Lines) {} + + void map(IO &IO) override; + std::unique_ptr<DebugSubsection> + toCodeViewSubsection(DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected<std::shared_ptr<YAMLLinesSubsection>> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugLinesSubsectionRef &Lines); + + SourceLineInfo Lines; +}; + +struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase { + YAMLInlineeLinesSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::InlineeLines) 
{} + + void map(IO &IO) override; + std::unique_ptr<DebugSubsection> + toCodeViewSubsection(DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected<std::shared_ptr<YAMLInlineeLinesSubsection>> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugInlineeLinesSubsectionRef &Lines); + + InlineeInfo InlineeLines; +}; +} void ScalarBitSetTraits<LineFlags>::bitset(IO &io, LineFlags &Flags) { io.bitSetCase(Flags, "HasColumnInfo", LF_HaveColumns); @@ -99,21 +168,6 @@ void MappingTraits<SourceFileChecksumEntry>::mapping( IO.mapRequired("Checksum", Obj.ChecksumBytes); } -void MappingTraits<SourceLineInfo>::mapping(IO &IO, SourceLineInfo &Obj) { - IO.mapRequired("CodeSize", Obj.CodeSize); - - IO.mapRequired("Flags", Obj.Flags); - IO.mapRequired("RelocOffset", Obj.RelocOffset); - IO.mapRequired("RelocSegment", Obj.RelocSegment); - IO.mapRequired("Blocks", Obj.Blocks); -} - -void MappingTraits<SourceFileInfo>::mapping(IO &IO, SourceFileInfo &Obj) { - IO.mapOptional("Checksums", Obj.FileChecksums); - IO.mapOptional("Lines", Obj.LineFragments); - IO.mapOptional("InlineeLines", Obj.Inlinees); -} - void MappingTraits<InlineeSite>::mapping(IO &IO, InlineeSite &Obj) { IO.mapRequired("FileName", Obj.FileName); IO.mapRequired("LineNum", Obj.SourceLineNum); @@ -121,7 +175,310 @@ void MappingTraits<InlineeSite>::mapping(IO &IO, InlineeSite &Obj) { IO.mapOptional("ExtraFiles", Obj.ExtraFiles); } -void MappingTraits<InlineeInfo>::mapping(IO &IO, InlineeInfo &Obj) { - IO.mapRequired("HasExtraFiles", Obj.HasExtraFiles); - IO.mapRequired("Sites", Obj.Sites); +void YAMLChecksumsSubsection::map(IO &IO) { + IO.mapTag("!FileChecksums", true); + IO.mapRequired("Checksums", Checksums); +} + +void YAMLLinesSubsection::map(IO &IO) { + IO.mapTag("!Lines", true); + IO.mapRequired("CodeSize", Lines.CodeSize); + + IO.mapRequired("Flags", Lines.Flags); + IO.mapRequired("RelocOffset", Lines.RelocOffset); + IO.mapRequired("RelocSegment", Lines.RelocSegment); + IO.mapRequired("Blocks", Lines.Blocks); +} + +void YAMLInlineeLinesSubsection::map(IO &IO) { + IO.mapTag("!InlineeLines", true); + IO.mapRequired("HasExtraFiles", InlineeLines.HasExtraFiles); + IO.mapRequired("Sites", InlineeLines.Sites); +} + +void MappingTraits<YAMLDebugSubsection>::mapping( + IO &IO, YAMLDebugSubsection &Subsection) { + if (!IO.outputting()) { + if (IO.mapTag("!FileChecksums")) { + auto SS = std::make_shared<YAMLChecksumsSubsection>(); + Subsection.Subsection = SS; + } else if (IO.mapTag("!Lines")) { + Subsection.Subsection = std::make_shared<YAMLLinesSubsection>(); + } else if (IO.mapTag("!InlineeLines")) { + Subsection.Subsection = std::make_shared<YAMLInlineeLinesSubsection>(); + } else { + llvm_unreachable("Unexpected subsection tag!"); + } + } + Subsection.Subsection->map(IO); +} + +static Expected<const YAMLChecksumsSubsection &> +findChecksums(ArrayRef<YAMLDebugSubsection> Subsections) { + for (const auto &SS : Subsections) { + if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) { + return static_cast<const YAMLChecksumsSubsection &>(*SS.Subsection); + } + } + return make_error<CodeViewError>(cv_error_code::no_records); +} + +std::unique_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection( + DebugStringTableSubsection *UseStrings, + DebugChecksumsSubsection *UseChecksums) const { + assert(UseStrings && !UseChecksums); + auto Result = llvm::make_unique<DebugChecksumsSubsection>(*UseStrings); + 
for (const auto &CS : Checksums) { + Result->addChecksum(CS.FileName, CS.Kind, CS.ChecksumBytes.Bytes); + } + return std::move(Result); +} + +std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection( + DebugStringTableSubsection *UseStrings, + DebugChecksumsSubsection *UseChecksums) const { + assert(UseStrings && UseChecksums); + auto Result = + llvm::make_unique<DebugLinesSubsection>(*UseChecksums, *UseStrings); + Result->setCodeSize(Lines.CodeSize); + Result->setRelocationAddress(Lines.RelocSegment, Lines.RelocOffset); + Result->setFlags(Lines.Flags); + for (const auto &LC : Lines.Blocks) { + Result->createBlock(LC.FileName); + if (Result->hasColumnInfo()) { + for (const auto &Item : zip(LC.Lines, LC.Columns)) { + auto &L = std::get<0>(Item); + auto &C = std::get<1>(Item); + uint32_t LE = L.LineStart + L.EndDelta; + Result->addLineAndColumnInfo(L.Offset, + LineInfo(L.LineStart, LE, L.IsStatement), + C.StartColumn, C.EndColumn); + } + } else { + for (const auto &L : LC.Lines) { + uint32_t LE = L.LineStart + L.EndDelta; + Result->addLineInfo(L.Offset, LineInfo(L.LineStart, LE, L.IsStatement)); + } + } + } + return llvm::cast<DebugSubsection>(std::move(Result)); +} + +std::unique_ptr<DebugSubsection> +YAMLInlineeLinesSubsection::toCodeViewSubsection( + DebugStringTableSubsection *UseStrings, + DebugChecksumsSubsection *UseChecksums) const { + assert(UseChecksums); + auto Result = llvm::make_unique<DebugInlineeLinesSubsection>( + *UseChecksums, InlineeLines.HasExtraFiles); + + for (const auto &Site : InlineeLines.Sites) { + Result->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName, + Site.SourceLineNum); + if (!InlineeLines.HasExtraFiles) + continue; + + for (auto EF : Site.ExtraFiles) { + Result->addExtraFile(EF); + } + } + return llvm::cast<DebugSubsection>(std::move(Result)); +} + +static Expected<SourceFileChecksumEntry> +convertOneChecksum(const DebugStringTableSubsectionRef &Strings, + const FileChecksumEntry &CS) { + auto ExpectedString = Strings.getString(CS.FileNameOffset); + if (!ExpectedString) + return ExpectedString.takeError(); + + SourceFileChecksumEntry Result; + Result.ChecksumBytes.Bytes = CS.Checksum; + Result.Kind = CS.Kind; + Result.FileName = *ExpectedString; + return Result; +} + +static Expected<StringRef> +getFileName(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) { + auto Iter = Checksums.getArray().at(FileID); + if (Iter == Checksums.getArray().end()) + return make_error<CodeViewError>(cv_error_code::no_records); + uint32_t Offset = Iter->FileNameOffset; + return Strings.getString(Offset); +} + +Expected<std::shared_ptr<YAMLChecksumsSubsection>> +YAMLChecksumsSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &FC) { + auto Result = std::make_shared<YAMLChecksumsSubsection>(); + + for (const auto &CS : FC) { + auto ConvertedCS = convertOneChecksum(Strings, CS); + if (!ConvertedCS) + return ConvertedCS.takeError(); + Result->Checksums.push_back(*ConvertedCS); + } + return Result; +} + +Expected<std::shared_ptr<YAMLLinesSubsection>> +YAMLLinesSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugLinesSubsectionRef &Lines) { + auto Result = std::make_shared<YAMLLinesSubsection>(); + Result->Lines.CodeSize = Lines.header()->CodeSize; + Result->Lines.RelocOffset = Lines.header()->RelocOffset; + Result->Lines.RelocSegment = 
Lines.header()->RelocSegment; + Result->Lines.Flags = static_cast<LineFlags>(uint16_t(Lines.header()->Flags)); + for (const auto &L : Lines) { + SourceLineBlock Block; + auto EF = getFileName(Strings, Checksums, L.NameIndex); + if (!EF) + return EF.takeError(); + Block.FileName = *EF; + if (Lines.hasColumnInfo()) { + for (const auto &C : L.Columns) { + SourceColumnEntry SCE; + SCE.EndColumn = C.EndColumn; + SCE.StartColumn = C.StartColumn; + Block.Columns.push_back(SCE); + } + } + for (const auto &LN : L.LineNumbers) { + SourceLineEntry SLE; + LineInfo LI(LN.Flags); + SLE.Offset = LN.Offset; + SLE.LineStart = LI.getStartLine(); + SLE.EndDelta = LI.getLineDelta(); + SLE.IsStatement = LI.isStatement(); + Block.Lines.push_back(SLE); + } + Result->Lines.Blocks.push_back(Block); + } + return Result; +} + +Expected<std::shared_ptr<YAMLInlineeLinesSubsection>> +YAMLInlineeLinesSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugInlineeLinesSubsectionRef &Lines) { + auto Result = std::make_shared<YAMLInlineeLinesSubsection>(); + + Result->InlineeLines.HasExtraFiles = Lines.hasExtraFiles(); + for (const auto &IL : Lines) { + InlineeSite Site; + auto ExpF = getFileName(Strings, Checksums, IL.Header->FileID); + if (!ExpF) + return ExpF.takeError(); + Site.FileName = *ExpF; + Site.Inlinee = IL.Header->Inlinee.getIndex(); + Site.SourceLineNum = IL.Header->SourceLineNum; + if (Lines.hasExtraFiles()) { + for (const auto EF : IL.ExtraFiles) { + auto ExpF2 = getFileName(Strings, Checksums, EF); + if (!ExpF2) + return ExpF2.takeError(); + Site.ExtraFiles.push_back(*ExpF2); + } + } + Result->InlineeLines.Sites.push_back(Site); + } + return Result; +} + +Expected<std::vector<std::unique_ptr<DebugSubsection>>> +llvm::CodeViewYAML::convertSubsectionList( + ArrayRef<YAMLDebugSubsection> Subsections, + DebugStringTableSubsection &Strings) { + std::vector<std::unique_ptr<DebugSubsection>> Result; + if (Subsections.empty()) + return std::move(Result); + + auto Checksums = findChecksums(Subsections); + if (!Checksums) + return Checksums.takeError(); + auto ChecksumsBase = Checksums->toCodeViewSubsection(&Strings, nullptr); + DebugChecksumsSubsection &CS = + llvm::cast<DebugChecksumsSubsection>(*ChecksumsBase); + for (const auto &SS : Subsections) { + // We've already converted the checksums subsection, don't do it + // twice. 
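// (The checksums subsection was converted ahead of this loop because the
// lines and inlinee-lines subsections refer to source files by index into
// it, so every converted subsection must be built against the same
// DebugChecksumsSubsection instance, the CS reference obtained above.)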
+ std::unique_ptr<DebugSubsection> CVS; + if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) + CVS = std::move(ChecksumsBase); + else + CVS = SS.Subsection->toCodeViewSubsection(&Strings, &CS); + Result.push_back(std::move(CVS)); + } + return std::move(Result); +} + +namespace { +struct SubsectionConversionVisitor : public DebugSubsectionVisitor { + explicit SubsectionConversionVisitor( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) + : Strings(Strings), Checksums(Checksums) {} + + Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override; + Error visitLines(DebugLinesSubsectionRef &Lines) override; + Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) override; + Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) override; + + YAMLDebugSubsection Subsection; + +private: + const DebugStringTableSubsectionRef &Strings; + const DebugChecksumsSubsectionRef &Checksums; +}; + +Error SubsectionConversionVisitor::visitUnknown( + DebugUnknownSubsectionRef &Unknown) { + return make_error<CodeViewError>(cv_error_code::operation_unsupported); +} + +Error SubsectionConversionVisitor::visitLines(DebugLinesSubsectionRef &Lines) { + auto Result = + YAMLLinesSubsection::fromCodeViewSubsection(Strings, Checksums, Lines); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitFileChecksums( + DebugChecksumsSubsectionRef &Checksums) { + auto Result = + YAMLChecksumsSubsection::fromCodeViewSubsection(Strings, Checksums); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitInlineeLines( + DebugInlineeLinesSubsectionRef &Inlinees) { + auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection( + Strings, Checksums, Inlinees); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} +} + +Expected<YAMLDebugSubsection> YAMLDebugSubsection::fromCodeViewSubection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugSubsectionRecord &SS) { + SubsectionConversionVisitor V(Strings, Checksums); + if (auto EC = visitDebugSubsection(SS, V)) + return std::move(EC); + + return V.Subsection; } diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index 6e8bb5c7372c..bd97af3a9323 100644 --- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -148,7 +148,8 @@ struct SymbolRecordBase { virtual ~SymbolRecordBase() {} virtual void map(yaml::IO &io) = 0; virtual codeview::CVSymbol - toCodeViewSymbol(BumpPtrAllocator &Allocator) const = 0; + toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const = 0; virtual Error fromCodeViewSymbol(codeview::CVSymbol Type) = 0; }; @@ -159,8 +160,9 @@ template <typename T> struct SymbolRecordImpl : public SymbolRecordBase { void map(yaml::IO &io) override; codeview::CVSymbol - toCodeViewSymbol(BumpPtrAllocator &Allocator) const override { - return SymbolSerializer::writeOneSymbol(Symbol, Allocator); + toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const override { + return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Container); } Error fromCodeViewSymbol(codeview::CVSymbol CVS) override { return SymbolDeserializer::deserializeAs<T>(CVS, Symbol); @@ -429,8 
+431,8 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) { } CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol( - BumpPtrAllocator &Allocator) const { - return Symbol->toCodeViewSymbol(Allocator); + BumpPtrAllocator &Allocator, CodeViewContainer Container) const { + return Symbol->toCodeViewSymbol(Allocator, Container); } namespace llvm { diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index eb81e58b9b0e..17c60348633c 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -310,6 +310,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Catch trivial redundancies FPM.addPass(EarlyCSEPass()); + // Hoisting of scalars and load expressions. + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + // Speculative execution if the target has divergent branches; otherwise nop. FPM.addPass(SpeculativeExecutionPass()); @@ -473,8 +477,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); - if (EnableGVNHoist) - EarlyFPM.addPass(GVNHoistPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); // Interprocedural constant propagation now that basic cleanup has occurred diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 318e21da999d..f7b7ad89e959 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -649,12 +649,10 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::tce: case Triple::tcele: case Triple::thumbeb: - case Triple::xcore: - return Triple::ELF; - case Triple::wasm32: case Triple::wasm64: - return Triple::Wasm; + case Triple::xcore: + return Triple::ELF; case Triple::ppc: case Triple::ppc64: diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h index 4f656f94ea12..b99c1d1d6b3e 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h @@ -1,4 +1,4 @@ -//===-- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints -------===// +//==- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,8 @@ namespace llvm { +class TargetRegisterInfo; + /// Add the accumulator chaining constraint to a PBQP graph class A57ChainingConstraint : public PBQPRAConstraint { public: @@ -33,6 +35,7 @@ private: // Add constraints between existing chains void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra); }; -} + +} // end namespace llvm #endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td index d098cf7a5a37..7402bcf1346c 100644 --- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -56,12 +56,14 @@ def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } +def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; } def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{
let Latency = 0; } def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } +def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; } def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } @@ -76,6 +78,7 @@ def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } +def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; } def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } @@ -83,6 +86,10 @@ def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } //===----------------------------------------------------------------------===// // Define 2 micro-op types +def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 0; + let NumMicroOps = 2; +} def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 1; let NumMicroOps = 2; @@ -476,17 +483,19 @@ def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr // SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast // ----------------------------------------------------------------------------- def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>; -def FalkorFMOVZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || +def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || + MI->getOperand(1).getReg() == AArch64::XZR}]>; def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>; def FalkorWr_FMOV : SchedWriteVariant<[ - SchedVar<FalkorFMOVZrReg, [FalkorWr_1none_0cyc]>, + SchedVar<FalkorOp1ZrReg, [FalkorWr_1none_0cyc]>, SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>; def FalkorWr_MOVZ : SchedWriteVariant<[ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>, - SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>; + SchedVar<NoSchedPred, [FalkorWr_1XYZB_0cyc]>]>; // imm fwd + def FalkorWr_ADDSUBsx : SchedWriteVariant<[ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>, @@ -500,6 +509,10 @@ def FalkorWr_LDRSro : SchedWriteVariant<[ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>, SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>; +def FalkorWr_ORRi : SchedWriteVariant<[ + SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd + SchedVar<NoSchedPred, [FalkorWr_1XYZ_1cyc]>]>; + def FalkorWr_PRFMro : SchedWriteVariant<[ SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>, SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>; @@ -810,7 +823,8 @@ def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex 
"^ORR(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>; def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>; @@ -825,7 +839,7 @@ def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>; @@ -849,7 +863,7 @@ def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc], def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; +def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>; @@ -1036,13 +1050,13 @@ def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFM // FP Miscellaneous Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>; -def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(S|D)i$")>; +def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>; def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>; def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd // FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr -def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs FMOVD0, FMOVS0)>; +def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; @@ -1107,11 +1121,12 @@ def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], // Move and Shift Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>; -def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>; -def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>; -def : InstRW<[FalkorWr_1XYZ_1cyc], 
(instrs MOVi32imm, MOVi64imm)>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation) def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>], (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>; def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>], diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 78ff3bbe3d1a..55d18c3f3646 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -55,6 +55,8 @@ FunctionPass *createAMDGPUMachineCFGStructurizerPass(); void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); extern char &AMDGPUMachineCFGStructurizerID; +void initializeAMDGPUAlwaysInlinePass(PassRegistry&); + ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index e7ebb37a9d62..b50e8d1d659e 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -365,6 +365,13 @@ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "Force to generate flat instruction for global" >; +def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < + "auto-waitcnt-before-barrier", + "AutoWaitcntBeforeBarrier", + "true", + "Hardware automatically inserts waitcnt before barrier" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index 1d03714874e2..8084d368c80f 100644 --- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -22,18 +22,22 @@ using namespace llvm; namespace { class AMDGPUAlwaysInline : public ModulePass { - static char ID; - bool GlobalOpt; public: - AMDGPUAlwaysInline(bool GlobalOpt) : ModulePass(ID), GlobalOpt(GlobalOpt) { } + static char ID; + + AMDGPUAlwaysInline(bool GlobalOpt = false) : + ModulePass(ID), GlobalOpt(GlobalOpt) { } bool runOnModule(Module &M) override; StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; } }; } // End anonymous namespace +INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", + "AMDGPU Inline All Functions", false, false) + char AMDGPUAlwaysInline::ID = 0; bool AMDGPUAlwaysInline::runOnModule(Module &M) { diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 57905be18813..267f4807a788 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -28,11 +28,16 @@ using namespace llvm; AMDGPULegalizerInfo::AMDGPULegalizerInfo() { using namespace TargetOpcode; + const LLT S1= LLT::scalar(1); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); const LLT P1 = LLT::pointer(1, 64); const LLT P2 = LLT::pointer(2, 64); + // FIXME: i1 operands to intrinsics should always be legal, but other i1 + // values may not be legal. We need to figure out how to distinguish + // between these two scenarios. 
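// (For context: LLT::scalar(1) above is the 1-bit type i1, used for values
// such as select conditions, so the setAction line below marks plain i1
// constants legal while the question in the FIXME stays open.)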
+ setAction({G_CONSTANT, S1}, Legal); setAction({G_CONSTANT, S32}, Legal); setAction({G_CONSTANT, S64}, Legal); diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 6e301b4ad527..8d157e2f98f2 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -91,6 +91,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FPExceptions(false), DX10Clamp(false), FlatForGlobal(false), + AutoWaitcntBeforeBarrier(false), UnalignedScratchAccess(false), UnalignedBufferAccess(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 0582ce95693a..ed9cbb994fad 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -110,6 +110,7 @@ protected: bool FPExceptions; bool DX10Clamp; bool FlatForGlobal; + bool AutoWaitcntBeforeBarrier; bool UnalignedScratchAccess; bool UnalignedBufferAccess; bool HasApertureRegs; @@ -195,7 +196,8 @@ public: } bool isOpenCLEnv() const { - return TargetTriple.getEnvironment() == Triple::OpenCL; + return TargetTriple.getEnvironment() == Triple::OpenCL || + TargetTriple.getEnvironmentName() == "amdgizcl"; } Generation getGeneration() const { @@ -363,6 +365,10 @@ public: return FlatForGlobal; } + bool hasAutoWaitcntBeforeBarrier() const { + return AutoWaitcntBeforeBarrier; + } + bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; } @@ -727,12 +733,6 @@ public: /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; - /// \returns True if waitcnt instruction is needed before barrier instruction, - /// false otherwise. - bool needWaitcntBeforeBarrier() const { - return true; - } - /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. 
bool flatScratchIsPointer() const {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 596f02ae4a64..404598ff4738 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -116,7 +116,7 @@ static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
 static cl::opt<bool> EnableSIInsertWaitcntsPass(
   "enable-si-insert-waitcnts",
   cl::desc("Use new waitcnt insertion pass"),
-  cl::init(false));
+  cl::init(true));
 
 // Option to run late CFG structurizer
 static cl::opt<bool> LateCFGStructurize(
@@ -139,6 +139,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSIShrinkInstructionsPass(*PR);
   initializeSIFixControlFlowLiveIntervalsPass(*PR);
   initializeSILoadStoreOptimizerPass(*PR);
+  initializeAMDGPUAlwaysInlinePass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPULowerIntrinsicsPass(*PR);
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f13629a3185f..dfac068d1f69 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -35,9 +35,12 @@ struct FoldCandidate {
   };
   unsigned char UseOpNo;
   MachineOperand::MachineOperandType Kind;
+  bool Commuted;
 
-  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
-    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
+  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
+                bool Commuted_ = false) :
+    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+    Commuted(Commuted_) {
     if (FoldOp->isImm()) {
       ImmToFold = FoldOp->getImm();
     } else if (FoldOp->isFI()) {
@@ -59,6 +62,10 @@ struct FoldCandidate {
   bool isReg() const {
     return Kind == MachineOperand::MO_Register;
   }
+
+  bool isCommuted() const {
+    return Commuted;
+  }
 };
 
 class SIFoldOperands : public MachineFunctionPass {
@@ -237,8 +244,13 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
         !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
       return false;
 
-    if (!TII->isOperandLegal(*MI, OpNo, OpToFold))
+    if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+      TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
       return false;
+    }
+
+    FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+    return true;
   }
 
   FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
@@ -699,6 +711,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
       DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
             static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
       tryFoldInst(TII, Fold.UseMI);
+    } else if (Fold.isCommuted()) {
+      // Restore the instruction's original operand order if the fold failed.
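// (How the restore works, as a sketch: tryAddToFoldList above commuted *MI
// speculatively and recorded Commuted = true on the candidate; the call
// below uses the default operand indices, so it should swap the same
// commutable operand pair back, undoing the speculative commute.)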
+ TII->commuteInstruction(*Fold.UseMI, false); } } } diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 76c2644867aa..b48b23911105 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3571,7 +3571,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && - isMemOpHasNoClobberedMemOperand(Load)) + !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index e22166d03e9a..c10badba88f3 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1009,7 +1009,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( // occurs before the instruction. Doing it here prevents any additional // S_WAITCNTs from being emitted if the instruction was marked as // requiring a WAITCNT beforehand. - if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) { + if (MI.getOpcode() == AMDGPU::S_BARRIER && + !ST->hasAutoWaitcntBeforeBarrier()) { EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); EmitSwaitcnt |= ScoreBrackets->updateByWait( diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 9f32ecfa52ff..bc86515d8b1f 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -630,7 +630,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // but we also want to wait for any other outstanding transfers before // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && - ST->needWaitcntBeforeBarrier()) || + !ST->hasAutoWaitcntBeforeBarrier()) || I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 5b840a14dbc3..73dd8b7daa4e 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) || (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && + !Ld->isVolatile() && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) && static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 001fc960b228..77fc9551cff9 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -245,9 +245,10 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; let SubtargetPredicate = Has16BitInsts in { +def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>; + let isCommutable = 1 in { -def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>; def V_FMA_F16 : VOP3Inst <"v_fma_f16", 
VOP3_Profile<VOP_F16_F16_F16_F16>, fma>; def V_INTERP_P1LL_F16 : VOP3Inst <"v_interp_p1ll_f16", VOP3_Profile<VOP_F32_F32_F16>>; def V_INTERP_P1LV_F16 : VOP3Inst <"v_interp_p1lv_f16", VOP3_Profile<VOP_F32_F32_F16_F16>>; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 46fd1f70ee99..ca68f5d42c32 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -205,6 +205,13 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; +/// Disable +1 predication cost for instructions updating CPSR. +/// Enabled for Cortex-A57. +def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", + "CheapPredicableCPSRDef", + "true", + "Disable +1 predication cost for instructions updating CPSR">; + def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", "AvoidMOVsShifterOperand", "true", "Avoid movs instructions with shifter operand">; @@ -788,12 +795,14 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, FeatureCRC, FeatureFPAO]>; -def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, - FeatureHWDivThumb, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, FeatureHWDivThumb, diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5c9d589e2625..f8b65573f9cd 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -558,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate( return Found; } -static bool isCPSRDefined(const MachineInstr *MI) { - for (const auto &MO : MI->operands()) +bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { + for (const auto &MO : MI.operands()) if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) return true; return false; } +bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + return Offset.getReg() != 0; +} + +// Load with negative register offset requires additional 1cyc and +I unit +// for Cortex A57 +bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + const MachineOperand &Opc = MI.getOperand(Op + 2); + assert(Opc.isImm()); + assert(Offset.isReg()); + int64_t OpcImm = Opc.getImm(); + + bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; + return (isSub && Offset.getReg() != 0); +} + +bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + +// Load, scaled register offset, not plus LSL2 +bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + + bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); + if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled + bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); + return !SimpleScaled; +} + +// Minus 
reg for ldstso addr mode +bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; +} + +// Load, scaled register offset +bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + static bool isEligibleForITBlock(const MachineInstr *MI) { switch (MI->getOpcode()) { default: return true; @@ -590,7 +645,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) { case ARM::tSUBi3: // SUB (immediate) T1 case ARM::tSUBi8: // SUB (immediate) T2 case ARM::tSUBrr: // SUB (register) T1 - return !isCPSRDefined(MI); + return !ARMBaseInstrInfo::isCPSRDefined(*MI); } } @@ -3349,6 +3404,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return DefCycle; } +bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { + unsigned BaseReg = MI.getOperand(0).getReg(); + for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { + const auto &Op = MI.getOperand(i); + if (Op.isReg() && Op.getReg() == BaseReg) + return true; + } + return false; +} +unsigned +ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { + // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops + // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops) + return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); +} + int ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, @@ -4119,7 +4190,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef())) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. return 1; @@ -4148,7 +4220,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } const MCInstrDesc &MCID = MI.getDesc(); - if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { + if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef()))) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. 
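// (With this patch, subtargets that set FeatureCheapPredicableCPSR, i.e. the
// Cortex-A57 model added in ARM.td above, skip this extra predication cost
// through the !Subtarget.cheapPredicableCPSRDef() checks added to both
// conditions.)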
*PredCost = 1; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index dd7fe871345a..c52e572786d4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -159,6 +159,24 @@ public: bool isPredicable(const MachineInstr &MI) const override; + // CPSR defined in instruction + static bool isCPSRDefined(const MachineInstr &MI); + bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const; + bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const; + + // Load, scaled register offset + bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load, scaled register offset, not plus LSL2 + bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const; + // Minus reg for ldstso addr mode + bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const; + // Scaled register offset in address mode 2 + bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load multiple, base reg in list + bool isLDMBaseRegInList(const MachineInstr &MI) const; + // get LDM variable defs size + unsigned getLDMVariableDefsSize(const MachineInstr &MI) const; + /// GetInstSize - Returns the size of the specified MachineInstr. /// unsigned getInstSizeInBytes(const MachineInstr &MI) const override; diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 31a2f499a9a7..a33d025d114e 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -34,7 +34,7 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { - if (T->isArrayTy()) + if (T->isArrayTy() || T->isStructTy()) return true; EVT VT = TLI.getValueType(DL, T, true); @@ -167,8 +167,11 @@ void ARMCallLowering::splitToValueTypes( if (SplitVTs.size() == 1) { // Even if there is no splitting to do, we still want to replace the // original type (e.g. pointer type -> integer). 
- SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); + auto Flags = OrigArg.Flags; + unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); + Flags.setOrigAlign(OriginalAlignment); + SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags, + OrigArg.IsFixed); return; } @@ -177,6 +180,10 @@ void ARMCallLowering::splitToValueTypes( EVT SplitVT = SplitVTs[i]; Type *SplitTy = SplitVT.getTypeForEVT(Ctx); auto Flags = OrigArg.Flags; + + unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); + Flags.setOrigAlign(OriginalAlignment); + bool NeedsConsecutiveRegisters = TLI.functionArgumentNeedsConsecutiveRegisters( SplitTy, F->getCallingConv(), F->isVarArg()); @@ -185,6 +192,7 @@ void ARMCallLowering::splitToValueTypes( if (i == e - 1) Flags.setInConsecutiveRegsLast(); } + SplitArgs.push_back( ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)), SplitTy, Flags, OrigArg.IsFixed}); diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index ec5b97cba8cd..1c7902520f2d 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -147,6 +147,9 @@ def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); (void)TII; + const ARMSubtarget *STI = + static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo()); + (void)STI; }]>; def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>; @@ -420,3 +423,4 @@ include "ARMScheduleA8.td" include "ARMScheduleA9.td" include "ARMScheduleSwift.td" include "ARMScheduleR52.td" +include "ARMScheduleA57.td" diff --git a/lib/Target/ARM/ARMScheduleA57.td b/lib/Target/ARM/ARMScheduleA57.td new file mode 100644 index 000000000000..525079d12d51 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA57.td @@ -0,0 +1,1471 @@ +//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ARM Cortex-A57 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// *** Common description and scheduling model parameters taken from AArch64 *** +// The Cortex-A57 is a traditional superscalar microprocessor with a +// conservative 3-wide in-order stage for decode and dispatch. Combined with the +// much wider out-of-order issue stage, this produced a need to carefully +// schedule micro-ops so that all three decoded each cycle are successfully +// issued as the reservation station(s) simply don't stay occupied for long. +// Therefore, IssueWidth is set to the narrower of the two at three, while still +// modeling the machine as out-of-order. + +def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>; +def IsCPSRDefinedAndPredicatedPred : + SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>; + +// Cortex A57 rev. 
r1p0 or later (false = r0px) +def IsR1P0AndLaterPred : SchedPredicate<[{false}]>; + +// If Addrmode3 contains register offset (not immediate) +def IsLdrAm3RegOffPred : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>; +// The same predicate with operand offset 2 and 3: +def IsLdrAm3RegOffPredX2 : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>; +def IsLdrAm3RegOffPredX3 : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>; + +// If Addrmode3 contains "minus register" +def IsLdrAm3NegRegOffPred : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>; +// The same predicate with operand offset 2 and 3: +def IsLdrAm3NegRegOffPredX2 : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>; +def IsLdrAm3NegRegOffPredX3 : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>; + +// Load, scaled register offset, not plus LSL2 +def IsLdstsoScaledNotOptimalPredX0 : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>; +def IsLdstsoScaledNotOptimalPred : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>; +def IsLdstsoScaledNotOptimalPredX2 : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>; + +// Load, scaled register offset +def IsLdstsoScaledPred : + SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>; +def IsLdstsoScaledPredX2 : + SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>; + +def IsLdstsoMinusRegPredX0 : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>; +def IsLdstsoMinusRegPred : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>; +def IsLdstsoMinusRegPredX2 : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>; + +// Load, scaled register offset +def IsLdrAm2ScaledPred : + SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>; + +// LDM, base reg in list +def IsLdmBaseRegInList : + SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>; + +class A57WriteLMOpsListType<list<SchedWriteRes> writes> { + list <SchedWriteRes> Writes = writes; + SchedMachineModel SchedModel = ?; +} + +// *** Common description and scheduling model parameters taken from AArch64 *** +// (AArch64SchedA57.td) +def CortexA57Model : SchedMachineModel { + let IssueWidth = 3; // 3-way decode and dispatch + let MicroOpBufferSize = 128; // 128 micro-op re-order buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch + + // Enable partial & runtime unrolling. + let LoopMicroOpBufferSize = 16; + let CompleteModel = 1; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cortex-A57. +// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where +// micro-ops wait for their operands and then issue out-of-order. + +def A57UnitB : ProcResource<1>; // Type B micro-ops +def A57UnitI : ProcResource<2>; // Type I micro-ops +def A57UnitM : ProcResource<1>; // Type M micro-ops +def A57UnitL : ProcResource<1>; // Type L micro-ops +def A57UnitS : ProcResource<1>; // Type S micro-ops + +def A57UnitX : ProcResource<1>; // Type X micro-ops (F1) +def A57UnitW : ProcResource<1>; // Type W micro-ops (F0) + +let SchedModel = CortexA57Model in { + def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops +} + +let SchedModel = CortexA57Model in { + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Cortex-A57. 
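// (The A57Write_<N>cyc_<Unit> writes referenced throughout this file come
// from the ARMScheduleA57WriteRes.td included just below. As a rough sketch,
// assuming they follow the same shape as the Falkor writes earlier in this
// patch, each binds one unit for N cycles, e.g.:
//   def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
//   def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; })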
+
+include "ARMScheduleA57WriteRes.td"
+
+// To have "CompleteModel = 1", support of pseudos and special instructions
+def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
+  "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
+  "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
+  "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
+  "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
+  "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG",
+  "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>;
+
+def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
+
+// Specific memory instrs
+def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
+  "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;
+
+// coprocessor moves
+def : InstRW<[WriteNoop, WriteNoop], (instregex
+  "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
+  "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
+  "(t2)?MSR(banked|i|_AR|_M)?$")>;
+
+// Deprecated instructions
+def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
+
+// Pseudos
+def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
+  "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
+  "tLDRpci_pic", "t2SUBS_PC_LR",
+  "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
+  "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+  "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+  "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+  "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+  "WIN__CHKSTK", "WIN__DBZCHK")>;
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;
+
+// --- 3.2 Branch Instructions ---
+// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ
+
+def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
+  "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
+def : InstRW<[A57Write_1cyc_1B_1I],
+  (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
+// Pseudos
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
+def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
+  "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
+def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
+
+// --- 3.3 Arithmetic and Logical Instructions ---
+// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
+// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
+
+def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
+
+// shift by register, conditional or unconditional
+// TODO: according to the doc, conditional uses I0/I1, unconditional uses M.
+// Why does the more complex instruction use the simpler pipeline? This may
+// be an error in the doc.
+def A57WriteALUsi : SchedWriteVariant<[
+  // lsl #2, lsl #1, or lsr #1.
+  SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
+  SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUsr : SchedWriteVariant<[
+  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+  SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUSsr : SchedWriteVariant<[
+  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+  SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57ReadALUsr : SchedReadVariant<[
+  SchedVar<IsPredicatedPred, [ReadDefault]>,
+  SchedVar<NoSchedPred, [ReadDefault]>
+]>;
+def : SchedAlias<WriteALUsi, A57WriteALUsi>;
+def : SchedAlias<WriteALUsr, A57WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
+def : SchedAlias<ReadALUsr, A57ReadALUsr>;
+
+def A57WriteCMPsr : SchedWriteVariant<[
+  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+  SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : SchedAlias<WriteCMP, A57Write_1cyc_1I>;
+def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
+def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
+
+// --- 3.4 Move and Shift Instructions ---
+// Move, basic
+// MOV{S}, MOVW, MVN{S}
+def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
+  "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
+  "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
+
+// Move, shift by immed, setflags/no setflags
+// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
+// setflags = isCPSRDefined
+def A57WriteMOVsi : SchedWriteVariant<[
+  SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+  SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
+  "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
+  "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
+
+// shift by register, conditional or unconditional, setflags/no setflags
+def A57WriteMOVsr : SchedWriteVariant<[
+  SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
+  SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+  SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
+  "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
+  "(t2|t)RORrr")>;
+
+// Move, top
+// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
+def A57WriteMOVT : SchedWriteVariant<[
+  SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
+  SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
+
+def A57WriteI2pc :
+  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
+def A57WriteI2ld :
+  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
+def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+// +2cyc for branch forms
+def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
+
+// --- 3.5 Divide and Multiply Instructions ---
+// Divide: SDIV, UDIV
+// latency from documentation: 4-20, maximum taken
+def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
+// Multiply: tMul not bound to common WriteRes types
+def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
+def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
+def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
+def : ReadAdvance<ReadMUL, 0>;
+
+// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
+// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
+// Multiply-accumulate pipelines support late-forwarding of accumulate operands
+// from similar μops, allowing a typical sequence
of multiply-accumulate μops +// to issue one every 1 cycle (sched advance = 2). +def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; + +def : SchedAlias<WriteMAC16, A57WriteMLA>; +def : SchedAlias<WriteMAC32, A57WriteMLA>; +def : SchedAlias<ReadMAC, A57ReadMLA>; + +def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>; +def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>; + +// Multiply long: SMULL, UMULL +def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>; +def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>; + +// --- 3.6 Saturating and Parallel Arithmetic Instructions --- +// Parallel arith +// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8 +// Conditional GE-setting instructions require three extra μops +// and two additional cycles to conditionally update the GE field. +def A57WriteParArith : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]> +]>; +def : InstRW< [A57WriteParArith], (instregex + "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)", + "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>; + +// Parallel arith with exchange: SASX, SSAX, UASX, USAX +def A57WriteParArithExch : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>, + SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]> +]>; +def : InstRW<[A57WriteParArithExch], + (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>; + +// Parallel halving arith +// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex + "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)", + "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>; + +// Parallel halving arith with exchange +// SHASX, SHSAX, UHASX, UHSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX", + "(t2)?UHASX", "(t2)?UHSAX")>; + +// Parallel saturating arith +// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)", + "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>; + +// Parallel saturating arith with exchange +// QASX, QSAX, UQASX, UQSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX", + "(t2)?UQASX", "(t2)?UQSAX")>; + +// Saturate: SSAT, SSAT16, USAT, USAT16 +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>; + +// Saturating arith: QADD, QSUB +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>; + +// Saturating doubling arith: QDADD, QDSUB +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>; + +// --- 3.7 Miscellaneous Data-Processing Instructions --- +// Bit field extract: SBFX, UBFX +def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>; + +// Bit field insert/clear: BFI, BFC +def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>; + +// Select bytes, conditional/unconditional +def A57WriteSEL : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1I]> +]>; +def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>; + +// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH +def : InstRW<[A57Write_1cyc_1I], + (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>; + +// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>; + +// Sign/zero extend and add, parallel: 
SXTAB16, UXTAB16 +def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>; + +// Sum of absolute differences: USAD8, USADA8 +def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>; + +// --- 3.8 Load Instructions --- + +// Load, immed offset +// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate +def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12", + "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)", + "PICLDR", "tLDR")>; + +def : InstRW<[A57Write_4cyc_1L], + (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>; + +// For "Load, register offset, minus" we need +1cyc, +1I +def A57WriteLdrAm3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>; +def A57WriteLdrAm3X2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>; +def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>; + +def A57WriteLdrAmLDSTSO : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>, + SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>; + +def A57WrBackOne : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; +} +def A57WrBackTwo : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def A57WrBackThree : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} + +// --- LDR pre-indexed --- +// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update) +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM", + "LDRB_PRE_IMM", "t2LDRB_PRE")>; + +// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update) +// (5 cyc load result for not-lsl2 scaled) +def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> +]>; +def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo], + (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>; + +def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack], + (instregex "LDR(H|SH|SB)_PRE")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_PRE")>; + +// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm. 
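// (Notation: "5(2) cyc" reads as 5 cycles to the load result and 2 cycles to
// the base-register writeback, matching the "4 cyc for load result, 2 cyc for
// Base update" comment on the register pre-indexed forms above.)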
+def A57WriteLdrDAm3Pre : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> +]>; +def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack], + (instregex "LDRD_PRE")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_PRE")>; + +// --- LDR post-indexed --- +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM", + "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>; + +def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack], + (instregex "LDR(H|SH|SB)_POST")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_POST")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG", + "LDRB_POST_REG", "LDR(B?)T_POST$")>; + +def A57WriteLdrTRegPost : SchedWriteVariant<[ + SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> +]>; +def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[ + SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>, + SchedVar<NoSchedPred, [A57WrBackTwo]> +]>; +// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L" +def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack], + (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>; + +def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm. 
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_POST")>; + +// --- Preload instructions --- +// Preload, immed offset +def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12", + "t2PLDW?(i8|pci|s)", "(t2)?PLI")>; + +// Preload, register offset, +// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2 +// otherwise 4cyc "L" +def A57WritePLD : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>, + SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1L]> +]>; +def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>; + +// --- Load multiple instructions --- +foreach NumAddr = 1-8 in { + def A57LMAddrPred#NumAddr : + SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>; +} + +def A57LDMOpsListNoregin : A57WriteLMOpsListType< + [A57Write_3cyc_1L, A57Write_3cyc_1L, + A57Write_4cyc_1L, A57Write_4cyc_1L, + A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L]>; +def A57WriteLDMnoreginlist : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>, + SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]> +]> { let Variadic=1; } + +def A57LDMOpsListRegin : A57WriteLMOpsListType< + [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>; +def A57WriteLDMreginlist : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>, + SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]> +]> { let Variadic=1; } + +def A57LDMOpsList_Upd : A57WriteLMOpsListType< + [A57WrBackOne, + A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I, + A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>; +def A57WriteLDM_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>, + SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>, + 
SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>, + SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>, + SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>, + SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>, + SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>, + SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>, + SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]> +]> { let Variadic=1; } + +def A57WriteLDM : SchedWriteVariant<[ + SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>, + SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]> +]> { let Variadic=1; } + +def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>; + +// TODO: no writeback latency defined in documentation (implemented as 1 cyc) +def : InstRW<[A57WriteLDM_Upd], + (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; + +// --- 3.9 Store Instructions --- + +// Store, immed offset +def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", + "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; + +// Store, register offset +// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S", +// otherwise 1cyc S. +def A57WriteStrAmLDSTSO : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>, + SchedVar<IsLdstsoMinusRegPred, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S]> +]>; +def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>; + +// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg. +def A57WriteStrAm3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S]> +]>; +def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>; +def A57WriteStrAm3X2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S]> +]>; +def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>; + +// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback) +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM", + "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)", + "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>; + +// Store, register pre-indexed: +// 1(1) "S, I0/I1" for plus reg +// 3(2) "I0/I1, S" for minus reg +// 1(2) "S, M" for scaled plus lsl2 +// 3(2) "I0/I1, S" for other scaled +def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> +]>; +def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[ + SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>, + SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre], + (instregex "STR_PRE_REG", "STRB_PRE_REG")>; + +// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE) +// 1(1) "S, I0/I1" for imm or reg plus +// 3(2) "I0/I1, S" for reg minus +def A57WriteStrAm3PreX2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> +]>; +def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2], + (instregex "STRH_PRE")>; + +def 
A57WriteStrAm3PreX3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>, + SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> +]>; +def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[ + SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>, + SchedVar<NoSchedPred, [A57WrBackOne]> +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3], + (instregex "STRD_PRE")>; + +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM", + "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>; + +// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not) +def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG", + "STRB(T?)_POST_REG", "STR(B?)T_POST$")>; + +// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr +// 1(1) "S, I0/I1" both for reg or imm +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>; + +// --- Store multiple instructions --- +// TODO: no writeback latency defined in documentation +def A57WriteSTM : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S]> +]>; +def A57WriteSTM_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> +]>; + +def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; +def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], + (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; + +// --- 3.10 FP Data Processing Instructions --- +def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>; +def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>; + +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; + +// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional +def A57WriteVcmp : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>, + SchedVar<NoSchedPred, [A57Write_3cyc_1X]> +]>; +def : InstRW<[A57WriteVcmp], + (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; + +// fp convert +def : InstRW<[A57Write_5cyc_1V], (instregex + "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; + +def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>; + +// FP round to integral +def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; + +// FP divide, FP square root +def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>; +def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>; +def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>; +def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>; + +// FP max/min +def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; + +// FP multiply-accumulate pipelines support late forwarding of the result +// from FP multiply μops to the accumulate operands of an +// FP multiply-accumulate μop. 
The latter can potentially be issued 1 cycle
+// after the FP multiply μop has been issued
+// FP multiply, FZ
+def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+
+def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
+def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
+def : ReadAdvance<ReadFPMUL, 0>;
+
+// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
+// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
+def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+
+// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
+// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
+// Currently, there is no way to define different read advances for the VFMA
+// operand from VFMA or from VMUL, so there will be a 5-cycle read advance.
+// Zero latency (instead of one) for VMUL->VFMA shouldn't break anything.
+// The same applies to the ASIMD VMUL/VFMA instructions.
+// def A57ReadVFMA : SchedRead;
+// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
+// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
+def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;
+
+def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
+def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
+def : SchedAlias<ReadFPMAC, A57ReadVFMA5>;
+
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;
+
+// --- 3.11 FP Miscellaneous Instructions ---
+// VMOV: 3cyc "F0/F1" for imm/reg
+def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;
+
+// 5cyc L for FP transfer, vfp to core reg,
+// 5cyc L for FP transfer, core reg to vfp
+def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
+// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2).
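// (These transfers produce two core-register results, so the InstRW below
// lists A57Write_5cyc_1L twice, one write per result, where the shared
// WriteFPMOV alias models only one.)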
+def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>; + +// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg +def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>; + +// --- 3.12 FP Load Instructions --- +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>; + +// FP load multiple (VLDM) + +def A57VLDMOpsListUncond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_11cyc_1L, + A57Write_12cyc_1L, A57Write_12cyc_1L]>; +def A57WriteVLDMuncond : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]> +]> { let Variadic=1; } + +def A57VLDMOpsListCond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_12cyc_1L, + A57Write_13cyc_1L, A57Write_14cyc_1L, + A57Write_15cyc_1L, A57Write_16cyc_1L, + A57Write_17cyc_1L, A57Write_18cyc_1L, + A57Write_19cyc_1L, A57Write_20cyc_1L]>; +def A57WriteVLDMcond : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]> +]> { let Variadic=1; } + +def A57WriteVLDM : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>, + SchedVar<NoSchedPred, [A57WriteVLDMuncond]> +]> { let Variadic=1; } + +def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>; + +def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I, + A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>; +def A57WriteVLDMuncond_UPD : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>, + 
SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]> +]> { let Variadic=1; } + +def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I, + A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I, + A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I, + A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I, + A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>; +def A57WriteVLDMcond_UPD : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>, + SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>, + SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>, + SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>, + SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>, + SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>, + SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>, + SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>, + SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]> +]> { let Variadic=1; } + +def A57WriteVLDM_UPD : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>, + SchedVar<NoSchedPred, [A57WriteVLDMuncond_UPD]> +]> { let Variadic=1; } + +def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD], + (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>; + +// --- 3.13 FP Store Instructions --- +def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>; + +def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>; + +def A57WriteVSTMs : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S]> +]>; +def A57WriteVSTMd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>, + SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>, + SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>, + SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>, + SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>, + SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>, + SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>, + SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>, + SchedVar<NoSchedPred, [A57Write_4cyc_1S]> +]>; +def A57WriteVSTMs_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>, + SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>, + SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>, + SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>, + SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>, + SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>, + SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>, + SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> +]>; +def A57WriteVSTMd_Upd : SchedWriteVariant<[ + SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>, + SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>, + SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>, + SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>, + SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>, + SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>, + SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>, + 
SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>, + SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> +]>; + +def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>; +def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd], + (instregex "VSTM(SIA_UPD|SDB_UPD)")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd], + (instregex "VSTM(DIA_UPD|DDB_UPD)")>; + +// --- 3.14 ASIMD Integer Instructions --- + +// ASIMD absolute diff, 3cyc F0/F1 for integer VABD +def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>; + +// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form +def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>; +def : InstRW<[A57WriteVABAD, A57ReadVABAD], + (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>; +def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; } +def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>; +def : InstRW<[A57WriteVABAQ, A57ReadVABAQ], + (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>; + +// ASIMD absolute diff accum long: 4(1) F1 for VABAL +def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>; +def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>; + +// ASIMD absolute diff long: 3cyc F0/F1 for VABDL +def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>; + +// ASIMD arith, basic +def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW", + "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)", + "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>; + +// ASIMD arith, complex +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB", + "VQABS", "VQADD", "VQNEG", "VQSUB", + "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>; + +// ASIMD compare +def : InstRW<[A57Write_3cyc_1V], + (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>; + +// ASIMD logical +def : InstRW<[A57Write_3cyc_1V], + (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>; + +// ASIMD max/min +def : InstRW<[A57Write_3cyc_1V], + (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>; + +// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later +// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply +// and multiply-with-accumulate instructions relative to r0pX. 
+def A57WriteVMULD_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def : InstRW<[A57WriteVMULD_VecInt], (instregex + "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)", + "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>; + +// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later +def A57WriteVMULQ_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>; +def : InstRW<[A57WriteVMULQ_VecInt], (instregex + "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)", + "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>; + +// ASIMD multiply accumulate, D-form +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAD_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def A57ReadVMLAD_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]> +]>; +def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt], + (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>; + +// ASIMD multiply accumulate, Q-form +// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAQ_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>; +def A57ReadVMLAQ_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]> +]>; +def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt], + (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>; + +// ASIMD multiply accumulate long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAL_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def A57ReadVMLAL_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]> +]>; +def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt], + (instregex "VMLAL(s|u)", "VMLSL(s|u)")>; + +// ASIMD multiply accumulate saturating long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence +// (3 or 2 ReadAdvance) +def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def A57ReadVQDMLAL_VecInt : SchedReadVariant<[ + SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>, + SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]> +]>; +def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], + (instregex "VQDMLAL", "VQDMLSL")>; + +// ASIMD multiply long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later +def A57WriteVMULL_VecInt : SchedWriteVariant<[ + SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, + SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; +def : InstRW<[A57WriteVMULL_VecInt], + (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>; + +// ASIMD pairwise add and accumulate +// 4cyc F1, 1cyc for accumulate sequence 
(3cyc ReadAdvance) +def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>; +def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>; + +// ASIMD shift accumulate +// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance) +def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>; +def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>; + +// ASIMD shift by immed, basic +def : InstRW<[A57Write_3cyc_1X], + (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>; + +// ASIMD shift by immed, complex +def : InstRW<[A57Write_4cyc_1X], (instregex + "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)", + "VRSHRN")>; + +// ASIMD shift by immed and insert, basic, D-form +def : InstRW<[A57Write_4cyc_1X], (instregex + "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by immed and insert, basic, Q-form +def : InstRW<[A57Write_5cyc_1X], (instregex + "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, basic, D-form +def : InstRW<[A57Write_3cyc_1X], (instregex + "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by register, basic, Q-form +def : InstRW<[A57Write_4cyc_1X], (instregex + "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, complex, D-form +// VQRSHL, VQSHL, VRSHL +def : InstRW<[A57Write_4cyc_1X], (instregex + "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", + "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by register, complex, Q-form +def : InstRW<[A57Write_5cyc_1X], (instregex + "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", + "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; + +// --- 3.15 ASIMD Floating-Point Instructions --- +// ASIMD FP absolute value +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>; + +// ASIMD FP arith +def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)", + "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>; + +// ASIMD FP compare +def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)", + "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>; + +// ASIMD FP convert, integer +def : InstRW<[A57Write_5cyc_1V], (instregex + "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)", + "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)", + "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>; + +// ASIMD FP convert, half-precision: 8cyc F0/F1 +def : InstRW<[A57Write_8cyc_1V], (instregex + "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)", + "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)", + "VCVT(f2h|h2f)")>; + +// ASIMD FP max/min +def : InstRW<[A57Write_5cyc_1V], (instregex + "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>; + +// ASIMD FP multiply +def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>; + +// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence +def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57ReadVMLA_VecFP : + SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>; +def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP], + (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>; + +// ASIMD FP negate +def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>; + +// ASIMD FP 
round to integral +def : InstRW<[A57Write_5cyc_1V], (instregex + "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>; + +// --- 3.16 ASIMD Miscellaneous Instructions --- + +// ASIMD bitwise insert +def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>; + +// ASIMD count +def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>; + +// ASIMD duplicate, core reg: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>; + +// ASIMD duplicate, scalar: 3cyc "F0/F1" +def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>; + +// ASIMD extract +def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>; + +// ASIMD move, immed +def : InstRW<[A57Write_3cyc_1V], (instregex + "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)", + "VMOVQ0")>; + +// ASIMD move, narrowing +def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>; + +// ASIMD move, saturating +def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>; + +// ASIMD reciprocal estimate +def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>; + +// ASIMD reciprocal step, FZ +def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>; + +// ASIMD reverse, swap, table lookup (1-2 reg) +def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>; + +// ASIMD table lookup (3-4 reg) +def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>; + +// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1" +def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>; + +// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>; + +// ASIMD transpose +def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>; + +// ASIMD unzip/zip, D-form +def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], + (instregex "VUZPd", "VZIPd")>; + +// ASIMD unzip/zip, Q-form +def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V], + (instregex "VUZPq", "VZIPq")>; + +// --- 3.17 ASIMD Load Instructions --- + +// Overridden via InstRW for this processor.
+def : WriteRes<WriteVLD1, []>; +def : WriteRes<WriteVLD2, []>; +def : WriteRes<WriteVLD3, []>; +def : WriteRes<WriteVLD4, []>; +def : WriteRes<WriteVST1, []>; +def : WriteRes<WriteVST2, []>; +def : WriteRes<WriteVST3, []>; +def : WriteRes<WriteVST4, []>; + +// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency +def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>; +def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne], + (instregex "VLD1(d|q)(8|16|32|64)wb")>; + +// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency +def : InstRW<[A57Write_6cyc_1L], + (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>; + +def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne], + (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>; + +// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex + "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex + "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], + (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>; + +// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2b(8|16|32)wb")>; + +// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$", + "VLD2LN(d|q)(8|16|32)Pseudo$")>; +// 2 results + wb result +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne], + (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>; +// 1 result + wb result +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb", + "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1" +// 3 results +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], + (instregex "VLD3(d|q)(8|16|32)$")>; +// 1 result +def : InstRW<[A57Write_9cyc_1L_1V], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>; +// 3 results + wb +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3(d|q)(8|16|32)_UPD$")>; +// 1 result + wb +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3LN(d|q)32$", + "VLD3LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], + (instregex "VLD3LN(d|q)(8|16)$", + "VLD3LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex 
"VLD3LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3DUP(d|q)(8|16|32)$", + "VLD3DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)$")>; +def : InstRW<[A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4LN(d|q)32$", + "VLD4LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4LN(d|q)(8|16)$", + "VLD4LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4DUP(d|q)(8|16|32)$", + "VLD4DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.18 ASIMD Store Instructions --- + +// ASIMD store, 1 element, multiple, 1 reg: 1cyc S +def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "VST1d(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 2 reg: 2cyc S +def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I], + (instregex "VST1q(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 3 reg: 3cyc S +def : InstRW<[A57Write_3cyc_1S], + (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I], + (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>; +// ASIMD store, 1 element, multiple, 4 reg: 4cyc S +def : InstRW<[A57Write_4cyc_1S], + (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I], + (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>; +// ASIMD store, 1 element, one 
lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>; +// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2(d|b)(8|16|32)$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2(b|d)(8|16|32)wb")>; +// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S" +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>; +// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2LN(d|q)(8|16|32)_UPD", + "VST2LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 3 element, multiple, 3 reg +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3(d|q)(8|16|32)_UPD", + "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 3 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3LN(d|q)(8|16|32)_UPD", + "VST3LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 4 element, multiple, 4 reg +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST4(d|q)(8|16|32)_UPD", + "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 4 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST4LN(d|q)(8|16|32)_UPD", + "VST4LN(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.19 Cryptography Extensions --- +// Crypto AES ops +// AESD, AESE, AESIMC, AESMC: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>; +// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>; +// Crypto SHA1 xor ops: 6cyc F0/F1 +def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; +// Crypto SHA1 fast ops: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; +// Crypto SHA1 slow ops: 6cyc F0 +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; +// Crypto SHA256 fast ops: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>; +// Crypto SHA256 slow ops: 6cyc F0 +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>; + +// --- 3.20 CRC --- +def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>; + +// ----------------------------------------------------------------------------- +// Common definitions +def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } +def : SchedAlias<WriteALU, A57Write_1cyc_1I>; + +def : SchedAlias<WriteBr, A57Write_1cyc_1B>; +def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>; +def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>; +def : SchedAlias<WritePreLd, A57Write_4cyc_1L>; + +def : SchedAlias<WriteLd, A57Write_4cyc_1L>; +def : SchedAlias<WriteST, A57Write_1cyc_1S>; 
+def : ReadAdvance<ReadALU, 0>; + +} // SchedModel = CortexA57Model + diff --git a/lib/Target/ARM/ARMScheduleA57WriteRes.td b/lib/Target/ARM/ARMScheduleA57WriteRes.td new file mode 100644 index 000000000000..670717dc7c13 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA57WriteRes.td @@ -0,0 +1,323 @@ +//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming convention is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: A57Write +// Latency: #cyc +// MicroOp Count/Types: #(B|I|M|L|S|X|W|V) +// +// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are +// 11 micro-ops to be issued as follows: one to I pipe, six to S pipes and +// four to V pipes. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } +def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } +def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; + let ResourceCycles = [17]; } +def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; + let ResourceCycles = [18]; } +def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; + let ResourceCycles = [19]; } +def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20; + let ResourceCycles = [20]; } +def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } +def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; } +def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; } +def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } +def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; } +def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; + let ResourceCycles = [35]; } +def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } +def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } +def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } + +// A57Write_3cyc_1L - A57Write_20cyc_1L +foreach Lat = 3-20 in { + def A57Write_#Lat#cyc_1L : SchedWriteRes<[A57UnitL]> { + let Latency = Lat; + } +} + +// A57Write_4cyc_1S - A57Write_16cyc_1S +foreach Lat = 4-16 in { + def A57Write_#Lat#cyc_1S : SchedWriteRes<[A57UnitS]> { + let Latency = Lat; + } +} + 
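The foreach/paste idiom above generates one record per latency value: the # operator pastes the loop variable into the record name. For illustration only, the "foreach Lat = 4-16" loop is equivalent to writing out records of this shape by hand (the loop already defines them, so they must not be defined a second time):

def A57Write_4cyc_1S  : SchedWriteRes<[A57UnitS]> { let Latency = 4; }
def A57Write_5cyc_1S  : SchedWriteRes<[A57UnitS]> { let Latency = 5; }
// ... one record per latency, up to ...
def A57Write_16cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 16; }

The model file consumes these leaf types through SchedWriteVariant and InstRW. A minimal sketch of that idiom, assuming a hypothetical predicate MyImmPred (the real predicates used by this model, such as A57LMAddrPred1..8 and IsR1P0AndLaterPred, are defined alongside it in ARMScheduleA57.td):

// Hypothetical illustration of the idiom used throughout the model:
// select a 2-cycle M-pipe write when MyImmPred holds, otherwise a
// 1-cycle I-pipe write, and bind the variant to opcodes by regex.
def MyImmPred : SchedPredicate<[{MI->getOperand(2).isImm()}]>;
def MyWrite : SchedWriteVariant<[
  SchedVar<MyImmPred,   [A57Write_2cyc_1M]>,
  SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
]>;
def : InstRW<[MyWrite], (instregex "ADD")>;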
+def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; } +def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; } +def A57Write_6cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 6; } +def A57Write_6cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 6; } +def A57Write_8cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 8; } +def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } +def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } + + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 64; + let NumMicroOps = 2; + let ResourceCycles = [32, 32]; +} +def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 7; + let NumMicroOps = 2; +} +def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_9cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_1cyc_1S_1M : SchedWriteRes<[A57UnitS, + A57UnitM]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_3cyc_1B_1I : 
SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_6cyc_1B_1L : SchedWriteRes<[A57UnitB, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_2cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 36; + let NumMicroOps = 2; + let ResourceCycles = [18, 18]; +} +def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// A57Write_3cyc_1L_1I - A57Write_20cyc_1L_1I +foreach Lat = 3-20 in { + def A57Write_#Lat#cyc_1L_1I : SchedWriteRes<[A57UnitL, A57UnitI]> { + let Latency = Lat; let NumMicroOps = 2; + } +} + +def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// A57Write_4cyc_1S_1I - A57Write_16cyc_1S_1I +foreach Lat = 4-16 in { + def A57Write_#Lat#cyc_1S_1I : SchedWriteRes<[A57UnitS, A57UnitI]> { + let Latency = Lat; let NumMicroOps = 2; + } +} + +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_4cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_4cyc_1I_1L_1M : SchedWriteRes<[A57UnitI, A57UnitL, A57UnitM]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 9; + let NumMicroOps = 3; +} diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index d2630685d91b..af682dd8321c 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -234,6 +234,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate = false; + /// CheapPredicableCPSRDef - If true, disable +1 predication cost + /// for instructions updating CPSR. Enabled for Cortex-A57. + bool CheapPredicableCPSRDef = false; + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting /// movs with shifter operand (i.e. asr, lsl, lsr).
bool AvoidMOVsShifterOperand = false; @@ -543,6 +547,7 @@ public: bool nonpipelinedVFP() const { return NonpipelinedVFP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRetAddrStack() const { return HasRetAddrStack; } bool hasMPExtension() const { return HasMPExtension; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 0fef91ec4d3e..b76da727237c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -3419,9 +3419,7 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode()); if (NewOpcode >= 0) return NewOpcode; - - dbgs() << "Cannot convert to .new: " << getName(MI.getOpcode()) << '\n'; - llvm_unreachable(nullptr); + return 0; } int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp index 4593fc92ca6f..35948e36ad91 100644 --- a/lib/Target/Mips/MicroMipsSizeReduction.cpp +++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -135,6 +135,14 @@ private: // returns true on success. static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry); + // Attempts to reduce LBU/LHU instruction into LBU16/LHU16, + // returns true on success. + static bool ReduceLXUtoLXU16(MachineInstr *MI, const ReduceEntry &Entry); + + // Attempts to reduce SB/SH instruction into SB16/SH16, + // returns true on success. + static bool ReduceSXtoSX16(MachineInstr *MI, const ReduceEntry &Entry); + // Attempts to reduce arithmetic instructions, returns true on success static bool ReduceArithmeticInstructions(MachineInstr *MI, const ReduceEntry &Entry); @@ -162,10 +170,26 @@ llvm::SmallVector<ReduceEntry, 16> MicroMipsSizeReduce::ReduceTable = { {RT_OneInstr, OpCodes(Mips::ADDu_MM, Mips::ADDU16_MM), ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::LBu, Mips::LBU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)}, + {RT_OneInstr, OpCodes(Mips::LBu_MM, Mips::LBU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)}, + {RT_OneInstr, OpCodes(Mips::LHu, Mips::LHU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::LHu_MM, Mips::LHU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, {RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP, OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, {RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP, OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SH, Mips::SH16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SH_MM, Mips::SH16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, {RT_OneInstr, OpCodes(Mips::SUBu, Mips::SUBU16_MM), ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), ImmField(0, 0, 0, -1)}, @@ -193,6 +217,13 @@ static bool 
isMMThreeBitGPRegister(const MachineOperand &MO) { return false; } +// Returns true if the machine operand MO is register $0, $17, or $2-$7. +static bool isMMSourceRegister(const MachineOperand &MO) { + if (MO.isReg() && Mips::GPRMM16ZeroRegClass.contains(MO.getReg())) + return true; + return false; +} + // Returns true if the operand Op is an immediate value // and writes the immediate value into variable Imm static bool GetImm(MachineInstr *MI, unsigned Op, int64_t &Imm) { @@ -279,6 +310,32 @@ bool MicroMipsSizeReduce::ReduceArithmeticInstructions( return ReplaceInstruction(MI, Entry); } +bool MicroMipsSizeReduce::ReduceLXUtoLXU16(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!isMMThreeBitGPRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceSXtoSX16(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!isMMSourceRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + bool MicroMipsSizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt index 54619589c341..35a67134775a 100644 --- a/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -88,6 +88,3 @@ pr45695.c wasm-o pr49279.c wasm-o pr49390.c wasm-o pr52286.c wasm-o - -# fatal error: error in backend: data symbols must have a size set with .size -921110-1.c wasm-o diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0a41f35f9320..5303d7a406ad 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4753,7 +4753,7 @@ static void scaleShuffleMask(int Scale, ArrayRef<int> Mask, SmallVectorImpl<int> &ScaledMask) { assert(0 < Scale && "Unexpected scaling factor"); int NumElts = Mask.size(); - ScaledMask.assign(NumElts * Scale, -1); + ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1); for (int i = 0; i != NumElts; ++i) { int M = Mask[i]; @@ -5848,17 +5848,39 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, return true; } case ISD::SCALAR_TO_VECTOR: { - // Match against a scalar_to_vector of an extract from a similar vector. + // Match against a scalar_to_vector of an extract from a vector, + // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar. 
SDValue N0 = N.getOperand(0); - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - N0.getOperand(0).getValueType() != VT || - !isa<ConstantSDNode>(N0.getOperand(1)) || - NumElts <= N0.getConstantOperandVal(1) || - !N->isOnlyUserOf(N0.getNode())) + SDValue SrcExtract; + + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getValueType() == VT) { + SrcExtract = N0; + } else if (N0.getOpcode() == ISD::AssertZext && + N0.getOperand(0).getOpcode() == X86ISD::PEXTRW && + cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) { + SrcExtract = N0.getOperand(0); + assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16); + } else if (N0.getOpcode() == ISD::AssertZext && + N0.getOperand(0).getOpcode() == X86ISD::PEXTRB && + cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) { + SrcExtract = N0.getOperand(0); + assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8); + } + + if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) || + NumElts <= SrcExtract.getConstantOperandVal(1)) return false; - Ops.push_back(N0.getOperand(0)); - Mask.push_back(N0.getConstantOperandVal(1)); - Mask.append(NumElts - 1, SM_SentinelUndef); + + SDValue SrcVec = SrcExtract.getOperand(0); + EVT SrcVT = SrcVec.getValueType(); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1; + + Ops.push_back(SrcVec); + Mask.push_back(SrcExtract.getConstantOperandVal(1)); + Mask.append(NumZeros, SM_SentinelZero); + Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef); return true; } case X86ISD::PINSRB: @@ -6542,12 +6564,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue, APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); Constant *Const; if (VT.isFloatingPoint()) { - assert((ScalarSize == 32 || ScalarSize == 64) && - "Unsupported floating point scalar size"); - if (ScalarSize == 32) - Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat()); - else - Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble()); + if (ScalarSize == 32) { + Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); + } else { + assert(ScalarSize == 64 && "Unsupported floating point scalar size"); + Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); + } } else Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); ConstantVec.push_back(Const); @@ -6633,11 +6655,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // AVX have support for 32 and 64 bit broadcast for floats only. // No 64bit integer in 32bit subtarget. MVT CVT = MVT::getFloatingPointVT(SplatBitSize); - Constant *C = SplatBitSize == 32 - ? ConstantFP::get(Type::getFloatTy(*Ctx), - SplatValue.bitsToFloat()) - : ConstantFP::get(Type::getDoubleTy(*Ctx), - SplatValue.bitsToDouble()); + // Lower the splat via APFloat directly, to avoid any conversion. + Constant *C = + SplatBitSize == 32 + ? 
ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEsingle(), SplatValue)) + : ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEdouble(), SplatValue)); SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; @@ -8003,7 +8027,7 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) { static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef<int> Mask, SmallVectorImpl<int> &RepeatedMask) { - int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); + auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); RepeatedMask.assign(LaneSize, -1); int Size = Mask.size(); for (int i = 0; i < Size; ++i) { @@ -16997,7 +17021,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); - ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); + ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get(); bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint(); SDLoc dl(Op); @@ -17024,18 +17048,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is // available. SDValue Cmp; - unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1); + unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1); if (SSECC == 8) { // LLVM predicate is SETUEQ or SETONE. unsigned CC0, CC1; unsigned CombineOpc; - if (SetCCOpcode == ISD::SETUEQ) { + if (Cond == ISD::SETUEQ) { CC0 = 3; // UNORD CC1 = 0; // EQ CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FOR) : static_cast<unsigned>(ISD::OR); } else { - assert(SetCCOpcode == ISD::SETONE); + assert(Cond == ISD::SETONE); CC0 = 7; // ORD CC1 = 4; // NEQ CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FAND) : @@ -17082,7 +17106,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // 2. The original operand type has been promoted to a 256-bit vector. // // Note that condition 2. only applies for AVX targets. - SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode); + SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond); return DAG.getZExtOrTrunc(NewOp, dl, VT); } @@ -17122,7 +17146,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) { // Translate compare code to XOP PCOM compare mode. unsigned CmpMode = 0; - switch (SetCCOpcode) { + switch (Cond) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETULT: case ISD::SETLT: CmpMode = 0x00; break; @@ -17137,60 +17161,49 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } // Are we comparing unsigned or signed integers? - unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) - ? X86ISD::VPCOMU : X86ISD::VPCOM; + unsigned Opc = + ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM; return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CmpMode, dl, MVT::i8)); } - // We are handling one of the integer comparisons here. Since SSE only has + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. 
- unsigned Opc; - bool Swap = false, Invert = false, FlipSigns = false, MinMax = false; - bool Subus = false; - - switch (SetCCOpcode) { - default: llvm_unreachable("Unexpected SETCC condition"); - case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; - case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETGT: Opc = X86ISD::PCMPGT; break; - case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETLE: Opc = X86ISD::PCMPGT; - Invert = true; break; - case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETUGT: Opc = X86ISD::PCMPGT; - FlipSigns = true; break; - case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETULE: Opc = X86ISD::PCMPGT; - FlipSigns = true; Invert = true; break; - } + unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ + : X86ISD::PCMPGT; + bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || + Cond == ISD::SETGE || Cond == ISD::SETUGE; + bool Invert = Cond == ISD::SETNE || + (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); + bool FlipSigns = ISD::isUnsignedIntSetCC(Cond); // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); - bool hasMinMax = - (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) - || (Subtarget.hasSSE2() && (VET == MVT::i8)); - - if (hasMinMax) { - switch (SetCCOpcode) { + bool HasMinMax = + (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) || + (Subtarget.hasSSE2() && (VET == MVT::i8)); + bool MinMax = false; + if (HasMinMax) { + switch (Cond) { default: break; case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break; case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break; } - if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } + if (MinMax) + Swap = Invert = FlipSigns = false; } - bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); - if (!MinMax && hasSubus) { + bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); + bool Subus = false; + if (!MinMax && HasSubus) { // As another special case, use PSUBUS[BW] when it's profitable. E.g. for // Op0 u<= Op1: // t = psubus Op0, Op1 // pcmpeq t, <0..0> - switch (SetCCOpcode) { + switch (Cond) { default: break; case ISD::SETULT: { // If the comparison is against a constant we can turn this into a diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 613b4a7f03e9..626a891f65c6 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -228,7 +228,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, SmallVector<ReturnInst *, 4> Returns; - CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/false, Returns); + CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); // Remove old returns. 
for (ReturnInst *Return : Returns) diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp index ea48043f9381..44e1f9b404ed 100644 --- a/lib/Transforms/Coroutines/Coroutines.cpp +++ b/lib/Transforms/Coroutines/Coroutines.cpp @@ -218,6 +218,8 @@ void coro::Shape::buildFrom(Function &F) { size_t FinalSuspendIndex = 0; clear(*this); SmallVector<CoroFrameInst *, 8> CoroFrames; + SmallVector<CoroSaveInst *, 2> UnusedCoroSaves; + for (Instruction &I : instructions(F)) { if (auto II = dyn_cast<IntrinsicInst>(&I)) { switch (II->getIntrinsicID()) { @@ -229,6 +231,12 @@ void coro::Shape::buildFrom(Function &F) { case Intrinsic::coro_frame: CoroFrames.push_back(cast<CoroFrameInst>(II)); break; + case Intrinsic::coro_save: + // After optimizations, coro_suspends using this coro_save might have + // been removed, remember orphaned coro_saves to remove them later. + if (II->use_empty()) + UnusedCoroSaves.push_back(cast<CoroSaveInst>(II)); + break; case Intrinsic::coro_suspend: CoroSuspends.push_back(cast<CoroSuspendInst>(II)); if (CoroSuspends.back()->isFinal()) { @@ -311,4 +319,8 @@ void coro::Shape::buildFrom(Function &F) { if (HasFinalSuspend && FinalSuspendIndex != CoroSuspends.size() - 1) std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); + + // Remove orphaned coro.saves. + for (CoroSaveInst *CoroSave : UnusedCoroSaves) + CoroSave->eraseFromParent(); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 28cc81c76d4f..5cc29a493798 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1188,6 +1188,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { SCCNodes.insert(F); } + // Skip it if the SCC only contains optnone functions. + if (SCCNodes.empty()) + return Changed; + Changed |= addArgumentReturnedAttrs(SCCNodes); Changed |= addReadAttrs(SCCNodes, AARGetter); Changed |= addArgumentAttrs(SCCNodes); diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 231487923fad..6d34ab8b0d96 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -292,8 +292,7 @@ static void computeImportForFunction( static void ComputeImportForModule( const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList, - StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr, - const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) { + StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) { // Worklist contains the list of function imported in this module, for which // we will analyse the callees and may import further down the callgraph. 
SmallVector<EdgeInfo, 128> Worklist; @@ -301,7 +300,7 @@ static void ComputeImportForModule( // Populate the worklist with the import for the functions in the current // module for (auto &GVSummary : DefinedGVSummaries) { - if (DeadSymbols && DeadSymbols->count(GVSummary.first)) { + if (!Index.isGlobalValueLive(GVSummary.second)) { DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n"); continue; } @@ -344,15 +343,14 @@ void llvm::ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, StringMap<FunctionImporter::ImportMapTy> &ImportLists, - StringMap<FunctionImporter::ExportSetTy> &ExportLists, - const DenseSet<GlobalValue::GUID> *DeadSymbols) { + StringMap<FunctionImporter::ExportSetTy> &ExportLists) { // For each module that has function defined, compute the import/export lists. for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { auto &ImportList = ImportLists[DefinedGVSummaries.first()]; DEBUG(dbgs() << "Computing import for Module '" << DefinedGVSummaries.first() << "'\n"); ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList, - &ExportLists, DeadSymbols); + &ExportLists); } // When computing imports we added all GUIDs referenced by anything @@ -414,82 +412,71 @@ void llvm::ComputeCrossModuleImportForModule( #endif } -DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols( - const ModuleSummaryIndex &Index, +void llvm::computeDeadSymbols( + ModuleSummaryIndex &Index, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { + assert(!Index.withGlobalValueDeadStripping()); if (!ComputeDead) - return DenseSet<GlobalValue::GUID>(); + return; if (GUIDPreservedSymbols.empty()) // Don't do anything when nothing is live, this is friendly with tests. - return DenseSet<GlobalValue::GUID>(); - DenseSet<ValueInfo> LiveSymbols; + return; + unsigned LiveSymbols = 0; SmallVector<ValueInfo, 128> Worklist; Worklist.reserve(GUIDPreservedSymbols.size() * 2); for (auto GUID : GUIDPreservedSymbols) { ValueInfo VI = Index.getValueInfo(GUID); if (!VI) continue; - DEBUG(dbgs() << "Live root: " << VI.getGUID() << "\n"); - LiveSymbols.insert(VI); - Worklist.push_back(VI); + for (auto &S : VI.getSummaryList()) + S->setLive(true); } + // Add values flagged in the index as live roots to the worklist. - for (const auto &Entry : Index) { - bool IsLiveRoot = llvm::any_of( - Entry.second.SummaryList, - [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) { - return Summary->liveRoot(); - }); - if (!IsLiveRoot) - continue; - DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n"); - Worklist.push_back(ValueInfo(&Entry)); - } + for (const auto &Entry : Index) + for (auto &S : Entry.second.SummaryList) + if (S->isLive()) { + DEBUG(dbgs() << "Live root: " << Entry.first << "\n"); + Worklist.push_back(ValueInfo(&Entry)); + ++LiveSymbols; + break; + } + + // Make value live and add it to the worklist if it was not live before. 
+ // FIXME: we should only make the prevailing copy live here + auto visit = [&](ValueInfo VI) { + for (auto &S : VI.getSummaryList()) + if (S->isLive()) + return; + for (auto &S : VI.getSummaryList()) + S->setLive(true); + ++LiveSymbols; + Worklist.push_back(VI); + }; while (!Worklist.empty()) { auto VI = Worklist.pop_back_val(); - - // FIXME: we should only make the prevailing copy live here for (auto &Summary : VI.getSummaryList()) { - for (auto Ref : Summary->refs()) { - if (LiveSymbols.insert(Ref).second) { - DEBUG(dbgs() << "Marking live (ref): " << Ref.getGUID() << "\n"); - Worklist.push_back(Ref); - } - } - if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) { - for (auto Call : FS->calls()) { - if (LiveSymbols.insert(Call.first).second) { - DEBUG(dbgs() << "Marking live (call): " << Call.first.getGUID() - << "\n"); - Worklist.push_back(Call.first); - } - } - } + for (auto Ref : Summary->refs()) + visit(Ref); + if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) + for (auto Call : FS->calls()) + visit(Call.first); if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) { auto AliaseeGUID = AS->getAliasee().getOriginalName(); ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID); - if (AliaseeVI && LiveSymbols.insert(AliaseeVI).second) { - DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n"); - Worklist.push_back(AliaseeVI); - } + if (AliaseeVI) + visit(AliaseeVI); } } } - DenseSet<GlobalValue::GUID> DeadSymbols; - DeadSymbols.reserve( - std::min(Index.size(), Index.size() - LiveSymbols.size())); - for (auto &Entry : Index) { - if (!LiveSymbols.count(ValueInfo(&Entry))) { - DEBUG(dbgs() << "Marking dead: " << Entry.first << "\n"); - DeadSymbols.insert(Entry.first); - } - } - DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and " - << DeadSymbols.size() << " symbols Dead \n"); - NumDeadSymbols += DeadSymbols.size(); - NumLiveSymbols += LiveSymbols.size(); - return DeadSymbols; + Index.setWithGlobalValueDeadStripping(); + + unsigned DeadSymbols = Index.size() - LiveSymbols; + DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols + << " symbols Dead \n"); + NumDeadSymbols += DeadSymbols; + NumLiveSymbols += LiveSymbols; } /// Compute the set of summaries needed for a ThinLTO backend compilation of diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index ca4ee92f971a..7bec50d9d25f 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1442,9 +1442,8 @@ bool LowerTypeTestsModule::lower() { for (auto &P : *ExportSummary) { for (auto &S : P.second.SummaryList) { auto *FS = dyn_cast<FunctionSummary>(S.get()); - if (!FS) + if (!FS || !ExportSummary->isGlobalValueLive(FS)) continue; - // FIXME: Only add live functions. 
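computeDeadSymbols above now works in place: the preserved GUIDs seed a worklist, the visit helper flips a live bit on every summary reachable through refs, calls, and aliasees, and whatever stays unflagged is dead by omission (the same bit LowerTypeTests queries via isGlobalValueLive just below). A standalone sketch of that mark-live propagation over an invented reference graph:

#include <cstdio>
#include <vector>

int main() {
  // Refs[V] lists the symbols V references or calls; 0 is the only preserved
  // root. Symbols 4 and 5 are unreachable and therefore stay dead.
  std::vector<std::vector<int>> Refs = {{1, 2}, {3}, {}, {1}, {5}, {}};
  std::vector<bool> Live(Refs.size(), false);
  std::vector<int> Worklist = {0}; // GUIDPreservedSymbols analogue
  Live[0] = true;
  while (!Worklist.empty()) {
    int V = Worklist.back();
    Worklist.pop_back();
    for (int Ref : Refs[V])
      if (!Live[Ref]) { // the 'visit' lambda above: mark once, push once
        Live[Ref] = true;
        Worklist.push_back(Ref);
      }
  }
  for (std::size_t I = 0; I != Live.size(); ++I)
    std::printf("symbol %zu: %s\n", I, Live[I] ? "live" : "dead");
}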
for (GlobalValue::GUID G : FS->type_tests()) for (Metadata *MD : MetadataByGUID[G]) AddTypeIdUse(MD).IsExported = true; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index bc0967448cdd..ea805efc66b7 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -68,6 +68,10 @@ static cl::opt<int> cl::desc("Relative frequency of outline region to " "the entry block")); +static cl::opt<unsigned> ExtraOutliningPenalty( + "partial-inlining-extra-penalty", cl::init(0), cl::Hidden, + cl::desc("A debug option to add additional penalty to the computed one.")); + namespace { struct FunctionOutliningInfo { @@ -83,7 +87,7 @@ struct FunctionOutliningInfo { SmallVector<BasicBlock *, 4> Entries; // The return block that is not included in the outlined region. BasicBlock *ReturnBlock; - // The dominating block of the region ot be outlined. + // The dominating block of the region to be outlined. BasicBlock *NonReturnBlock; // The set of blocks in Entries that that are predecessors to ReturnBlock SmallVector<BasicBlock *, 4> ReturnBlockPreds; @@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq( if (hasProfileData(F, OI)) return OutlineRegionRelFreq; - // When profile data is not available, we need to be very - // conservative in estimating the overall savings. We need to make sure - // the outline region relative frequency is not below the threshold - // specified by the option. - OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); + // When profile data is not available, we need to be conservative in + // estimating the overall savings. Static branch prediction can usually + // guess the branch direction right (taken/non-taken), but the guessed + // branch probability is usually not biased enough. In case when the + // outlined region is predicted to be likely, its probability needs + // to be made higher (more biased) to not under-estimate the cost of + // function outlining. On the other hand, if the outlined region + // is predicted to be less likely, the predicted probability is usually + // higher than the actual. For instance, the actual probability of the + // less likely target is only 5%, but the guessed probability can be + // 40%. In the latter case, there is no need for further adjustment. + // FIXME: add an option for this.
+ if (OutlineRegionRelFreq < BranchProbability(45, 100)) + return OutlineRegionRelFreq; + + OutlineRegionRelFreq = std::max( + OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); return OutlineRegionRelFreq; } @@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { if (isa<DbgInfoIntrinsic>(I)) continue; + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::Alloca: + continue; + case Instruction::GetElementPtr: + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + continue; + default: + break; + } + + IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + } + if (CallInst *CI = dyn_cast<CallInst>(I)) { InlineCost += getCallsiteCost(CallSite(CI), DL); continue; @@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts( Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction, BasicBlock *OutliningCallBB) { // First compute the cost of the outlined region 'OI' in the original - // function 'F': + // function 'F'. + // FIXME: The code extractor (outliner) can now do code sinking/hoisting + // to reduce outlining cost. The hoisted/sunk code currently does not + // incur any runtime cost so it is still OK to compare the outlined + // function cost with the outlined region in the original function. + // If this ever changes, we will need to introduce a new extractor API + // to pass the information. int OutlinedRegionCost = 0; for (BasicBlock &BB : *F) { if (&BB != OI->ReturnBlock && @@ -542,8 +584,14 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts( assert(OutlinedFunctionCost >= OutlinedRegionCost && "Outlined function cost should be no less than the outlined region"); - int OutliningRuntimeOverhead = - OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost); + // The code extractor introduces new root and exit stub blocks with + // additional unconditional branches. Those branches will be eliminated + // later with bb layout.
The cost should be adjusted accordingly: + OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; + + int OutliningRuntimeOverhead = OutliningFuncCallCost + + (OutlinedFunctionCost - OutlinedRegionCost) + + ExtraOutliningPenalty; return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead, OutlinedRegionCost); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9fd3a9021a27..16fba32e9805 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -157,7 +157,7 @@ static cl::opt<bool> static cl::opt<bool> EnableGVNSink( "enable-gvn-sink", cl::init(false), cl::Hidden, - cl::desc("Enable the GVN sinking pass (default = on)")); + cl::desc("Enable the GVN sinking pass (default = off)")); PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2c2b7317a1c0..c0798e164c39 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4508,13 +4508,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Builder->CreateAnd(A, B), Op1); - // ~x < ~y --> y < x - // ~x < cst --> ~cst < x + // ~X < ~Y --> Y < X + // ~X < C --> X > ~C if (match(Op0, m_Not(m_Value(A)))) { if (match(Op1, m_Not(m_Value(B)))) return new ICmpInst(I.getPredicate(), B, A); - if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) - return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + + const APInt *C; + if (match(Op1, m_APInt(C))) + return new ICmpInst(I.getSwappedPredicate(), A, + ConstantInt::get(Op1->getType(), ~(*C))); } Instruction *AddI = nullptr; diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ff753c20a94a..df4ee9969c02 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2087,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { switch (I.getNumArgOperands()) { case 3: assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode"); + LLVM_FALLTHROUGH; case 2: CopyOp = I.getArgOperand(0); ConvertOp = I.getArgOperand(1); diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 325b64cd8b43..8aa40d1759de 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -57,6 +57,11 @@ static const char *const SanCovTracePCGuardName = "__sanitizer_cov_trace_pc_guard"; static const char *const SanCovTracePCGuardInitName = "__sanitizer_cov_trace_pc_guard_init"; +static const char *const SanCov8bitCountersInitName = + "__sanitizer_cov_8bit_counters_init"; + +static const char *const SanCovGuardsSectionName = "sancov_guards"; +static const char *const SanCovCountersSectionName = "sancov_counters"; static cl::opt<int> ClCoverageLevel( "sanitizer-coverage-level", @@ -64,14 +69,18 @@ static cl::opt<int> ClCoverageLevel( "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); -static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc", - cl::desc("Experimental pc tracing"), - cl::Hidden, cl::init(false)); +static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc", + cl::desc("Experimental pc tracing"), cl::Hidden, + cl::init(false)); static cl::opt<bool> 
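The InstCombine fold above, ~X < C --> X > ~C, rests on bitwise NOT reversing both the signed and the unsigned order, so negating both sides and swapping the predicate preserves the result; the rewrite also generalizes the old ConstantInt-only code to any APInt constant. A quick exhaustive sanity check of the identity over 8-bit values (a sketch, separate from the LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C) {
      uint8_t UX = uint8_t(X), UC = uint8_t(C);
      assert((uint8_t(~UX) < UC) == (UX > uint8_t(~UC))); // icmp ult form
      int8_t SX = int8_t(UX), SC = int8_t(UC);
      assert((int8_t(~SX) < SC) == (SX > int8_t(~SC)));   // icmp slt form
    }
}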
ClTracePCGuard("sanitizer-coverage-trace-pc-guard", cl::desc("pc tracing with a guard"), cl::Hidden, cl::init(false)); +static cl::opt<bool> ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", + cl::desc("increments 8-bit counter for every edge"), + cl::Hidden, cl::init(false)); + static cl::opt<bool> ClCMPTracing("sanitizer-coverage-trace-compares", cl::desc("Tracing of CMP and similar instructions"), @@ -123,9 +132,10 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { Options.TraceCmp |= ClCMPTracing; Options.TraceDiv |= ClDIVTracing; Options.TraceGep |= ClGEPTracing; - Options.TracePC |= ClExperimentalTracePC; + Options.TracePC |= ClTracePC; Options.TracePCGuard |= ClTracePCGuard; - if (!Options.TracePCGuard && !Options.TracePC) + Options.Inline8bitCounters |= ClInline8bitCounters; + if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters) Options.TracePCGuard = true; // TracePCGuard is default. Options.NoPrune |= !ClPruneBlocks; return Options; @@ -159,11 +169,22 @@ private: void InjectTraceForSwitch(Function &F, ArrayRef<Instruction *> SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); - void CreateFunctionGuardArray(size_t NumGuards, Function &F); + GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, + Function &F, Type *Ty, + const char *Section); + void CreateFunctionLocalArrays(size_t NumGuards, Function &F); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx); - StringRef getSanCovTracePCGuardSection() const; - StringRef getSanCovTracePCGuardSectionStart() const; - StringRef getSanCovTracePCGuardSectionEnd() const; + void CreateInitCallForSection(Module &M, const char *InitFunctionName, + Type *Ty, const std::string &Section); + + void SetNoSanitizeMetadata(Instruction *I) { + I->setMetadata(I->getModule()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); + } + + std::string getSectionName(const std::string &Section) const; + std::string getSectionStart(const std::string &Section) const; + std::string getSectionEnd(const std::string &Section) const; Function *SanCovTracePCIndir; Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; @@ -171,20 +192,48 @@ private: Function *SanCovTraceGepFunction; Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy; + Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, + *Int8Ty, *Int8PtrTy; Module *CurModule; Triple TargetTriple; LLVMContext *C; const DataLayout *DL; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. - bool HasSancovGuardsSection; + GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters. 
SanitizerCoverageOptions Options; }; } // namespace +void SanitizerCoverageModule::CreateInitCallForSection( + Module &M, const char *InitFunctionName, Type *Ty, + const std::string &Section) { + IRBuilder<> IRB(M.getContext()); + Function *CtorFunc; + GlobalVariable *SecStart = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, + getSectionStart(Section)); + SecStart->setVisibility(GlobalValue::HiddenVisibility); + GlobalVariable *SecEnd = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, + nullptr, getSectionEnd(Section)); + SecEnd->setVisibility(GlobalValue::HiddenVisibility); + + std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( + M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, + {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)}); + + if (TargetTriple.supportsCOMDAT()) { + // Use comdat to dedup CtorFunc. + CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); + } else { + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); + } +} + bool SanitizerCoverageModule::runOnModule(Module &M) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; @@ -192,15 +241,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { DL = &M.getDataLayout(); CurModule = &M; TargetTriple = Triple(M.getTargetTriple()); - HasSancovGuardsSection = false; + FunctionGuardArray = nullptr; + Function8bitCounterArray = nullptr; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); IntptrPtrTy = PointerType::getUnqual(IntptrTy); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64Ty = IRB.getInt64Ty(); Int32Ty = IRB.getInt32Ty(); + Int8Ty = IRB.getInt8Ty(); SanCovTracePCIndir = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); @@ -243,34 +295,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { for (auto &F : M) runOnFunction(F); - // Create variable for module (compilation unit) name - if (Options.TracePCGuard) { - if (HasSancovGuardsSection) { - Function *CtorFunc; - GlobalVariable *SecStart = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionStart()); - SecStart->setVisibility(GlobalValue::HiddenVisibility); - GlobalVariable *SecEnd = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionEnd()); - SecEnd->setVisibility(GlobalValue::HiddenVisibility); - - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovTracePCGuardInitName, - {Int32PtrTy, Int32PtrTy}, - {IRB.CreatePointerCast(SecStart, Int32PtrTy), - IRB.CreatePointerCast(SecEnd, Int32PtrTy)}); - - if (TargetTriple.supportsCOMDAT()) { - // Use comdat to dedup CtorFunc. 
- CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); - } else { - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } - } - } + if (FunctionGuardArray) + CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy, + SanCovGuardsSectionName); + if (Function8bitCounterArray) + CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy, + SanCovCountersSectionName); + return true; } @@ -393,17 +424,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { InjectTraceForGep(F, GepTraceTargets); return true; } -void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards, - Function &F) { - if (!Options.TracePCGuard) return; - HasSancovGuardsSection = true; - ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards); - FunctionGuardArray = new GlobalVariable( - *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage, - Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_"); + +GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( + size_t NumElements, Function &F, Type *Ty, const char *Section) { + ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); + auto Array = new GlobalVariable( + *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, + Constant::getNullValue(ArrayTy), "__sancov_gen_"); if (auto Comdat = F.getComdat()) - FunctionGuardArray->setComdat(Comdat); - FunctionGuardArray->setSection(getSanCovTracePCGuardSection()); + Array->setComdat(Comdat); + Array->setSection(getSectionName(Section)); + return Array; +} +void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards, + Function &F) { + if (Options.TracePCGuard) + FunctionGuardArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int32Ty, SanCovGuardsSectionName); + if (Options.Inline8bitCounters) + Function8bitCounterArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int8Ty, SanCovCountersSectionName); } bool SanitizerCoverageModule::InjectCoverage(Function &F, @@ -413,11 +453,11 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, case SanitizerCoverageOptions::SCK_None: return false; case SanitizerCoverageOptions::SCK_Function: - CreateFunctionGuardArray(1, F); + CreateFunctionLocalArrays(1, F); InjectCoverageAtBlock(F, F.getEntryBlock(), 0); return true; default: { - CreateFunctionGuardArray(AllBlocks.size(), F); + CreateFunctionLocalArrays(AllBlocks.size(), F); for (size_t i = 0, N = AllBlocks.size(); i < N; i++) InjectCoverageAtBlock(F, *AllBlocks[i], i); return true; @@ -436,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - assert(Options.TracePC || Options.TracePCGuard); + assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters); for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); @@ -564,8 +604,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. 
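CreateInitCallForSection above factors out one handshake that both the guard arrays and the new 8-bit counter arrays use: each function's array lands in the sancov_guards or sancov_counters section, the linker provides __start___/__stop___ bounds for that section, and a module constructor hands the resulting [start, stop) range to the matching init callback. A standalone C++ model of the runtime's view, with stand-in names (the real callback here is __sanitizer_cov_8bit_counters_init):

#include <cstdint>
#include <cstdio>

// Stand-in for __sanitizer_cov_8bit_counters_init: receives the bounds of
// all counter arrays the module contributed to the section.
static void CountersInit(uint8_t *Start, uint8_t *Stop) {
  std::printf("registered %td counters\n", Stop - Start);
}

// Stand-in for a function-local array the pass would place in the
// sancov_counters section; the linker concatenates all such arrays.
static uint8_t Counters[4];

int main() {
  CountersInit(Counters, Counters + 4); // the generated ctor does this
  Counters[1]++; // inlined per-edge instrumentation: load, add 1, store
  return Counters[1] == 1 ? 0 : 1;
}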
- } else { - assert(Options.TracePCGuard); + } + if (Options.TracePCGuard) { auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), @@ -573,26 +613,39 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreateCall(SanCovTracePCGuard, GuardPtr); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } + if (Options.Inline8bitCounters) { + auto CounterPtr = IRB.CreateGEP( + Function8bitCounterArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(CounterPtr); + auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1)); + auto Store = IRB.CreateStore(Inc, CounterPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); + } } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const { +std::string +SanitizerCoverageModule::getSectionName(const std::string &Section) const { if (TargetTriple.getObjectFormat() == Triple::COFF) return ".SCOV$M"; if (TargetTriple.isOSBinFormatMachO()) - return "__DATA,__sancov_guards"; - return "__sancov_guards"; + return "__DATA,__" + Section; + return "__" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const { +std::string +SanitizerCoverageModule::getSectionStart(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$start$__DATA$__sancov_guards"; - return "__start___sancov_guards"; + return "\1section$start$__DATA$__" + Section; + return "__start___" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const { +std::string +SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$end$__DATA$__sancov_guards"; - return "__stop___sancov_guards"; + return "\1section$end$__DATA$__" + Section; + return "__stop___" + Section; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 3953198fe605..9a7882211bac 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1823,6 +1823,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { // An IV counter must preserve its type. if (IncI->getNumOperands() == 2) break; + LLVM_FALLTHROUGH; default: return nullptr; } diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 930696b036c0..7d8da9b453f9 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -83,6 +84,149 @@ static bool handleSwitchExpect(SwitchInst &SI) { return true; } +/// Handler for PHINodes that define the value argument to an +/// @llvm.expect call. +/// +/// If the operand of the phi has a constant value and it 'contradicts' +/// with the expected value of phi def, then the corresponding incoming +/// edge of the phi is unlikely to be taken. Using that information, +/// the branch probability info for the originating branch can be inferred. 
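Before the handlePhiDef implementation below, a hedged C++ illustration of the source shape it targets, written with the GCC/Clang __builtin_expect builtin (the exact IR reaching the pass after earlier transforms may differ): the expected value flows through a phi of incoming constants, and each incoming constant that contradicts the expectation marks its originating branch edge as unlikely.

#include <cstdio>

int classify(int Cond) {
  int V;
  if (Cond)
    V = 1; // matches the expected value below: likely edge
  else
    V = 0; // contradicts it: this edge's branch gets "unlikely" weights
  if (__builtin_expect(V, 1))
    return std::puts("hot path");
  return std::puts("cold path");
}

int main(int argc, char **) { return classify(argc > 1) < 0; }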
+static void handlePhiDef(CallInst *Expect) { + Value &Arg = *Expect->getArgOperand(0); + ConstantInt *ExpectedValue = cast<ConstantInt>(Expect->getArgOperand(1)); + const APInt &ExpectedPhiValue = ExpectedValue->getValue(); + + // Walk backward through the list of instructions that + // have 'copy' semantics by 'stripping' the copies + // until a PHI node or an instruction of unknown kind + // is reached. Negation via xor is also handled. + // + // C = PHI(...); + // B = C; + // A = B; + // D = __builtin_expect(A, 0); + // + Value *V = &Arg; + SmallVector<Instruction *, 4> Operations; + while (!isa<PHINode>(V)) { + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(V)) { + V = ZExt->getOperand(0); + Operations.push_back(ZExt); + continue; + } + + if (SExtInst *SExt = dyn_cast<SExtInst>(V)) { + V = SExt->getOperand(0); + Operations.push_back(SExt); + continue; + } + + BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V); + if (!BinOp || BinOp->getOpcode() != Instruction::Xor) + return; + + ConstantInt *CInt = dyn_cast<ConstantInt>(BinOp->getOperand(1)); + if (!CInt) + return; + + V = BinOp->getOperand(0); + Operations.push_back(BinOp); + } + + // Executes the recorded operations on input 'Value'. + auto ApplyOperations = [&](const APInt &Value) { + APInt Result = Value; + for (auto Op : llvm::reverse(Operations)) { + switch (Op->getOpcode()) { + case Instruction::Xor: + Result ^= cast<ConstantInt>(Op->getOperand(1))->getValue(); + break; + case Instruction::ZExt: + Result = Result.zext(Op->getType()->getIntegerBitWidth()); + break; + case Instruction::SExt: + Result = Result.sext(Op->getType()->getIntegerBitWidth()); + break; + default: + llvm_unreachable("Unexpected operation"); + } + } + return Result; + }; + + auto *PhiDef = dyn_cast<PHINode>(V); + + // Get the first dominating conditional branch of operand + // i's incoming block. + auto GetDomConditional = [&](unsigned i) -> BranchInst * { + BasicBlock *BB = PhiDef->getIncomingBlock(i); + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (BI && BI->isConditional()) + return BI; + BB = BB->getSinglePredecessor(); + if (!BB) + return nullptr; + BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + return nullptr; + return BI; + }; + + // Now walk through all Phi operands to find phi operands with values + // conflicting with the expected phi output value. Any such operand + // indicates the incoming edge to that operand is unlikely. + for (unsigned i = 0, e = PhiDef->getNumIncomingValues(); i != e; ++i) { + + Value *PhiOpnd = PhiDef->getIncomingValue(i); + ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd); + if (!CI) + continue; + + // Not an interesting case when IsUnlikely is false -- we cannot infer + // anything useful when the operand value matches the expected phi + // output. + if (ExpectedPhiValue == ApplyOperations(CI->getValue())) + continue; + + BranchInst *BI = GetDomConditional(i); + if (!BI) + continue; + + MDBuilder MDB(PhiDef->getContext()); + + // There are two situations in which an operand of the PhiDef comes + // from a given successor of a branch instruction BI. + // 1) When the incoming block of the operand is the successor block; + // 2) When the incoming block is BI's enclosing block and the + // successor is the PhiDef's enclosing block. + // + // Returns true if the operand which comes from OpndIncomingBB + // comes from the outgoing edge of BI that leads to Succ block.
+ auto *OpndIncomingBB = PhiDef->getIncomingBlock(i); + auto IsOpndComingFromSuccessor = [&](BasicBlock *Succ) { + if (OpndIncomingBB == Succ) + // If this successor is the incoming block for this + // Phi operand, then this successor does lead to the Phi. + return true; + if (OpndIncomingBB == BI->getParent() && Succ == PhiDef->getParent()) + // Otherwise, if the edge is directly from the branch + // to the Phi, this successor is the one feeding this + // Phi operand. + return true; + return false; + }; + + if (IsOpndComingFromSuccessor(BI->getSuccessor(1))) + BI->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight)); + else if (IsOpndComingFromSuccessor(BI->getSuccessor(0))) + BI->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight)); + } +} + // Handle both BranchInst and SelectInst. template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { @@ -99,25 +243,31 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) { ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition()); CmpInst::Predicate Predicate; - uint64_t ValueComparedTo = 0; + ConstantInt *CmpConstOperand = nullptr; if (!CmpI) { CI = dyn_cast<CallInst>(BSI.getCondition()); Predicate = CmpInst::ICMP_NE; - ValueComparedTo = 0; } else { Predicate = CmpI->getPredicate(); if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ) return false; - ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1)); + + CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1)); if (!CmpConstOperand) return false; - ValueComparedTo = CmpConstOperand->getZExtValue(); CI = dyn_cast<CallInst>(CmpI->getOperand(0)); } if (!CI) return false; + uint64_t ValueComparedTo = 0; + if (CmpConstOperand) { + if (CmpConstOperand->getBitWidth() > 64) + return false; + ValueComparedTo = CmpConstOperand->getZExtValue(); + } + Function *Fn = CI->getCalledFunction(); if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) return false; @@ -181,6 +331,10 @@ static bool lowerExpectIntrinsic(Function &F) { Function *Fn = CI->getCalledFunction(); if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) { + // Before erasing the llvm.expect, walk backward to find + // phi that define llvm.expect's first arg, and + // infer branch probability: + handlePhiDef(CI); Value *Exp = CI->getArgOperand(0); CI->replaceAllUsesWith(Exp); CI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 77b2bd84f9b6..350b50ffcdd4 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// Rewrite an existing set of gc.statepoints such that they make potential -// relocations performed by the garbage collector explicit in the IR. +// Rewrite call/invoke instructions so as to make potential relocations +// performed by the garbage collector explicit in the IR. // //===----------------------------------------------------------------------===// @@ -2094,9 +2094,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // live in the IR. We'll remove all of these when done. SmallVector<CallInst *, 64> Holders; - // Insert a dummy call with all of the arguments to the vm_state we'll need - // for the actual safepoint insertion. 
This ensures reference arguments in - // the deopt argument list are considered live through the safepoint (and + // Insert a dummy call with all of the deopt operands we'll need for the + // actual safepoint insertion as arguments. This ensures reference operands + // in the deopt argument list are considered live through the safepoint (and // thus makes sure they get relocated.) for (CallSite CS : ToUpdate) { SmallVector<Value *, 64> DeoptValues; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 6e113bccff94..fb1b5813fd79 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -3698,7 +3698,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); + auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); + auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. LoadInst *PLoad; @@ -3709,7 +3710,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, LoadBasePtr->getName() + "."), + LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); } @@ -3719,7 +3720,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1ec3d0d49637..1c1a75c111e9 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -37,10 +37,10 @@ using namespace llvm; /// See comments in Cloning.h. -BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - ValueToValueMapTy &VMap, +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo) { + ClonedCodeInfo *CodeInfo, + DebugInfoFinder *DIFinder) { DenseMap<const MDNode *, MDNode *> Cache; BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); @@ -50,10 +50,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // Loop over all instructions, and copy them over. 
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { + + if (DIFinder && F->getParent() && II->getDebugLoc()) + DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get()); + Instruction *NewInst = II->clone(); - if (F && F->getSubprogram()) - DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(), - F->getSubprogram(), Cache); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); @@ -122,31 +123,38 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(), OldAttrs.getRetAttributes(), NewArgAttrs)); + bool MustCloneSP = + OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); + DISubprogram *SP = OldFunc->getSubprogram(); + if (SP) { + assert(!MustCloneSP || ModuleLevelChanges); + // Add mappings for some DebugInfo nodes that we don't want duplicated + // even if they're distinct. + auto &MD = VMap.MD(); + MD[SP->getUnit()].reset(SP->getUnit()); + MD[SP->getType()].reset(SP->getType()); + MD[SP->getFile()].reset(SP->getFile()); + // If we're not cloning into the same module, no need to clone the + // subprogram + if (!MustCloneSP) + MD[SP].reset(SP); + } + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; OldFunc->getAllMetadata(MDs); for (auto MD : MDs) { - MDNode *NewMD; - bool MustCloneSP = - (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() && - OldFunc->getParent() == NewFunc->getParent()); - if (MustCloneSP) { - auto *SP = cast<DISubprogram>(MD.second); - NewMD = DISubprogram::getDistinct( - NewFunc->getContext(), SP->getScope(), SP->getName(), - SP->getLinkageName(), SP->getFile(), SP->getLine(), SP->getType(), - SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(), - SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(), - SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(), - SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(), - SP->getVariables(), SP->getThrownTypes()); - } else - NewMD = - MapMetadata(MD.second, VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, - TypeMapper, Materializer); - NewFunc->addMetadata(MD.first, *NewMD); + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); } + // When we remap instructions, we want to avoid duplicating inlined + // DISubprograms, so record all subprograms we find as we duplicate + // instructions and then freeze them in the MD map. + DebugInfoFinder DIFinder; + // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of // recursive functions into themselves. @@ -156,7 +164,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, + SP ? &DIFinder : nullptr); // Add basic block mapping. VMap[&BB] = CBB; @@ -178,6 +187,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } + for (DISubprogram *ISP : DIFinder.subprograms()) { + if (ISP != SP) { + VMap.MD()[ISP].reset(ISP); + } + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. 
This uses VMap to do all the hard work. for (Function::iterator BB = @@ -226,7 +241,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "", + CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", CodeInfo); return NewF; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 8b9a64c220cc..799eef21dc4e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4779,6 +4779,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { scalarizeInstruction(&I, true); break; } + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -7396,6 +7397,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // likely. return Cost / getReciprocalPredBlockProb(); } + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index e6f78e6b94a3..d1349535f298 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -259,6 +259,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, if (hasVectorInstrinsicScalarOpd(ID, 1)) { return (CI->getArgOperand(1) == Scalar); } + LLVM_FALLTHROUGH; } default: return false; @@ -4749,56 +4750,18 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P, return nullptr; } -namespace { -/// Tracks instructons and its children. -class WeakTrackingVHWithLevel final : public CallbackVH { - /// Operand index of the instruction currently beeing analized. - unsigned Level = 0; - /// Is this the instruction that should be vectorized, or are we now - /// processing children (i.e. operands of this instruction) for potential - /// vectorization? - bool IsInitial = true; - -public: - explicit WeakTrackingVHWithLevel() = default; - WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){}; - /// Restart children analysis each time it is repaced by the new instruction. - void allUsesReplacedWith(Value *New) override { - setValPtr(New); - Level = 0; - IsInitial = true; - } - /// Check if the instruction was not deleted during vectorization. - bool isValid() const { return !getValPtr(); } - /// Is the istruction itself must be vectorized? - bool isInitial() const { return IsInitial; } - /// Try to vectorize children. - void clearInitial() { IsInitial = false; } - /// Are all children processed already? - bool isFinal() const { - assert(getValPtr() && - (isa<Instruction>(getValPtr()) && - cast<Instruction>(getValPtr())->getNumOperands() >= Level)); - return getValPtr() && - cast<Instruction>(getValPtr())->getNumOperands() == Level; - } - /// Get next child operation. - Value *nextOperand() { - assert(getValPtr() && isa<Instruction>(getValPtr()) && - cast<Instruction>(getValPtr())->getNumOperands() > Level); - return cast<Instruction>(getValPtr())->getOperand(Level++); - } - virtual ~WeakTrackingVHWithLevel() = default; -}; -} // namespace - -/// \brief Attempt to reduce a horizontal reduction. -/// If it is legal to match a horizontal reduction feeding -/// the phi node P with reduction operators Root in a basic block BB, then check -/// if it can be done. -/// \returns true if a horizontal reduction was matched and reduced. 
-/// \returns false if a horizontal reduction was not matched. -static bool canBeVectorized( +/// Attempt to reduce a horizontal reduction. +/// If it is legal to match a horizontal reduction feeding the phi node \a P +/// with reduction operators \a Root (or one of its operands) in a basic block +/// \a BB, then check if it can be done. If horizontal reduction is not found +/// and the root instruction is a binary operation, vectorization of the operands is +/// attempted. +/// \returns true if a horizontal reduction was matched and reduced or operands +/// of one of the binary instructions were vectorized. +/// \returns false if a horizontal reduction was not matched (or not possible) +/// or no vectorization of any binary operation feeding \a Root instruction was +/// performed. +static bool tryToVectorizeHorReductionOrInstOperands( PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI, const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) { @@ -4810,56 +4773,62 @@ static bool canBeVectorized( if (Root->getParent() != BB) return false; - SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root); + // Start the analysis from the Root instruction. If horizontal reduction is + // found, try to vectorize it. If it is not a horizontal reduction or + // vectorization is not possible or not effective, and the currently analyzed + // instruction is a binary operation, try to vectorize the operands, using + // pre-order DFS traversal order. If the operands were not vectorized, repeat + // the same procedure considering each operand as a possible root of the + // horizontal reduction. + // Interrupt the process if the Root instruction itself was vectorized or all + // sub-trees not higher than RecursionMaxDepth were analyzed/vectorized. + SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0}); SmallSet<Value *, 8> VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V = Stack.back(); - if (!V) { - Stack.pop_back(); + Value *V; + unsigned Level; + std::tie(V, Level) = Stack.pop_back_val(); + if (!V) continue; - } auto *Inst = dyn_cast<Instruction>(V); - if (!Inst || isa<PHINode>(Inst)) { - Stack.pop_back(); + if (!Inst || isa<PHINode>(Inst)) continue; - } - if (Stack.back().isInitial()) { - Stack.back().clearInitial(); - if (auto *BI = dyn_cast<BinaryOperator>(Inst)) { - HorizontalReduction HorRdx; - if (HorRdx.matchAssociativeReduction(P, BI)) { - if (HorRdx.tryToReduce(R, TTI)) { - Res = true; - P = nullptr; - continue; - } - } - if (P) { - Inst = dyn_cast<Instruction>(BI->getOperand(0)); - if (Inst == P) - Inst = dyn_cast<Instruction>(BI->getOperand(1)); - if (!Inst) { - P = nullptr; - continue; - } + if (auto *BI = dyn_cast<BinaryOperator>(Inst)) { + HorizontalReduction HorRdx; + if (HorRdx.matchAssociativeReduction(P, BI)) { + if (HorRdx.tryToReduce(R, TTI)) { + Res = true; + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + continue; } } - P = nullptr; - if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) { - Res = true; - continue; + if (P) { + Inst = dyn_cast<Instruction>(BI->getOperand(0)); + if (Inst == P) + Inst = dyn_cast<Instruction>(BI->getOperand(1)); + if (!Inst) { + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node.
+ P = nullptr; + continue; + } } } - if (Stack.back().isFinal()) { - Stack.pop_back(); + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) { + Res = true; continue; } - if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand())) - if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second && - Stack.size() < RecursionMaxDepth) - Stack.push_back(NextV); + // Try to vectorize operands. + if (++Level < RecursionMaxDepth) + for (auto *Op : Inst->operand_values()) + Stack.emplace_back(Op, Level); } return Res; } @@ -4876,10 +4845,10 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V, if (!isa<BinaryOperator>(I)) P = nullptr; // Try to match and vectorize a horizontal reduction. - return canBeVectorized(P, I, BB, R, TTI, - [this](BinaryOperator *BI, BoUpSLP &R) -> bool { - return tryToVectorize(BI, R); - }); + return tryToVectorizeHorReductionOrInstOperands( + P, I, BB, R, TTI, [this](BinaryOperator *BI, BoUpSLP &R) -> bool { + return tryToVectorize(BI, R); + }); } bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index ef56fa1b9367..6793a49a2ddc 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -188,6 +188,7 @@ else() # if this is included from LLVM's CMake else() get_cmake_property(variableNames VARIABLES) add_custom_target(builtins) + add_custom_target(install-builtins) foreach(target ${LLVM_BUILTIN_TARGETS}) string(REPLACE "-" ";" builtin_target_list ${target}) foreach(item ${builtin_target_list}) @@ -218,6 +219,7 @@ else() # if this is included from LLVM's CMake USE_TOOLCHAIN ${EXTRA_ARGS}) add_dependencies(builtins builtins-${target}) + add_dependencies(install-builtins install-builtins-${target}) endforeach() endif() set(deps builtins) diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir index 0557008ceb4f..b3e41c7751c5 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir @@ -10,18 +10,27 @@ entry: ret void } + + declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 + + attributes #1 = { nounwind } + ... --- name: test_constant registers: - { id: 0, class: _ } + - { id: 1, class: _ } body: | bb.0.entry: ; CHECK-LABEL: name: test_constant ; CHECK: %0(s32) = G_CONSTANT i32 5 + ; CHECK: %1(s1) = G_CONSTANT i1 false %0(s32) = G_CONSTANT i32 5 + %1(s1) = G_CONSTANT i1 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %0, %0, %0, %0, %0, %1, %1; ... 
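The SLPVectorizer rewrite above drops the stateful WeakTrackingVHWithLevel handle in favor of a plain stack of (value, level) pairs, that is, an iterative pre-order DFS that refuses to descend past RecursionMaxDepth. A standalone sketch of that traversal shape on a toy operand graph (no LLVM types):

#include <cstdio>
#include <tuple>
#include <utility>
#include <vector>

int main() {
  const unsigned RecursionMaxDepth = 2;
  // Operands[V] lists the operand nodes of V; node 0 is the root.
  std::vector<std::vector<int>> Operands = {{1, 2}, {3}, {4}, {}, {}};
  std::vector<std::pair<int, unsigned>> Stack = {{0, 0}};
  while (!Stack.empty()) {
    int V;
    unsigned Level;
    std::tie(V, Level) = Stack.back();
    Stack.pop_back();
    std::printf("visit node %d at depth %u\n", V, Level);
    if (++Level < RecursionMaxDepth) // stop descending past the depth cap
      for (int Op : Operands[V])
        Stack.emplace_back(Op, Level);
  }
}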
--- diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll index e245e4296df2..d8f9e4f51ff4 100644 --- a/test/CodeGen/AMDGPU/basic-branch.ll +++ b/test/CodeGen/AMDGPU/basic-branch.ll @@ -34,8 +34,6 @@ end: ; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]] ; GCN: buffer_store_dword -; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; TODO: This waitcnt can be eliminated ; GCN: {{^}}[[END]]: ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/branch-condition-and.ll b/test/CodeGen/AMDGPU/branch-condition-and.ll index 68b77ea3490e..662ea37a2b99 100644 --- a/test/CodeGen/AMDGPU/branch-condition-and.ll +++ b/test/CodeGen/AMDGPU/branch-condition-and.ll @@ -19,9 +19,8 @@ ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %bb4 ; GCN: ds_write_b32 -; GCN: s_waitcnt -; GCN-NEXT: [[BB5]] +; GCN: [[BB5]] ; GCN: s_or_b64 exec, exec ; GCN-NEXT: s_endpgm ; GCN-NEXT: .Lfunc_end diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll index 263059d4a6ed..d3f835bdf163 100644 --- a/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -223,7 +223,6 @@ bb3: ; GCN-NEXT: [[BB2]]: ; %bb2 ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17 ; GCN: buffer_store_dword [[BB2_K]] -; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2 ; GCN-NEXT: s_getpc_b64 vcc @@ -393,7 +392,6 @@ bb3: ; GCN-NEXT: ; BB#2: ; %if_uniform ; GCN: buffer_store_dword -; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: [[ENDIF]]: ; %endif ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]] diff --git a/test/CodeGen/AMDGPU/commute-compares.ll b/test/CodeGen/AMDGPU/commute-compares.ll index 66148a43a271..caba83c50428 100644 --- a/test/CodeGen/AMDGPU/commute-compares.ll +++ b/test/CodeGen/AMDGPU/commute-compares.ll @@ -35,7 +35,7 @@ define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspa ; FIXME: Why isn't this being folded as a constant? 
; GCN-LABEL: {{^}}commute_ne_litk_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039 -; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}} +; GCN: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, [[K]] define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -99,11 +99,9 @@ define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrsp ret void } -; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm - ; GCN-LABEL: {{^}}commute_ule_64_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}} -; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}} +; GCN: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, [[K]] define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -702,7 +700,7 @@ define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double ad ; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} -; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}} +; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]] define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: %stack0 = alloca i32 diff --git a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index d3e6c11ef908..79d9b1691878 100644 --- a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -37,22 +37,21 @@ ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] +; GCN: s_waitcnt lgkmcnt(0) ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload -; GCN: s_waitcnt vmcnt(0) ; Spill val register ; GCN: v_add_i32_e32 [[VAL:v[0-9]+]], vcc, [[LOAD1]], [[RELOAD_LOAD0]] ; GCN: buffer_store_dword [[VAL]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) ; VMEM: [[ENDIF]]: ; Reload and restore exec mask +; VGPR: s_waitcnt lgkmcnt(0) ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -119,7 +118,6 @@ endif: ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]] ; GCN-NEXT: s_cbranch_execz [[END]] @@ -130,7 +128,6 @@ endif: ; GCN: v_cmp_ne_u32_e32 vcc, ; GCN: s_and_b64 vcc, exec, vcc ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_cbranch_vccnz [[LOOP]] @@ -197,7 +194,6 @@ end: ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_mov_b64 exec, [[CMP0]] -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; FIXME: It makes no sense to put this skip here ; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]] @@ -235,7 +231,6 @@ end: ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_xor_b64 exec, exec, 
s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN-NEXT: s_cbranch_execz [[ENDIF]] @@ -245,14 +240,12 @@ end: ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ELSE]]: ; %else ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload ; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_branch [[FLOW]] ; GCN: [[ENDIF]]: diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index b18ae353ca4c..fab1f8d12253 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -120,8 +120,7 @@ entry: ; FIXME: The waitcnt for the argument load can go after the loop ; IDXMODE: s_set_gpr_idx_on 0, src0 ; GCN: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, exec -; GCN: s_waitcnt lgkmcnt(0) - +; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]: ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v{{[0-9]+}} ; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe0 @@ -250,8 +249,6 @@ entry: ; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}} ; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; GCN: s_waitcnt lgkmcnt(0) - ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]: ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]] @@ -290,7 +287,6 @@ entry: ; IDXMODE: s_set_gpr_idx_on 0, dst ; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; GCN: s_waitcnt lgkmcnt(0) ; The offset depends on the register that holds the first element of the vector. 
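The v_readfirstlane_b32 loops these checks pin down implement the usual waterfall idiom for a divergent index: each iteration broadcasts the first active lane's index as a scalar, runs every lane that holds that same index, and masks those lanes off until none remain. A standalone C++ simulation of the control flow (lane values invented for illustration):

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Idx = {3, 1, 3, 0};        // divergent per-lane indices
  std::vector<bool> Active(Idx.size(), true); // exec mask analogue
  std::size_t Remaining = Idx.size();
  while (Remaining > 0) {
    int Uniform = -1;
    for (std::size_t L = 0; L != Idx.size(); ++L)
      if (Active[L]) { Uniform = Idx[L]; break; } // v_readfirstlane_b32
    for (std::size_t L = 0; L != Idx.size(); ++L)
      if (Active[L] && Idx[L] == Uniform) { // v_cmp_eq_u32 + s_and_saveexec
        std::printf("lane %zu uses index %d\n", L, Uniform);
        Active[L] = false;
        --Remaining;
      }
  }
}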
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]] @@ -330,9 +326,9 @@ entry: ; IDXMODE: s_set_gpr_idx_on 0, src0 ; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec -; GCN: s_waitcnt vmcnt(0) ; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]: +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]] ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]] @@ -411,6 +407,7 @@ bb2: ; IDXMODE: s_set_gpr_idx_on 0, dst ; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]: +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]] ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]] diff --git a/test/CodeGen/AMDGPU/infinite-loop.ll b/test/CodeGen/AMDGPU/infinite-loop.ll index 73482756b8c8..3caffc342c7e 100644 --- a/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/test/CodeGen/AMDGPU/infinite-loop.ll @@ -4,8 +4,8 @@ ; SI-LABEL: {{^}}infinite_loop: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 ; SI: BB0_1: +; SI: s_waitcnt lgkmcnt(0) ; SI: buffer_store_dword [[REG]] -; SI: s_waitcnt vmcnt(0) expcnt(0) ; SI: s_branch BB0_1 define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 350dd38ef583..1edccff3bf15 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -421,11 +421,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspac } ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr: -; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} -; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 ; GCN: flat_load_dword [[IDX:v[0-9]+]] ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 +; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] @@ -449,11 +448,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspac } ; GCN-LABEL: {{^}}v_insertelement_v2f16_dynamic_vgpr: -; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} -; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 ; GCN: flat_load_dword [[IDX:v[0-9]+]] ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 +; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll index 555a1d23ebe9..e50455f6f9a1 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll @@ -58,7 +58,7 @@ main_body: ; ;CHECK-LABEL: {{^}}buffer_store_wait: ;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen -;CHECK: s_waitcnt vmcnt(0) expcnt(0) +;CHECK: s_waitcnt expcnt(0) ;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll index 5ae255c7a26c..81597516d5f2 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll @@ -58,7 +58,7 @@ main_body: ; 
;CHECK-LABEL: {{^}}buffer_store_wait: ;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen -;CHECK: s_waitcnt vmcnt(0) expcnt(0) +;CHECK: s_waitcnt expcnt(0) ;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 idxen ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 idxen diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll index 02642142ae2c..d97644262016 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll @@ -5,7 +5,6 @@ declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0 ; FUNC-LABEL: {{^}}ds_swizzle: ; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11") -; CHECK: s_waitcnt lgkmcnt define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind { %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 store i32 %swizzle, i32 addrspace(1)* %out, align 4 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll index c74c0fa15855..a289f7b0cfb1 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -130,7 +130,7 @@ main_body: ; ; GCN-LABEL: {{^}}image_store_wait: ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) expcnt(0) +; GCN: s_waitcnt expcnt(0) ; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm ; GCN: s_waitcnt vmcnt(0) ; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll index 055dddbfa8af..9a27809f37bb 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -2,6 +2,8 @@ ; RUN: llc -mtriple=amdgcn--amdhsa-opencl -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-OPENCL %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s ; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-NOENV %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgizcl -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-OPENCL %s ; ALL-LABEL: {{^}}test: ; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll index ef9cda142850..3d815cca5be2 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -1,10 +1,13 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=NOAUTO %s +; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=AUTO %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=NOAUTO %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=AUTO %s ; GCN-LABEL: {{^}}test_barrier: ; GFX8: buffer_store_dword ; GFX9: flat_store_dword -; GCN: s_waitcnt +; NOAUTO: s_waitcnt +; AUTO-NOT: s_waitcnt ; GCN: s_barrier define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { entry: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll index b488565c6b3a..224b2ed72e3b 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @test_s_dcache_inv() #0 { ; GCN: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_inv_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.inv() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll index a3a5c329f411..f96d5db5794a 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @test_s_dcache_inv_vol() #0 { ; GCN: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.inv.vol() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll index 909a85dda3e8..99b651350439 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_wb() #0 { ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_wb_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.wb() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll index 217bf97c41a4..844fcecdb48b 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_wb_vol() #0 { ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.wb.vol() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll index 6083ec885a86..ee58d359a935 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll @@ -18,8 +18,8 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> ; ; CHECK-LABEL: {{^}}test2: ; CHECK: image_load -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: s_waitcnt +; CHECK-NEXT: s_waitcnt +; CHECK: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: image_store define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) { %t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) diff --git a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index 
9d0b6b395996..82c27f204a47 100644 --- a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -362,6 +362,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock ; GCN-NEXT: s_or_b64 exec, exec +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: ; return define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 { diff --git a/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll new file mode 100644 index 000000000000..bced3c408c52 --- /dev/null +++ b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: @volatile_load +; GCN: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 +; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} + +define amdgpu_kernel void @volatile_load(i32 addrspace(1)* %arg, i32 addrspace(1)* nocapture %arg1) { +bb: + %tmp18 = load volatile i32, i32 addrspace(1)* %arg, align 4 + %tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 5 + store i32 %tmp18, i32 addrspace(1)* %tmp26, align 4 + ret void +} diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll index f2fbacbab82e..e7a05d94cdc4 100644 --- a/test/CodeGen/AMDGPU/ret_jump.ll +++ b/test/CodeGen/AMDGPU/ret_jump.ll @@ -65,7 +65,6 @@ ret.bb: ; preds = %else, %main_body ; GCN-NEXT: ; %unreachable.bb ; GCN: ds_write_b32 -; GCN: s_waitcnt ; GCN: ; divergent unreachable ; GCN: ; %ret.bb @@ -73,6 +72,7 @@ ret.bb: ; preds = %else, %main_body ; GCN: ; %UnifiedReturnBlock ; GCN-NEXT: s_or_b64 exec, exec +; GCN-NEXT: s_waitcnt ; GCN-NEXT: ; return ; GCN-NEXT: .Lfunc_end define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 { diff --git a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll index cb010cf15300..5b0d5274d5bc 100644 --- a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll +++ b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll @@ -9,7 +9,6 @@ ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %unreachable ; GCN: ds_write_b32 ; GCN: ; divergent unreachable -; GCN: s_waitcnt ; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock ; GCN-NEXT: s_or_b64 exec, exec @@ -38,7 +37,6 @@ ret: ; GCN-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %unreachable ; GCN: ds_write_b32 ; GCN: ; divergent unreachable -; GCN: s_waitcnt ; GCN: [[RETURN]]: ; GCN-NEXT: s_or_b64 exec, exec @@ -66,7 +64,6 @@ unreachable: ; GCN: [[UNREACHABLE]]: ; GCN: ds_write_b32 -; GCN: s_waitcnt define amdgpu_kernel void 
@uniform_lower_control_flow_unreachable_terminator(i32 %arg0) #0 { bb: %tmp63 = icmp eq i32 %arg0, 32 diff --git a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll index 343211b0219c..333113e8a9b6 100644 --- a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll +++ b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll @@ -5,7 +5,7 @@ ; GCN-FUNC: {{^}}vccz_workaround: ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0 ; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}} -; GCN: s_waitcnt lgkmcnt(0) +; VCCZ-BUG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VCCZ-BUG: s_mov_b64 vcc, vcc ; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]] diff --git a/test/CodeGen/AMDGPU/spill-m0.ll b/test/CodeGen/AMDGPU/spill-m0.ll index 8f1aebfe9ceb..7e8fa118c2c2 100644 --- a/test/CodeGen/AMDGPU/spill-m0.ll +++ b/test/CodeGen/AMDGPU/spill-m0.ll @@ -18,13 +18,11 @@ ; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 ; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]] ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Spill -; TOVMEM: s_waitcnt vmcnt(0) ; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 ; TOSMEM: s_add_u32 m0, s3, 0x100{{$}} ; TOSMEM-NOT: [[M0_COPY]] ; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill -; TOSMEM: s_waitcnt lgkmcnt(0) ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] diff --git a/test/CodeGen/AMDGPU/sub.i16.ll b/test/CodeGen/AMDGPU/sub.i16.ll index cf9e714ea6d3..1d407ea9bcda 100644 --- a/test/CodeGen/AMDGPU/sub.i16.ll +++ b/test/CodeGen/AMDGPU/sub.i16.ll @@ -85,9 +85,9 @@ define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i1 ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64: +; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0 ; VI: flat_load_ushort [[A:v[0-9]+]] ; VI: flat_load_ushort [[B:v[0-9]+]] -; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0 ; VI-DAG: v_subrev_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]] ; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll index 85a8929ebe58..a67f36d0a7e8 100644 --- a/test/CodeGen/AMDGPU/valu-i1.ll +++ b/test/CodeGen/AMDGPU/valu-i1.ll @@ -11,7 +11,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; SI: v_cmp_lt_i32_e32 vcc, 0, ; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc ; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]] -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]] ; SI-NEXT: s_cbranch_execz [[FLOW_BB]] @@ -72,7 +71,6 @@ end: ; SI-NEXT: BB{{[0-9]+_[0-9]+}}: ; SI: buffer_store_dword -; SI-NEXT: s_waitcnt ; SI-NEXT: {{^}}[[EXIT]]: ; SI: s_or_b64 exec, exec, [[BR_SREG]] @@ -101,7 +99,6 @@ exit: ; SI-NEXT: BB{{[0-9]+_[0-9]+}}: ; SI: buffer_store_dword -; SI-NEXT: s_waitcnt ; SI-NEXT: {{^}}[[EXIT]]: ; SI: s_or_b64 exec, exec, [[BR_SREG]] @@ -132,7 +129,6 @@ exit: ; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %exit ; SI: ds_write_b32 -; SI: s_waitcnt ; SI-NEXT: {{^}}[[FLOW]]: ; SI-NEXT: s_or_saveexec_b64 @@ -140,8 +136,8 @@ exit: ; SI-NEXT: ; mask branch [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]] ; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %then -; SI: buffer_store_dword -; SI-NEXT: s_waitcnt +; SI: s_waitcnt +; SI-NEXT: buffer_store_dword ; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock ; SI: s_or_b64 exec, exec diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index f4aba880ff76..1c7769894a27 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -974,6 +974,68 @@ entry: ret [2 x i32*] %r } +declare arm_aapcscc {i32, i32} @structs_target({i32, i32}, {i32*, float, i32, double}) + +define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x, {i32*, float, i32, double} %y) { +; CHECK-LABEL: test_structs +; CHECK: fixedStack: +; CHECK-DAG: id: [[Y2_ID:[0-9]+]], offset: 0, size: 4 +; CHECK-DAG: id: [[Y3_ID:[0-9]+]], offset: 8, size: 8 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 +; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 +; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 +; CHECK: [[Y2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Y2_ID]] +; CHECK: [[Y2:%[0-9]+]](s32) = G_LOAD [[Y2_ADDR]](p0){{.*}}load 4 +; CHECK: [[Y3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Y3_ID]] +; CHECK: [[Y3:%[0-9]+]](s64) = G_LOAD [[Y3_ADDR]](p0){{.*}}load 8 +; CHECK: [[X_0:%[0-9]+]](s64) = IMPLICIT_DEF +; CHECK: [[X_1:%[0-9]+]](s64) = G_INSERT [[X_0]], [[X0]](s32), 0 +; CHECK: [[X_2:%[0-9]+]](s64) = G_INSERT [[X_1]], [[X1]](s32), 32 +; CHECK: [[X:%[0-9]+]](s64) = COPY [[X_2]] +; CHECK: [[Y_0:%[0-9]+]](s192) = IMPLICIT_DEF +; CHECK: [[Y_1:%[0-9]+]](s192) = G_INSERT [[Y_0]], [[Y0]](s32), 0 +; CHECK: [[Y_2:%[0-9]+]](s192) = G_INSERT [[Y_1]], [[Y1]](s32), 32 +; CHECK: [[Y_3:%[0-9]+]](s192) = G_INSERT [[Y_2]], [[Y2]](s32), 64 +; CHECK: [[Y_4:%[0-9]+]](s192) = G_INSERT [[Y_3]], [[Y3]](s64), 128 +; CHECK: 
[[Y:%[0-9]+]](s192) = COPY [[Y_4]] +; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[X0:%[0-9]+]](s32) = G_EXTRACT [[X]](s64), 0 +; CHECK: [[X1:%[0-9]+]](s32) = G_EXTRACT [[X]](s64), 32 +; CHECK: [[Y0:%[0-9]+]](s32) = G_EXTRACT [[Y]](s192), 0 +; CHECK: [[Y1:%[0-9]+]](s32) = G_EXTRACT [[Y]](s192), 32 +; CHECK: [[Y2:%[0-9]+]](s32) = G_EXTRACT [[Y]](s192), 64 +; CHECK: [[Y3:%[0-9]+]](s64) = G_EXTRACT [[Y]](s192), 128 +; CHECK-DAG: %r0 = COPY [[X0]](s32) +; CHECK-DAG: %r1 = COPY [[X1]](s32) +; CHECK-DAG: %r2 = COPY [[Y0]](s32) +; CHECK-DAG: %r3 = COPY [[Y1]](s32) +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[Y2_OFF:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[Y2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Y2_OFF]](s32) +; CHECK: G_STORE [[Y2]](s32), [[Y2_ADDR]](p0){{.*}}store 4 +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[Y3_OFF:%[0-9]+]](s32) = G_CONSTANT i32 8 +; CHECK: [[Y3_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Y3_OFF]](s32) +; CHECK: G_STORE [[Y3]](s64), [[Y3_ADDR]](p0){{.*}}store 8 +; CHECK: BLX @structs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[R_0:%[0-9]+]](s64) = IMPLICIT_DEF +; CHECK: [[R_1:%[0-9]+]](s64) = G_INSERT [[R_0]], [[R0]](s32), 0 +; CHECK: [[R_2:%[0-9]+]](s64) = G_INSERT [[R_1]], [[R1]](s32), 32 +; CHECK: [[R:%[0-9]+]](s64) = COPY [[R_2]] +; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[R]](s64), 0 +; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[R]](s64), 32 +; CHECK: %r0 = COPY [[R0]](s32) +; CHECK: %r1 = COPY [[R1]](s32) +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 + %r = notail call arm_aapcscc {i32, i32} @structs_target({i32, i32} %x, {i32*, float, i32, double} %y) + ret {i32, i32} %r +} + define i32 @test_shufflevector_s32_v2s32(i32 %arg) { ; CHECK-LABEL: name: test_shufflevector_s32_v2s32 ; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0 diff --git a/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll index ef30cb1063f8..34f00aebe1be 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll @@ -54,10 +54,15 @@ define [16 x i32] @test_ret_demotion() { ret [16 x i32] %res } -define void @test_structs({i32, i32} %struct) { -; CHECK: remark: {{.*}} unable to lower arguments: void ({ i32, i32 })* -; CHECK-LABEL: warning: Instruction selection used fallback path for test_structs - ret void +%large.struct = type { i32, i32, i32, i32, i32} ; Doesn't fit in R0-R3 + +declare %large.struct @large_struct_return_target() + +define %large.struct @test_large_struct_return() { +; CHECK: remark: {{.*}} unable to translate instruction: call{{.*}} @large_struct_return_target +; CHECK-LABEL: warning: Instruction selection used fallback path for test_large_struct_return + %r = call %large.struct @large_struct_return_target() + ret %large.struct %r } define void @test_vararg_definition(i32 %a, ...) 
{ diff --git a/test/CodeGen/ARM/cortex-a57-misched-alu.ll b/test/CodeGen/ARM/cortex-a57-misched-alu.ll new file mode 100644 index 000000000000..960ee87532b0 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-alu.ll @@ -0,0 +1,81 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; Check the latencies of the ALU shifted-operand variants. +; +; CHECK: ********** MI Scheduling ********** +; CHECK: foo:BB#0 entry + +; ALU, basic - 1 cyc I0/I1 +; CHECK: EORrr +; CHECK: rdefs left +; CHECK-NEXT: Latency : 1 + +; ALU, shift by immed - 2 cyc M +; CHECK: ADDrsi +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +; ALU, shift by register, unconditional - 2 cyc M +; CHECK: RSBrsr +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +; ALU, shift by register, conditional - 2 cyc I0/I1 +; CHECK: ANDrsr +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +; Checking scheduling units + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; Skipping COPY +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: ANDrsr +; CHECK: Ready +; CHECK-NEXT: A57UnitI + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: CMPri +; CHECK: Ready +; CHECK-NEXT: A57UnitI + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: RSBrsr +; CHECK: Ready +; CHECK-NEXT: A57UnitM + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: ADDrsi +; CHECK: Ready +; CHECK-NEXT: A57UnitM + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: EORrr +; CHECK: Ready +; CHECK-NEXT: A57UnitI + + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv8r-arm-none-eabi" + +; Function Attrs: norecurse nounwind readnone +define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %xor = xor i32 %a, %b + %xor_shl = shl i32 %xor, 2 + %add = add i32 %xor_shl, %d + %add_ashr = ashr i32 %add, %a + %sub = sub i32 %add_ashr, %a + %sub_lshr_pred = lshr i32 %sub, %c + %pred = icmp sgt i32 %a, 4 + %and = and i32 %sub_lshr_pred, %b + %rv = select i1 %pred, i32 %and, i32 %d + ret i32 %rv +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/test/CodeGen/ARM/cortex-a57-misched-basic.ll new file mode 100644 index 000000000000..2ec50b9d3343 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-basic.ll @@ -0,0 +1,53 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=A57_SCHED +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC + +; Check instruction latencies for both the generic and the cortex-a57 scheduling models. +; SDIV should be scheduled at the beginning of the block (20 cycles on the independent M unit).
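Worked out from the latencies the checks below expect (SDIV 20, EORrr 1, LDRi12 4, ADDrr 1, SUBrr 1), and roughly, ignoring issue width and in-order effects:

  SDIV issued first:  SUBrr can start at max(20, 4 + 1) cycles  ->  ~21 cycles total
  SDIV issued last:   SUBrr can start at (4 + 1) + 20 cycles    ->  ~26 cycles total

Hoisting the long-latency divide lets it run on the M pipe in parallel with the short EOR/LDR/ADD chain, which is why A57_SCHED expects SDIV before LDRi12 in the final schedule while GENERIC does not.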
+; +; CHECK: ********** MI Scheduling ********** +; CHECK: foo:BB#0 entry + +; GENERIC: SDIV +; GENERIC: Latency : 1 +; GENERIC: EORrr +; GENERIC: Latency : 1 +; GENERIC: LDRi12 +; GENERIC: Latency : 4 +; GENERIC: ADDrr +; GENERIC: Latency : 1 +; GENERIC: SUBrr +; GENERIC: Latency : 1 + +; A57_SCHED: SDIV +; A57_SCHED: Latency : 20 +; A57_SCHED: EORrr +; A57_SCHED: Latency : 1 +; A57_SCHED: LDRi12 +; A57_SCHED: Latency : 4 +; A57_SCHED: ADDrr +; A57_SCHED: Latency : 1 +; A57_SCHED: SUBrr +; A57_SCHED: Latency : 1 + +; CHECK: ** Final schedule for BB#0 *** +; GENERIC: LDRi12 +; GENERIC: SDIV +; A57_SCHED: SDIV +; A57_SCHED: LDRi12 +; CHECK: ********** INTERVALS ********** + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv8r-arm-none-eabi" + +; Function Attrs: norecurse nounwind readnone +define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32* %d) local_unnamed_addr #0 { +entry: + %xor = xor i32 %c, %b + %ld = load i32, i32* %d + %add = add nsw i32 %xor, %ld + %div = sdiv i32 %a, %b + %sub = sub i32 %div, %add + ret i32 %sub +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll new file mode 100644 index 000000000000..d54848a6bcf1 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll @@ -0,0 +1,37 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; + +@a = global i32 0, align 4 +@b = global i32 0, align 4 +@c = global i32 0, align 4 + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have LDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: LDMIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 4 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=1 +; CHECK-NEXT: data +; CHECK-SAME: Latency=3 +; CHECK-NEXT: data +; CHECK-SAME: Latency=3 +; CHECK-NEXT: data +; CHECK-SAME: Latency=4 +define i32 @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize { + %1 = load i32, i32* @a, align 4 + %2 = load i32, i32* @b, align 4 + %3 = load i32, i32* @c, align 4 + + %ptr_after = getelementptr i32, i32* @a, i32 3 + + %ptr_val = ptrtoint i32* %ptr_after to i32 + %mul1 = mul i32 %ptr_val, %1 + %mul2 = mul i32 %mul1, %2 + %mul3 = mul i32 %mul2, %3 + ret i32 %mul3 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-ldm.ll b/test/CodeGen/ARM/cortex-a57-misched-ldm.ll new file mode 100644 index 000000000000..9cb076651f5b --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-ldm.ll @@ -0,0 +1,28 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have LDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: LDMIA +; CHECK: rdefs left +; CHECK-NEXT: Latency : 3 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=3 +; CHECK-NEXT: data +; CHECK-SAME: Latency=3 + +define i32 @foo(i32* %a) nounwind optsize { +entry: + %b = getelementptr i32, i32* %a, i32 1 + %c = getelementptr i32, i32* %a, i32 2 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %2 = load i32, i32* %c, align 4 + + %mul1 = mul i32 %0, %1 + %mul2 = mul i32 %mul1, %2 + ret i32 %mul2 +} + diff --git 
a/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll new file mode 100644 index 000000000000..774b0a907e39 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll @@ -0,0 +1,36 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; N=3 STMIA_UPD should have latency 2cyc and writeback latency 1cyc + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have STM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: STMIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 +; CHECK: Successors +; CHECK: data +; CHECK-SAME: Latency=1 + +define i32 @bar(i32 %v0, i32 %v1, i32 %v2, i32* %addr) { + + %addr.1 = getelementptr i32, i32* %addr, i32 0 + store i32 %v0, i32* %addr.1 + + %addr.2 = getelementptr i32, i32* %addr, i32 1 + store i32 %v1, i32* %addr.2 + + %addr.3 = getelementptr i32, i32* %addr, i32 2 + store i32 %v2, i32* %addr.3 + + %ptr_after = getelementptr i32, i32* %addr, i32 3 + %val = ptrtoint i32* %ptr_after to i32 + + %rv1 = mul i32 %val, %v0 + %rv2 = mul i32 %rv1, %v1 + %rv3 = mul i32 %rv2, %v2 + + ret i32 %rv3 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-stm.ll b/test/CodeGen/ARM/cortex-a57-misched-stm.ll new file mode 100644 index 000000000000..474f39d84bae --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-stm.ll @@ -0,0 +1,29 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; N=3 STMIB should have latency 2cyc + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have STM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: STMIB +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +define i32 @test_stm(i32 %v0, i32 %v1, i32* %addr) { + + %addr.1 = getelementptr i32, i32* %addr, i32 1 + store i32 %v0, i32* %addr.1 + + %addr.2 = getelementptr i32, i32* %addr, i32 2 + store i32 %v1, i32* %addr.2 + + %addr.3 = getelementptr i32, i32* %addr, i32 3 + %val = ptrtoint i32* %addr to i32 + store i32 %val, i32* %addr.3 + + %rv = add i32 %v0, %v1 + + ret i32 %rv +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll new file mode 100644 index 000000000000..a9223e1e2a99 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll @@ -0,0 +1,77 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; Check latencies of vmul/vfma accumulate chains. 
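The interesting part of the checks that follow is the read-advance on the accumulator operand: a multiply feeding the accumulator input of a multiply-accumulate is modelled as being consumed late, so the consumer sees Latency=0 (and 4 between back-to-back VMLAs) rather than the full 5- or 9-cycle result latency. For the f1 * f2 + f3 * f4 + f5 * f6 chains in both tests that puts the accumulate chain, roughly, at

  0 + 4 + 9 = 13 cycles to the final register move,

versus roughly 5 + 9 + 9 = 23 cycles if every consumer had to wait for the full result latency.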
+ +define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test1:BB#0 + +; CHECK: VMULS +; > VMULS common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; > VMULS read-advanced latency to VMLAS = 0 +; CHECK-SAME: Latency=0 + +; CHECK: VMLAS +; > VMLAS common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAS read-advanced latency to the next VMLAS = 4 +; CHECK-SAME: Latency=4 + +; CHECK: VMLAS +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAS not-optimized latency to VMOVRS = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLAS, VMLAS + %mul1 = fmul float %f1, %f2 + %mul2 = fmul float %f3, %f4 + %mul3 = fmul float %f5, %f6 + %add1 = fadd float %mul1, %mul2 + %add2 = fadd float %add1, %mul3 + ret float %add2 +} + +; ASIMD form +define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test2:BB#0 + +; CHECK: VMULfd +; > VMULfd common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; > VMULfd read-advanced latency to VMLAfd = 0 +; CHECK-SAME: Latency=0 + +; CHECK: VMLAfd +; > VMLAfd common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAfd read-advanced latency to the next VMLAfd = 4 +; CHECK-SAME: Latency=4 + +; CHECK: VMLAfd +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAfd not-optimized latency to VMOVRRD = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULfd, VMLAfd, VMLAfd + %mul1 = fmul <2 x float> %f1, %f2 + %mul2 = fmul <2 x float> %f3, %f4 + %mul3 = fmul <2 x float> %f5, %f6 + %add1 = fadd <2 x float> %mul1, %mul2 + %add2 = fadd <2 x float> %add1, %mul3 + ret <2 x float> %add2 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll new file mode 100644 index 000000000000..6cfa823fb969 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll @@ -0,0 +1,50 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; + +@a = global double 0.0, align 4 +@b = global double 0.0, align 4 +@c = global double 0.0, align 4 + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VLDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: VLDMDIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 6 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=1 +; CHECK-NEXT: data +; CHECK-SAME: Latency=1 +; CHECK-NEXT: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=6 +define i32 @bar(i32* %iptr) minsize optsize { + %1 = load double, double* @a, align 8 + %2 = load double, double* @b, align 8 + %3 = load double, double* @c, align 8 + + %ptr_after = getelementptr double, double* @a, i32 3 + + %ptr_new_ival = ptrtoint double* %ptr_after to i32 + %ptr_new = inttoptr i32 %ptr_new_ival to i32* + + store i32 %ptr_new_ival, i32* %iptr, align 8 + + %v1 = fptoui double %1 to i32 + + %mul1 = mul i32 %ptr_new_ival, %v1 + + %v2 = fptoui double %2 to i32 + %v3 = fptoui double %3 to i32 + + %mul2 = mul i32 %mul1, %v2 + %mul3 = mul i32 %mul2, %v3 + + ret
i32 %mul3 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vldm.ll b/test/CodeGen/ARM/cortex-a57-misched-vldm.ll new file mode 100644 index 000000000000..218b5b41a7e4 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vldm.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VLDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: VLDMDIA +; CHECK: rdefs left +; CHECK-NEXT: Latency : 6 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=6 + +define double @foo(double* %a) nounwind optsize { +entry: + %b = getelementptr double, double* %a, i32 1 + %c = getelementptr double, double* %a, i32 2 + %0 = load double, double* %a, align 4 + %1 = load double, double* %b, align 4 + %2 = load double, double* %c, align 4 + + %mul1 = fmul double %0, %1 + %mul2 = fmul double %mul1, %2 + ret double %mul2 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll new file mode 100644 index 000000000000..af1c469d4443 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll @@ -0,0 +1,43 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VSTM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: VSTMDIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 4 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=1 + +@a = global double 0.0, align 4 +@b = global double 0.0, align 4 +@c = global double 0.0, align 4 + +define i32 @bar(double* %vptr, i32 %iv1, i32* %iptr) minsize { + + %vp2 = getelementptr double, double* %vptr, i32 1 + %vp3 = getelementptr double, double* %vptr, i32 2 + + %v1 = load double, double* %vptr, align 8 + %v2 = load double, double* %vp2, align 8 + %v3 = load double, double* %vp3, align 8 + + store double %v1, double* @a, align 8 + store double %v2, double* @b, align 8 + store double %v3, double* @c, align 8 + + %ptr_after = getelementptr double, double* @a, i32 3 + + %ptr_new_ival = ptrtoint double* %ptr_after to i32 + %ptr_new = inttoptr i32 %ptr_new_ival to i32* + + store i32 %ptr_new_ival, i32* %iptr, align 8 + + %mul1 = mul i32 %ptr_new_ival, %iv1 + + ret i32 %mul1 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vstm.ll b/test/CodeGen/ARM/cortex-a57-misched-vstm.ll new file mode 100644 index 000000000000..f31474f66558 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vstm.ll @@ -0,0 +1,23 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VSTM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: VSTMDIA +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +%bigVec = type [2 x double] + +@var = 
global %bigVec zeroinitializer + +define void @bar(%bigVec* %ptr) { + + %tmp = load %bigVec, %bigVec* %ptr + store %bigVec %tmp, %bigVec* @var + + ret void +} + diff --git a/test/CodeGen/ARM/global-merge-external.ll b/test/CodeGen/ARM/global-merge-external.ll index a9e0d199705a..03c977614320 100644 --- a/test/CodeGen/ARM/global-merge-external.ll +++ b/test/CodeGen/ARM/global-merge-external.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE ; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE ; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefix=CHECK-NO-MERGE @x = global i32 0, align 4 @y = global i32 0, align 4 diff --git a/test/CodeGen/Hexagon/newify-crash.ll b/test/CodeGen/Hexagon/newify-crash.ll new file mode 100644 index 000000000000..705170b13a59 --- /dev/null +++ b/test/CodeGen/Hexagon/newify-crash.ll @@ -0,0 +1,44 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; +; Check that this testcase doesn't crash. +; CHECK: vadd + +target triple = "hexagon" + +define void @fred() #0 { +b0: + br label %b1 + +b1: ; preds = %b7, %b0 + %v2 = phi i32 [ 0, %b0 ], [ %v16, %b7 ] + %v3 = phi <32 x i32> [ undef, %b0 ], [ %v15, %b7 ] + %v4 = icmp slt i32 %v2, undef + br i1 %v4, label %b5, label %b7 + +b5: ; preds = %b1 + %v6 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v3, <32 x i32> undef) + br label %b7 + +b7: ; preds = %b5, %b1 + %v8 = phi <32 x i32> [ %v6, %b5 ], [ %v3, %b1 ] + %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v8, <32 x i32> undef) + %v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v9, <32 x i32> undef) + %v11 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v10, <32 x i32> undef) + %v12 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v11, <32 x i32> undef) + %v13 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v12, <32 x i32> zeroinitializer) + %v14 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v13, <32 x i32> undef) + %v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v14, <32 x i32> undef) + %v16 = add nsw i32 %v2, 8 + %v17 = icmp eq i32 %v16, 64 + br i1 %v17, label %b18, label %b1 + +b18: ; preds = %b7 + tail call void @f0() #0 + ret void +} + +declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1 +declare void @f0() #0 + +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/MIR/Generic/runPass.mir b/test/CodeGen/MIR/Generic/runPass.mir index eeef9d526510..33380d4c6bb4 100644 --- a/test/CodeGen/MIR/Generic/runPass.mir +++ b/test/CodeGen/MIR/Generic/runPass.mir @@ -1,4 +1,5 @@ # RUN: llc -run-pass=greedy -debug-pass=Arguments -o - %s | FileCheck %s +# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s # Check that passes are initialized correctly, so that it's possible to # use -run-pass. 
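-run-pass builds a pipeline containing nothing but the named pass, so it only works for passes that are registered and initialized up front; the new RUN line extends the smoke test from greedy to the basic register allocator, which the SystemZ regallocbasic test later in this patch runs standalone. The same one-line pattern works for any allocator exposed to -run-pass (the RUN line mirrors the test; the CHECK content is an assumption here, not quoted from it):

# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# CHECK: -regallocbasic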
diff --git a/test/CodeGen/Mips/micromips-sizereduction/micromips-lbu16-lhu16-sb16-sh16.ll b/test/CodeGen/Mips/micromips-sizereduction/micromips-lbu16-lhu16-sb16-sh16.ll new file mode 100644 index 000000000000..804ea1e5c438 --- /dev/null +++ b/test/CodeGen/Mips/micromips-sizereduction/micromips-lbu16-lhu16-sb16-sh16.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips -verify-machineinstrs < %s | FileCheck %s + +define void @f1(i8* %p) { +entry: +; CHECK-LABEL: f1: +; CHECK: lbu16 +; CHECK: sb16 + %0 = load i8, i8* %p, align 4 + %a = zext i8 %0 to i32 + %and = and i32 %a, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i8 0, i8* %p, align 1 + br label %if.end + +if.end: + ret void +} + +define void @f2(i16* %p) { +entry: +; CHECK-LABEL: f2: +; CHECK: lhu16 +; CHECK: sh16 + %0 = load i16, i16* %p, align 2 + %a = zext i16 %0 to i32 + %and = and i32 %a, 2 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + store i16 0, i16* %p, align 2 + br label %if.end + +if.end: + ret void +} + diff --git a/test/CodeGen/PowerPC/scavenging.mir b/test/CodeGen/PowerPC/scavenging.mir new file mode 100644 index 000000000000..8b5c26230bc6 --- /dev/null +++ b/test/CodeGen/PowerPC/scavenging.mir @@ -0,0 +1,149 @@ +# RUN: llc -mtriple=ppc64-- -run-pass scavenger-test -verify-machineinstrs -o - %s | FileCheck %s +--- +# CHECK-LABEL: name: noscav0 +name: noscav0 +tracksRegLiveness: true +body: | + bb.0: + ; CHECK: [[REG0:%r[0-9]+]] = LI 42 + ; CHECK-NEXT: NOP implicit [[REG0]] + %0 : gprc = LI 42 + NOP implicit %0 + + ; CHECK: [[REG1:%r[0-9]+]] = LI 42 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP implicit [[REG1]] + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP implicit [[REG1]] + %1 : gprc = LI 42 + NOP + NOP implicit %1 + NOP + NOP implicit %1 + + ; CHECK: [[REG2:%r[0-9]+]] = LI 42 + ; CHECK-NEXT: NOP implicit [[REG2]] + %2 : gprc = LI 42 + NOP implicit %2 + + %x0 = IMPLICIT_DEF + %x1 = IMPLICIT_DEF + %x2 = IMPLICIT_DEF + %x3 = IMPLICIT_DEF + %x4 = IMPLICIT_DEF + %x27 = IMPLICIT_DEF + %x28 = IMPLICIT_DEF + %x29 = IMPLICIT_DEF + %x30 = IMPLICIT_DEF + + ; CHECK-NOT: %x0 = LI 42 + ; CHECK-NOT: %x1 = LI 42 + ; CHECK-NOT: %x2 = LI 42 + ; CHECK-NOT: %x3 = LI 42 + ; CHECK-NOT: %x4 = LI 42 + ; CHECK-NOT: %x5 = LI 42 + ; CHECK-NOT: %x27 = LI 42 + ; CHECK-NOT: %x28 = LI 42 + ; CHECK-NOT: %x29 = LI 42 + ; CHECK-NOT: %x30 = LI 42 + ; CHECK: [[REG3:%r[0-9]+]] = LI 42 + ; CHECK-NEXT: %x5 = IMPLICIT_DEF + ; CHECK-NEXT: NOP implicit [[REG2]] + ; CHECK-NEXT: NOP implicit [[REG3]] + %3 : gprc = LI 42 + %x5 = IMPLICIT_DEF + NOP implicit %2 + NOP implicit %3 + + NOP implicit %x0 + NOP implicit %x1 + NOP implicit %x2 + NOP implicit %x3 + NOP implicit %x4 + NOP implicit %x5 + NOP implicit %x27 + NOP implicit %x28 + NOP implicit %x29 + NOP implicit %x30 +... 
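The two functions in this new test bracket the scavenger's behavior: noscav0 above keeps enough GPRs free that every LI can be assigned directly, while scav0 (next) occupies all 32 GPRs and declares a variable-sized stack object. Dynamic stack objects make frame offsets unknown until runtime, so, as the comment in the test notes, the target reserves an emergency spill slot that lets the scavenger free a register by spilling around the use - the STD ... LI8 ... LD sequence in the CHECK lines. A hypothetical IR-level function that creates such an object (names illustrative only):

declare void @use(i8*)

define void @needs_scavenging(i64 %n) {
  ; %n is not a compile-time constant, so this is a variable-sized stack object
  %buf = alloca i8, i64 %n
  call void @use(i8* %buf)
  ret void
}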
+--- +# CHECK-LABEL: name: scav0 +name: scav0 +tracksRegLiveness: true +stack: + # variable-sized object should be a reason to reserve an emergency spillslot + # in the RegScavenger + - { id: 0, type: variable-sized, offset: -32, alignment: 1 } +body: | + bb.0: + %x0 = IMPLICIT_DEF + %x1 = IMPLICIT_DEF + %x2 = IMPLICIT_DEF + %x3 = IMPLICIT_DEF + %x4 = IMPLICIT_DEF + %x5 = IMPLICIT_DEF + %x6 = IMPLICIT_DEF + %x7 = IMPLICIT_DEF + %x8 = IMPLICIT_DEF + %x9 = IMPLICIT_DEF + %x10 = IMPLICIT_DEF + %x11 = IMPLICIT_DEF + %x12 = IMPLICIT_DEF + %x13 = IMPLICIT_DEF + %x14 = IMPLICIT_DEF + %x15 = IMPLICIT_DEF + %x16 = IMPLICIT_DEF + %x17 = IMPLICIT_DEF + %x18 = IMPLICIT_DEF + %x19 = IMPLICIT_DEF + %x20 = IMPLICIT_DEF + %x21 = IMPLICIT_DEF + %x22 = IMPLICIT_DEF + %x23 = IMPLICIT_DEF + %x24 = IMPLICIT_DEF + %x25 = IMPLICIT_DEF + %x26 = IMPLICIT_DEF + %x27 = IMPLICIT_DEF + %x28 = IMPLICIT_DEF + %x29 = IMPLICIT_DEF + %x30 = IMPLICIT_DEF + + ; CHECK: STD killed [[SPILLEDREG:%x[0-9]+]] + ; CHECK: [[SPILLEDREG]] = LI8 42 + ; CHECK: NOP implicit [[SPILLEDREG]] + ; CHECK: [[SPILLEDREG]] = LD + %0 : g8rc = LI8 42 + NOP implicit %0 + + NOP implicit %x0 + NOP implicit %x1 + NOP implicit %x2 + NOP implicit %x3 + NOP implicit %x4 + NOP implicit %x5 + NOP implicit %x6 + NOP implicit %x7 + NOP implicit %x8 + NOP implicit %x9 + NOP implicit %x10 + NOP implicit %x11 + NOP implicit %x12 + NOP implicit %x13 + NOP implicit %x14 + NOP implicit %x15 + NOP implicit %x16 + NOP implicit %x17 + NOP implicit %x18 + NOP implicit %x19 + NOP implicit %x20 + NOP implicit %x21 + NOP implicit %x22 + NOP implicit %x23 + NOP implicit %x24 + NOP implicit %x25 + NOP implicit %x26 + NOP implicit %x27 + NOP implicit %x28 + NOP implicit %x29 + NOP implicit %x30 +... diff --git a/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir b/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir new file mode 100644 index 000000000000..2f532f0a5efb --- /dev/null +++ b/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir @@ -0,0 +1,267 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -run-pass=regallocbasic %s -o - | FileCheck %s +# This test used to assert in RABasic. The problem was when we split live-ranges, +# we were not updating the LiveRegMatrix properly and the interference calculation +# wouldn't match what the assignment thought it could do. +# In other words, this test case needs to trigger live-range splitting to exercise +# the problem. +# +# PR33057 +--- | + target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" + target triple = "s390x--linux-gnu" + + define void @autogen_SD21418() #0 { + ret void + } + + attributes #0 = { "target-cpu"="z13" } + +... 
+ +# CHECK: name: autogen_SD21418 +# Check that at least one live-range has been split +# CHECK: id: 114, class +--- +name: autogen_SD21418 +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: vr128bit } + - { id: 1, class: vr128bit } + - { id: 2, class: vr128bit } + - { id: 3, class: vr64bit } + - { id: 4, class: gr64bit } + - { id: 5, class: vr128bit } + - { id: 6, class: grx32bit } + - { id: 7, class: vr128bit } + - { id: 8, class: vr128bit } + - { id: 9, class: gr32bit } + - { id: 10, class: gr64bit } + - { id: 11, class: vr128bit } + - { id: 12, class: fp64bit } + - { id: 13, class: vr64bit } + - { id: 14, class: vr64bit } + - { id: 15, class: gr64bit } + - { id: 16, class: gr128bit } + - { id: 17, class: gr64bit } + - { id: 18, class: gr32bit } + - { id: 19, class: gr32bit } + - { id: 20, class: gr128bit } + - { id: 21, class: gr32bit } + - { id: 22, class: gr64bit } + - { id: 23, class: gr32bit } + - { id: 24, class: gr32bit } + - { id: 25, class: gr128bit } + - { id: 26, class: grx32bit } + - { id: 27, class: gr64bit } + - { id: 28, class: gr64bit } + - { id: 29, class: vr128bit } + - { id: 30, class: vr128bit } + - { id: 31, class: gr64bit } + - { id: 32, class: gr32bit } + - { id: 33, class: gr32bit } + - { id: 34, class: gr128bit } + - { id: 35, class: gr32bit } + - { id: 36, class: vr128bit } + - { id: 37, class: gr64bit } + - { id: 38, class: gr32bit } + - { id: 39, class: gr32bit } + - { id: 40, class: gr128bit } + - { id: 41, class: gr32bit } + - { id: 42, class: addr64bit } + - { id: 43, class: grx32bit } + - { id: 44, class: addr64bit } + - { id: 45, class: vr64bit } + - { id: 46, class: vr64bit } + - { id: 47, class: gr32bit } + - { id: 48, class: gr32bit } + - { id: 49, class: grx32bit } + - { id: 50, class: vr64bit } + - { id: 51, class: gr64bit } + - { id: 52, class: grx32bit } + - { id: 53, class: gr32bit } + - { id: 54, class: gr64bit } + - { id: 55, class: grx32bit } + - { id: 56, class: gr32bit } + - { id: 57, class: gr128bit } + - { id: 58, class: gr128bit } + - { id: 59, class: gr32bit } + - { id: 60, class: gr64bit } + - { id: 61, class: grx32bit } + - { id: 62, class: gr32bit } + - { id: 63, class: gr64bit } + - { id: 64, class: grx32bit } + - { id: 65, class: gr32bit } + - { id: 66, class: gr128bit } + - { id: 67, class: gr128bit } + - { id: 68, class: grx32bit } + - { id: 69, class: gr64bit } + - { id: 70, class: gr64bit } + - { id: 71, class: vr128bit } + - { id: 72, class: vr128bit } + - { id: 73, class: gr64bit } + - { id: 74, class: grx32bit } + - { id: 75, class: gr32bit } + - { id: 76, class: gr64bit } + - { id: 77, class: grx32bit } + - { id: 78, class: gr32bit } + - { id: 79, class: gr128bit } + - { id: 80, class: gr128bit } + - { id: 81, class: gr32bit } + - { id: 82, class: vr128bit } + - { id: 83, class: gr64bit } + - { id: 84, class: grx32bit } + - { id: 85, class: gr32bit } + - { id: 86, class: gr64bit } + - { id: 87, class: grx32bit } + - { id: 88, class: gr32bit } + - { id: 89, class: gr128bit } + - { id: 90, class: gr128bit } + - { id: 91, class: gr32bit } + - { id: 92, class: grx32bit } + - { id: 93, class: gr64bit } + - { id: 94, class: gr32bit } + - { id: 95, class: gr32bit } + - { id: 96, class: gr32bit } + - { id: 97, class: gr64bit } + - { id: 98, class: gr64bit } + - { id: 99, class: grx32bit } + - { id: 100, class: grx32bit } + - { id: 101, class: gr128bit } + - { id: 102, class: gr128bit } + - { id: 103, class: gr128bit } + - { id: 104, class: gr64bit } + - { id: 105, class: gr128bit } + - { id: 106, class: gr128bit 
} + - { id: 107, class: gr64bit } + - { id: 108, class: gr128bit } + - { id: 109, class: gr128bit } + - { id: 110, class: gr64bit } + - { id: 111, class: gr128bit } + - { id: 112, class: gr128bit } + - { id: 113, class: gr64bit } +constants: + - id: 0 + value: double 0xD55960F86F577076 + alignment: 8 +body: | + bb.0: + %11 = VGBM 0 + %43 = LHIMux 0 + %44 = LARL %const.0 + %45 = VL64 %44, 0, _ :: (load 8 from constant-pool) + + bb.1: + ADJCALLSTACKDOWN 0, 0 + %12 = LZDR + %f0d = COPY %12 + CallBRASL $fmod, killed %f0d, undef %f2d, csr_systemz, implicit-def dead %r14d, implicit-def dead %cc, implicit-def %f0d + ADJCALLSTACKUP 0, 0 + KILL killed %f0d + + bb.2: + %17 = VLGVH %11, _, 0 + %19 = LHR %17.subreg_l32 + undef %20.subreg_l64 = LGHI 0 + %20 = DSGFR %20, %19 + %22 = VLGVH %11, _, 3 + %24 = LHR %22.subreg_l32 + undef %25.subreg_l64 = LGHI 0 + %25 = DSGFR %25, %24 + %31 = VLGVH %11, _, 1 + %33 = LHR %31.subreg_l32 + undef %34.subreg_l64 = LGHI 0 + %34 = DSGFR %34, %33 + %37 = VLGVH %11, _, 2 + %39 = LHR %37.subreg_l32 + undef %40.subreg_l64 = LGHI 0 + %40 = DSGFR %40, %39 + CHIMux %43, 0, implicit-def %cc + BRC 14, 6, %bb.2, implicit killed %cc + J %bb.3 + + bb.3: + WFCDB undef %46, %45, implicit-def %cc + %48 = IPM implicit killed %cc + %48 = AFIMux %48, 268435456, implicit-def dead %cc + %6 = RISBMux undef %6, %48, 31, 159, 35 + WFCDB undef %50, %45, implicit-def %cc + BRC 15, 6, %bb.1, implicit killed %cc + J %bb.4 + + bb.4: + %36 = VLVGP %25.subreg_l64, %25.subreg_l64 + %36 = VLVGH %36, %20.subreg_l32, _, 0 + %36 = VLVGH %36, %34.subreg_l32, _, 1 + dead %36 = VLVGH %36, %40.subreg_l32, _, 2 + %4 = LG undef %42, 0, _ :: (load 8 from `i64* undef`) + undef %57.subreg_h64 = LLILL 0 + undef %66.subreg_h64 = LLILL 0 + undef %79.subreg_h64 = LLILL 0 + undef %89.subreg_h64 = LLILL 0 + %92 = LHIMux 0 + + bb.5: + + bb.6: + %51 = VLGVH undef %7, _, 0 + %53 = LLHRMux %51.subreg_l32 + %54 = VLGVH undef %1, _, 0 + %57.subreg_l32 = LLHRMux %54.subreg_l32 + %58 = COPY %57 + %58 = DLR %58, %53 + %60 = VLGVH undef %7, _, 3 + %62 = LLHRMux %60.subreg_l32 + %63 = VLGVH undef %1, _, 3 + %66.subreg_l32 = LLHRMux %63.subreg_l32 + %67 = COPY %66 + %67 = DLR %67, %62 + %73 = VLGVH undef %7, _, 1 + %75 = LLHRMux %73.subreg_l32 + %76 = VLGVH undef %1, _, 1 + %79.subreg_l32 = LLHRMux %76.subreg_l32 + %80 = COPY %79 + %80 = DLR %80, %75 + %83 = VLGVH undef %7, _, 2 + %85 = LLHRMux %83.subreg_l32 + %86 = VLGVH undef %1, _, 2 + %89.subreg_l32 = LLHRMux %86.subreg_l32 + %90 = COPY %89 + %90 = DLR %90, %85 + CHIMux %92, 0, implicit-def %cc + BRC 14, 6, %bb.7, implicit killed %cc + J %bb.6 + + bb.7: + CGHI undef %93, 0, implicit-def %cc + %96 = IPM implicit killed %cc + CGHI undef %97, 0, implicit-def %cc + BRC 14, 6, %bb.6, implicit killed %cc + + bb.8: + CHIMux %6, 0, implicit-def %cc + %10 = LLILL 41639 + dead %10 = LOCGR %10, %4, 14, 6, implicit killed %cc + CHIMux %92, 0, implicit-def %cc + BRC 14, 6, %bb.5, implicit killed %cc + J %bb.9 + + bb.9: + %82 = VLVGP %67.subreg_h64, %67.subreg_h64 + %82 = VLVGH %82, %58.subreg_hl32, _, 0 + %82 = VLVGH %82, %80.subreg_hl32, _, 1 + dead %82 = VLVGH %82, %90.subreg_hl32, _, 2 + %96 = AFIMux %96, 1879048192, implicit-def dead %cc + %96 = SRL %96, _, 31 + dead %11 = VLVGF %11, %96, _, 1 + %100 = LHIMux 0 + + bb.10: + CHIMux %100, 0, implicit-def %cc + BRC 14, 6, %bb.10, implicit killed %cc + J %bb.11 + + bb.11: + Return + +... 
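A note on the split check in this test: the hand-written registers table of autogen_SD21418 stops at id 113, so the earlier

# CHECK: id: 114, class

can only match if register allocation printed a virtual register that did not exist on input - that is, if at least one live range really was split, which is exactly the PR33057 scenario that used to trip the assertion in RABasic.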
diff --git a/test/CodeGen/X86/and-sink.ll b/test/CodeGen/X86/and-sink.ll index 46e50f2a6a74..0f877e778c70 100644 --- a/test/CodeGen/X86/and-sink.ll +++ b/test/CodeGen/X86/and-sink.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-unknown -verify-machineinstrs < %s | FileCheck %s ; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck --check-prefix=CHECK-CGP %s @@ -8,12 +9,20 @@ ; Test that 'and' is sunk into bb0. define i32 @and_sink1(i32 %a, i1 %c) { ; CHECK-LABEL: and_sink1: -; CHECK: testb $1, -; CHECK: je -; CHECK-NOT: andl $4, -; CHECK: movl $0, A -; CHECK: testb $4, -; CHECK: jne +; CHECK: # BB#0: +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # BB#1: # %bb0 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl $0, A +; CHECK-NEXT: testb $4, %al +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # BB#2: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_3: # %bb2 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl ; CHECK-CGP-LABEL: @and_sink1( ; CHECK-CGP-NOT: and i32 @@ -37,16 +46,30 @@ bb2: ; Test that both 'and' and cmp get sunk to bb1. define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) { ; CHECK-LABEL: and_sink2: -; CHECK: movl $0, A -; CHECK: testb $1, -; CHECK: je -; CHECK-NOT: andl $4, -; CHECK: movl $0, B -; CHECK: testb $1, -; CHECK: je -; CHECK: movl $0, C -; CHECK: testb $4, -; CHECK: jne +; CHECK: # BB#0: +; CHECK-NEXT: movl $0, A +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB1_5 +; CHECK-NEXT: # BB#1: # %bb0.preheader +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_2: # %bb0 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl $0, B +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB1_5 +; CHECK-NEXT: # BB#3: # %bb1 +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: movl $0, C +; CHECK-NEXT: testb $4, %cl +; CHECK-NEXT: jne .LBB1_2 +; CHECK-NEXT: # BB#4: # %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB1_5: # %bb3 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl ; CHECK-CGP-LABEL: @and_sink2( ; CHECK-CGP-NOT: and i32 @@ -77,12 +100,21 @@ bb3: ; Test that CodeGenPrepare doesn't get stuck in a loop sinking and hoisting a masked load. define i32 @and_sink3(i1 %c, i32* %p) { ; CHECK-LABEL: and_sink3: -; CHECK: testb $1, -; CHECK: je -; CHECK: movzbl -; CHECK-DAG: movl $0, A -; CHECK-DAG: testl % -; CHECK: je +; CHECK: # BB#0: +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB2_3 +; CHECK-NEXT: # BB#1: # %bb0 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl (%eax), %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: movl $0, A +; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: .LBB2_3: # %bb2 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_2: # %bb1 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl ; CHECK-CGP-LABEL: @and_sink3( ; CHECK-CGP: load i32 @@ -106,15 +138,26 @@ bb2: ; Test that CodeGenPrepare sinks/duplicates non-immediate 'and'. 
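In IR terms, the transformation the CHECK-CGP prefixes pin down looks roughly like this hand-written sketch (mirroring and_sink1 above, not copied from the test): CodeGenPrepare erases the 'and' from the entry block and recreates it next to the compare that uses it, so that on x86 the pair can later fold into a single flags-setting test:

; before CodeGenPrepare: %and is computed on every path out of entry
define i32 @sketch(i32 %a, i1 %c) {
entry:
  %and = and i32 %a, 4
  br i1 %c, label %bb0, label %bb2
bb0:
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 0
}
; after CodeGenPrepare: entry no longer contains an 'and'; it is rematerialized
; in bb0 immediately before %cmp

The function below exercises the same sinking when the mask is another SSA value rather than an immediate, and and_sink5 after it shows the duplication being skipped when it would only increase register pressure.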
define i32 @and_sink4(i32 %a, i32 %b, i1 %c) { ; CHECK-LABEL: and_sink4: -; CHECK: testb $1, -; CHECK: je -; CHECK-NOT: andl -; CHECK-DAG: movl $0, A -; CHECK-DAG: testl [[REG1:%[a-z0-9]+]], [[REG2:%[a-z0-9]+]] -; CHECK: jne -; CHECK-DAG: movl {{%[a-z0-9]+}}, B -; CHECK-DAG: testl [[REG1]], [[REG2]] -; CHECK: je +; CHECK: # BB#0: +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB3_4 +; CHECK-NEXT: # BB#1: # %bb0 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: testl %eax, %ecx +; CHECK-NEXT: movl $0, A +; CHECK-NEXT: jne .LBB3_4 +; CHECK-NEXT: # BB#2: # %bb1 +; CHECK-NEXT: leal (%ecx,%eax), %edx +; CHECK-NEXT: testl %eax, %ecx +; CHECK-NEXT: movl %edx, B +; CHECK-NEXT: je .LBB3_3 +; CHECK-NEXT: .LBB3_4: # %bb3 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB3_3: # %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl ; CHECK-CGP-LABEL: @and_sink4( ; CHECK-CGP-NOT: and i32 @@ -146,14 +189,26 @@ bb3: ; when it would increase register pressure. define i32 @and_sink5(i32 %a, i32 %b, i32 %a2, i32 %b2, i1 %c) { ; CHECK-LABEL: and_sink5: -; CHECK: testb $1, -; CHECK: je -; CHECK-DAG: andl {{[0-9]+\(%[a-z0-9]+\)}}, [[REG:%[a-z0-9]+]] -; CHECK-DAG: movl $0, A -; CHECK: jne -; CHECK-DAG: movl {{%[a-z0-9]+}}, B -; CHECK-DAG: testl [[REG]], [[REG]] -; CHECK: je +; CHECK: # BB#0: +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB4_4 +; CHECK-NEXT: # BB#1: # %bb0 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: andl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl $0, A +; CHECK-NEXT: jne .LBB4_4 +; CHECK-NEXT: # BB#2: # %bb1 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: movl %ecx, B +; CHECK-NEXT: je .LBB4_3 +; CHECK-NEXT: .LBB4_4: # %bb3 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB4_3: # %bb2 +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl ; CHECK-CGP-LABEL: @and_sink5( ; CHECK-CGP: and i32 diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 33ac15de9de9..8f6afa8785d0 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=DQ --check-prefix=AVX512DQ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ -; RUN: llc < %s 
-mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=DQ --check-prefix=AVX512DQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW define <16 x float> @sitof32(<16 x i32> %a) nounwind { ; ALL-LABEL: sitof32: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 ; ALL-NEXT: retq %b = sitofp <16 x i32> %a to <16 x float> @@ -19,7 +19,7 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind { define <8 x double> @sltof864(<8 x i64> %a) { ; NODQ-LABEL: sltof864: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1 ; NODQ-NEXT: vpextrq $1, %xmm1, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 @@ -49,7 +49,7 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; NODQ-NEXT: retq ; ; DQ-LABEL: sltof864: -; DQ: ## BB#0: +; DQ: # BB#0: ; DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 ; DQ-NEXT: retq %b = sitofp <8 x i64> %a to <8 x double> @@ -58,7 +58,7 @@ define <8 x double> @sltof864(<8 x i64> %a) { define <4 x double> @sltof464(<4 x i64> %a) { ; NODQ-LABEL: sltof464: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NODQ-NEXT: vpextrq $1, %xmm1, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 @@ -74,15 +74,15 @@ define <4 x double> @sltof464(<4 x i64> %a) { ; NODQ-NEXT: retq ; ; VLDQ-LABEL: sltof464: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0 ; VLDQ-NEXT: retq ; ; AVX512DQ-LABEL: sltof464: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; AVX512DQ-NEXT: retq %b = sitofp <4 x i64> %a to <4 x double> ret <4 x double> %b @@ -90,7 +90,7 @@ define <4 x double> @sltof464(<4 x i64> %a) { define <2 x float> @sltof2f32(<2 x i64> %a) { ; NODQ-LABEL: sltof2f32: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vpextrq $1, %xmm0, 
%rax ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; NODQ-NEXT: vmovq %xmm0, %rax @@ -101,15 +101,15 @@ define <2 x float> @sltof2f32(<2 x i64> %a) { ; NODQ-NEXT: retq ; ; VLDQ-LABEL: sltof2f32: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 ; VLDQ-NEXT: retq ; ; AVX512DQ-LABEL: sltof2f32: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %b = sitofp <2 x i64> %a to <2 x float> @@ -118,7 +118,7 @@ define <2 x float> @sltof2f32(<2 x i64> %a) { define <4 x float> @sltof4f32_mem(<4 x i64>* %a) { ; KNL-LABEL: sltof4f32_mem: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: vmovdqu (%rdi), %ymm0 ; KNL-NEXT: vpextrq $1, %xmm0, %rax ; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 @@ -135,12 +135,12 @@ define <4 x float> @sltof4f32_mem(<4 x i64>* %a) { ; KNL-NEXT: retq ; ; VLDQ-LABEL: sltof4f32_mem: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0 ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sltof4f32_mem: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vmovdqu (%rdi), %ymm0 ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax ; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 @@ -158,15 +158,15 @@ define <4 x float> @sltof4f32_mem(<4 x i64>* %a) { ; VLNODQ-NEXT: retq ; ; AVX512DQ-LABEL: sltof4f32_mem: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vmovups (%rdi), %ymm0 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: sltof4f32_mem: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: vmovdqu (%rdi), %ymm0 ; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax ; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 @@ -189,7 +189,7 @@ define <4 x float> @sltof4f32_mem(<4 x i64>* %a) { define <4 x i64> @f64tosl(<4 x double> %a) { ; NODQ-LABEL: f64tosl: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vextractf128 $1, %ymm0, %xmm1 ; NODQ-NEXT: vcvttsd2si %xmm1, %rax ; NODQ-NEXT: vmovq %rax, %xmm2 @@ -207,15 +207,15 @@ define <4 x i64> @f64tosl(<4 x double> %a) { ; NODQ-NEXT: retq ; ; VLDQ-LABEL: f64tosl: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0 ; VLDQ-NEXT: retq ; ; AVX512DQ-LABEL: f64tosl: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; AVX512DQ-NEXT: retq %b = fptosi <4 x double> %a to <4 x i64> ret <4 x i64> %b @@ -223,7 +223,7 @@ define <4 x i64> @f64tosl(<4 x double> %a) { define <4 x i64> @f32tosl(<4 x float> %a) { ; NODQ-LABEL: f32tosl: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] ; NODQ-NEXT: vcvttss2si %xmm1, %rax ; NODQ-NEXT: vmovq %rax, %xmm1 @@ -241,15 +241,15 @@ define <4 x i64> @f32tosl(<4 x float> %a) { ; NODQ-NEXT: retq ; ; VLDQ-LABEL: f32tosl: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0 ; VLDQ-NEXT: retq ; ; AVX512DQ-LABEL: f32tosl: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %XMM0<def> 
%XMM0<kill> %YMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; AVX512DQ-NEXT: retq %b = fptosi <4 x float> %a to <4 x i64> ret <4 x i64> %b @@ -257,7 +257,7 @@ define <4 x i64> @f32tosl(<4 x float> %a) { define <4 x float> @sltof432(<4 x i64> %a) { ; KNL-LABEL: sltof432: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: vpextrq $1, %xmm0, %rax ; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; KNL-NEXT: vmovq %xmm0, %rax @@ -273,13 +273,13 @@ define <4 x float> @sltof432(<4 x i64> %a) { ; KNL-NEXT: retq ; ; VLDQ-LABEL: sltof432: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 ; VLDQ-NEXT: vzeroupper ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sltof432: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax ; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VLNODQ-NEXT: vmovq %xmm0, %rax @@ -296,15 +296,15 @@ define <4 x float> @sltof432(<4 x i64> %a) { ; VLNODQ-NEXT: retq ; ; AVX512DQ-LABEL: sltof432: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: sltof432: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax ; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX512BW-NEXT: vmovq %xmm0, %rax @@ -325,7 +325,7 @@ define <4 x float> @sltof432(<4 x i64> %a) { define <4 x float> @ultof432(<4 x i64> %a) { ; KNL-LABEL: ultof432: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: vpextrq $1, %xmm0, %rax ; KNL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 ; KNL-NEXT: vmovq %xmm0, %rax @@ -341,13 +341,13 @@ define <4 x float> @ultof432(<4 x i64> %a) { ; KNL-NEXT: retq ; ; VLDQ-LABEL: ultof432: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 ; VLDQ-NEXT: vzeroupper ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: ultof432: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax ; VLNODQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 ; VLNODQ-NEXT: vmovq %xmm0, %rax @@ -364,15 +364,15 @@ define <4 x float> @ultof432(<4 x i64> %a) { ; VLNODQ-NEXT: retq ; ; AVX512DQ-LABEL: ultof432: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: ultof432: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax ; AVX512BW-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 ; AVX512BW-NEXT: vmovq %xmm0, %rax @@ -393,7 +393,7 @@ define <4 x float> @ultof432(<4 x i64> %a) { define <8 x double> @ultof64(<8 x i64> %a) { ; NODQ-LABEL: ultof64: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1 ; NODQ-NEXT: vpextrq $1, %xmm1, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 @@ -423,7 +423,7 @@ define <8 x double> @ultof64(<8 x i64> %a) { ; NODQ-NEXT: retq ; ; DQ-LABEL: ultof64: -; DQ: ## BB#0: +; DQ: # BB#0: ; DQ-NEXT: vcvtuqq2pd %zmm0, 
%zmm0 ; DQ-NEXT: retq %b = uitofp <8 x i64> %a to <8 x double> @@ -432,7 +432,7 @@ define <8 x double> @ultof64(<8 x i64> %a) { define <16 x i32> @fptosi00(<16 x float> %a) nounwind { ; ALL-LABEL: fptosi00: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 ; ALL-NEXT: retq %b = fptosi <16 x float> %a to <16 x i32> @@ -441,7 +441,7 @@ define <16 x i32> @fptosi00(<16 x float> %a) nounwind { define <16 x i32> @fptoui00(<16 x float> %a) nounwind { ; ALL-LABEL: fptoui00: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvttps2udq %zmm0, %zmm0 ; ALL-NEXT: retq %b = fptoui <16 x float> %a to <16 x i32> @@ -450,14 +450,14 @@ define <16 x i32> @fptoui00(<16 x float> %a) nounwind { define <8 x i32> @fptoui_256(<8 x float> %a) nounwind { ; NOVL-LABEL: fptoui_256: -; NOVL: ## BB#0: -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; NOVL: # BB#0: +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0 -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; NOVL-NEXT: retq ; ; VL-LABEL: fptoui_256: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vcvttps2udq %ymm0, %ymm0 ; VL-NEXT: retq %b = fptoui <8 x float> %a to <8 x i32> @@ -466,30 +466,30 @@ define <8 x i32> @fptoui_256(<8 x float> %a) nounwind { define <4 x i32> @fptoui_128(<4 x float> %a) nounwind { ; KNL-LABEL: fptoui_128: -; KNL: ## BB#0: -; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> +; KNL: # BB#0: +; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; KNL-NEXT: vcvttps2udq %zmm0, %zmm0 -; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; KNL-NEXT: retq ; ; VL-LABEL: fptoui_128: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vcvttps2udq %xmm0, %xmm0 ; VL-NEXT: retq ; ; AVX512DQ-LABEL: fptoui_128: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: fptoui_128: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> +; AVX512BW: # BB#0: +; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512BW-NEXT: vcvttps2udq %zmm0, %zmm0 -; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq %b = fptoui <4 x float> %a to <4 x i32> @@ -498,7 +498,7 @@ define <4 x i32> @fptoui_128(<4 x float> %a) nounwind { define <8 x i32> @fptoui01(<8 x double> %a) nounwind { ; ALL-LABEL: fptoui01: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvttpd2udq %zmm0, %ymm0 ; ALL-NEXT: retq %b = fptoui <8 x double> %a to <8 x i32> @@ -507,31 +507,31 @@ define <8 x i32> @fptoui01(<8 x double> %a) nounwind { define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind { ; KNL-LABEL: fptoui_256d: -; KNL: ## BB#0: -; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; KNL: # BB#0: +; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; KNL-NEXT: vcvttpd2udq %zmm0, %ymm0 -; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; KNL-NEXT: retq ; ; VL-LABEL: fptoui_256d: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vcvttpd2udq %ymm0, %xmm0 ; VL-NEXT: 
vzeroupper ; VL-NEXT: retq ; ; AVX512DQ-LABEL: fptoui_256d: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: fptoui_256d: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512BW: # BB#0: +; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512BW-NEXT: vcvttpd2udq %zmm0, %ymm0 -; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> +; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq %b = fptoui <4 x double> %a to <4 x i32> @@ -540,7 +540,7 @@ define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind { define <8 x double> @sitof64(<8 x i32> %a) { ; ALL-LABEL: sitof64: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 ; ALL-NEXT: retq %b = sitofp <8 x i32> %a to <8 x double> @@ -548,31 +548,31 @@ define <8 x double> @sitof64(<8 x i32> %a) { } define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { ; KNL-LABEL: sitof64_mask: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} ; KNL-NEXT: retq ; ; VLBW-LABEL: sitof64_mask: -; VLBW: ## BB#0: +; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 ; VLBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} ; VLBW-NEXT: retq ; ; VLNOBW-LABEL: sitof64_mask: -; VLNOBW: ## BB#0: +; VLNOBW: # BB#0: ; VLNOBW-NEXT: kmovw %edi, %k1 ; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} ; VLNOBW-NEXT: retq ; ; AVX512DQ-LABEL: sitof64_mask: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: kmovw %edi, %k1 ; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: sitof64_mask: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} ; AVX512BW-NEXT: retq @@ -583,31 +583,31 @@ define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind } define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind { ; KNL-LABEL: sitof64_maskz: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; ; VLBW-LABEL: sitof64_maskz: -; VLBW: ## BB#0: +; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 ; VLBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} ; VLBW-NEXT: retq ; ; VLNOBW-LABEL: sitof64_maskz: -; VLNOBW: ## BB#0: +; VLNOBW: # BB#0: ; VLNOBW-NEXT: kmovw %edi, %k1 ; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} ; VLNOBW-NEXT: retq ; ; AVX512DQ-LABEL: sitof64_maskz: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: kmovw %edi, %k1 ; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: sitof64_maskz: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq @@ -619,7 +619,7 @@ define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind { define <8 x i32> @fptosi01(<8 x double> %a) { ; ALL-LABEL: fptosi01: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvttpd2dq %zmm0, %ymm0 ; ALL-NEXT: retq %b = fptosi <8 x double> %a to <8 x i32> @@ -628,12 +628,12 @@ define <8 x i32> @fptosi01(<8 x double> %a) { define <4 x i32> @fptosi03(<4 x double> %a) { ; 
KNL-LABEL: fptosi03: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: vcvttpd2dq %ymm0, %xmm0 ; KNL-NEXT: retq ; ; AVX512-LABEL: fptosi03: -; AVX512: ## BB#0: +; AVX512: # BB#0: ; AVX512-NEXT: vcvttpd2dq %ymm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -643,14 +643,14 @@ define <4 x i32> @fptosi03(<4 x double> %a) { define <16 x float> @fptrunc00(<16 x double> %b) nounwind { ; NODQ-LABEL: fptrunc00: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vcvtpd2ps %zmm0, %ymm0 ; NODQ-NEXT: vcvtpd2ps %zmm1, %ymm1 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq ; ; DQ-LABEL: fptrunc00: -; DQ: ## BB#0: +; DQ: # BB#0: ; DQ-NEXT: vcvtpd2ps %zmm0, %ymm0 ; DQ-NEXT: vcvtpd2ps %zmm1, %ymm1 ; DQ-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 @@ -661,12 +661,12 @@ define <16 x float> @fptrunc00(<16 x double> %b) nounwind { define <4 x float> @fptrunc01(<4 x double> %b) { ; KNL-LABEL: fptrunc01: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: vcvtpd2ps %ymm0, %xmm0 ; KNL-NEXT: retq ; ; AVX512-LABEL: fptrunc01: -; AVX512: ## BB#0: +; AVX512: # BB#0: ; AVX512-NEXT: vcvtpd2ps %ymm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -676,7 +676,7 @@ define <4 x float> @fptrunc01(<4 x double> %b) { define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) { ; KNL-LABEL: fptrunc02: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 ; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 ; KNL-NEXT: vcvtpd2ps %ymm0, %xmm0 @@ -684,7 +684,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) { ; KNL-NEXT: retq ; ; VL-LABEL: fptrunc02: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vpslld $31, %xmm1, %xmm1 ; VL-NEXT: vptestmd %xmm1, %xmm1, %k1 ; VL-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} @@ -692,7 +692,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) { ; VL-NEXT: retq ; ; AVX512DQ-LABEL: fptrunc02: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1 ; AVX512DQ-NEXT: vpsrad $31, %xmm1, %xmm1 ; AVX512DQ-NEXT: vcvtpd2ps %ymm0, %xmm0 @@ -701,7 +701,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) { ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: fptrunc02: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsrad $31, %xmm1, %xmm1 ; AVX512BW-NEXT: vcvtpd2ps %ymm0, %xmm0 @@ -715,7 +715,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) { define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind { ; ALL-LABEL: fptrunc03: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 ; ALL-NEXT: retq %ext = extractelement <2 x double> %a0, i32 0 @@ -726,7 +726,7 @@ define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind { define <8 x double> @fpext00(<8 x float> %b) nounwind { ; ALL-LABEL: fpext00: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0 ; ALL-NEXT: retq %a = fpext <8 x float> %b to <8 x double> @@ -735,14 +735,14 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind { define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) { ; NOVL-LABEL: fpext01: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vcvtps2pd %xmm0, %ymm0 ; NOVL-NEXT: vcmpltpd %ymm2, %ymm1, %ymm1 ; NOVL-NEXT: vandpd %ymm0, %ymm1, %ymm0 ; NOVL-NEXT: retq ; ; VL-LABEL: fpext01: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vcmpltpd %ymm2, %ymm1, %k1 ; VL-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} ; VL-NEXT: retq @@ -754,7 +754,7 @@ define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x 
double>%a1) { define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind { ; ALL-LABEL: fpext02: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 ; ALL-NEXT: retq %ext = extractelement <4 x float> %a1, i32 0 @@ -765,7 +765,7 @@ define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind { define double @funcA(i64* nocapture %e) { ; ALL-LABEL: funcA: -; ALL: ## BB#0: ## %entry +; ALL: # BB#0: # %entry ; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 ; ALL-NEXT: retq entry: @@ -776,7 +776,7 @@ entry: define double @funcB(i32* %e) { ; ALL-LABEL: funcB: -; ALL: ## BB#0: ## %entry +; ALL: # BB#0: # %entry ; ALL-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 ; ALL-NEXT: retq entry: @@ -787,7 +787,7 @@ entry: define float @funcC(i32* %e) { ; ALL-LABEL: funcC: -; ALL: ## BB#0: ## %entry +; ALL: # BB#0: # %entry ; ALL-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 ; ALL-NEXT: retq entry: @@ -798,7 +798,7 @@ entry: define float @i64tof32(i64* %e) { ; ALL-LABEL: i64tof32: -; ALL: ## BB#0: ## %entry +; ALL: # BB#0: # %entry ; ALL-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 ; ALL-NEXT: retq entry: @@ -809,7 +809,7 @@ entry: define void @fpext() { ; ALL-LABEL: fpext: -; ALL: ## BB#0: ## %entry +; ALL: # BB#0: # %entry ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; ALL-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) @@ -825,7 +825,7 @@ entry: define void @fpround_scalar() nounwind uwtable { ; ALL-LABEL: fpround_scalar: -; ALL: ## BB#0: ## %entry +; ALL: # BB#0: # %entry ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; ALL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 ; ALL-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) @@ -841,7 +841,7 @@ entry: define double @long_to_double(i64 %x) { ; ALL-LABEL: long_to_double: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vmovq %rdi, %xmm0 ; ALL-NEXT: retq %res = bitcast i64 %x to double @@ -850,7 +850,7 @@ define double @long_to_double(i64 %x) { define i64 @double_to_long(double %x) { ; ALL-LABEL: double_to_long: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vmovq %xmm0, %rax ; ALL-NEXT: retq %res = bitcast double %x to i64 @@ -859,7 +859,7 @@ define i64 @double_to_long(double %x) { define float @int_to_float(i32 %x) { ; ALL-LABEL: int_to_float: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vmovd %edi, %xmm0 ; ALL-NEXT: retq %res = bitcast i32 %x to float @@ -868,7 +868,7 @@ define float @int_to_float(i32 %x) { define i32 @float_to_int(float %x) { ; ALL-LABEL: float_to_int: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vmovd %xmm0, %eax ; ALL-NEXT: retq %res = bitcast float %x to i32 @@ -877,7 +877,7 @@ define i32 @float_to_int(float %x) { define <16 x double> @uitof64(<16 x i32> %a) nounwind { ; NODQ-LABEL: uitof64: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm2 ; NODQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm1 @@ -885,7 +885,7 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind { ; NODQ-NEXT: retq ; ; DQ-LABEL: uitof64: -; DQ: ## BB#0: +; DQ: # BB#0: ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm2 ; DQ-NEXT: vextracti32x8 $1, %zmm0, %ymm0 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm1 @@ -896,31 +896,31 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind { } define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { ; KNL-LABEL: uitof64_mask: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} ; KNL-NEXT: retq ; ; VLBW-LABEL: uitof64_mask: -; VLBW: ## BB#0: +; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 ; 
VLBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} ; VLBW-NEXT: retq ; ; VLNOBW-LABEL: uitof64_mask: -; VLNOBW: ## BB#0: +; VLNOBW: # BB#0: ; VLNOBW-NEXT: kmovw %edi, %k1 ; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} ; VLNOBW-NEXT: retq ; ; AVX512DQ-LABEL: uitof64_mask: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: kmovw %edi, %k1 ; AVX512DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: uitof64_mask: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} ; AVX512BW-NEXT: retq @@ -931,31 +931,31 @@ define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind } define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind { ; KNL-LABEL: uitof64_maskz: -; KNL: ## BB#0: +; KNL: # BB#0: ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; ; VLBW-LABEL: uitof64_maskz: -; VLBW: ## BB#0: +; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 ; VLBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} ; VLBW-NEXT: retq ; ; VLNOBW-LABEL: uitof64_maskz: -; VLNOBW: ## BB#0: +; VLNOBW: # BB#0: ; VLNOBW-NEXT: kmovw %edi, %k1 ; VLNOBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} ; VLNOBW-NEXT: retq ; ; AVX512DQ-LABEL: uitof64_maskz: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: kmovw %edi, %k1 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: uitof64_maskz: -; AVX512BW: ## BB#0: +; AVX512BW: # BB#0: ; AVX512BW-NEXT: kmovd %edi, %k1 ; AVX512BW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq @@ -967,14 +967,14 @@ define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind { define <4 x double> @uitof64_256(<4 x i32> %a) nounwind { ; NOVL-LABEL: uitof64_256: -; NOVL: ## BB#0: -; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def> +; NOVL: # BB#0: +; NOVL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; NOVL-NEXT: retq ; ; VL-LABEL: uitof64_256: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vcvtudq2pd %xmm0, %ymm0 ; VL-NEXT: retq %b = uitofp <4 x i32> %a to <4 x double> @@ -983,7 +983,7 @@ define <4 x double> @uitof64_256(<4 x i32> %a) nounwind { define <16 x float> @uitof32(<16 x i32> %a) nounwind { ; ALL-LABEL: uitof32: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0 ; ALL-NEXT: retq %b = uitofp <16 x i32> %a to <16 x float> @@ -992,14 +992,14 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind { define <8 x float> @uitof32_256(<8 x i32> %a) nounwind { ; NOVL-LABEL: uitof32_256: -; NOVL: ## BB#0: -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; NOVL: # BB#0: +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0 -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; NOVL-NEXT: retq ; ; VL-LABEL: uitof32_256: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vcvtudq2ps %ymm0, %ymm0 ; VL-NEXT: retq %b = uitofp <8 x i32> %a to <8 x float> @@ -1008,30 +1008,30 @@ define <8 x float> @uitof32_256(<8 x i32> %a) nounwind { define <4 x float> @uitof32_128(<4 x i32> %a) nounwind { ; KNL-LABEL: uitof32_128: -; KNL: ## BB#0: -; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> +; KNL: # BB#0: +; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0 -; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> 
%ZMM0<kill> +; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; KNL-NEXT: retq ; ; VL-LABEL: uitof32_128: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vcvtudq2ps %xmm0, %xmm0 ; VL-NEXT: retq ; ; AVX512DQ-LABEL: uitof32_128: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 -; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: uitof32_128: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def> +; AVX512BW: # BB#0: +; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> ; AVX512BW-NEXT: vcvtudq2ps %zmm0, %zmm0 -; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> +; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq %b = uitofp <4 x i32> %a to <4 x float> @@ -1040,7 +1040,7 @@ define <4 x float> @uitof32_128(<4 x i32> %a) nounwind { define i32 @fptosi02(float %a) nounwind { ; ALL-LABEL: fptosi02: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvttss2si %xmm0, %eax ; ALL-NEXT: retq %b = fptosi float %a to i32 @@ -1049,7 +1049,7 @@ define i32 @fptosi02(float %a) nounwind { define i32 @fptoui02(float %a) nounwind { ; ALL-LABEL: fptoui02: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvttss2usi %xmm0, %eax ; ALL-NEXT: retq %b = fptoui float %a to i32 @@ -1058,7 +1058,7 @@ define i32 @fptoui02(float %a) nounwind { define float @uitofp02(i32 %a) nounwind { ; ALL-LABEL: uitofp02: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 ; ALL-NEXT: retq %b = uitofp i32 %a to float @@ -1067,7 +1067,7 @@ define float @uitofp02(i32 %a) nounwind { define double @uitofp03(i32 %a) nounwind { ; ALL-LABEL: uitofp03: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 ; ALL-NEXT: retq %b = uitofp i32 %a to double @@ -1076,7 +1076,7 @@ define double @uitofp03(i32 %a) nounwind { define <16 x float> @sitofp_16i1_float(<16 x i32> %a) { ; NODQ-LABEL: sitofp_16i1_float: -; NODQ: ## BB#0: +; NODQ: # BB#0: ; NODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} @@ -1084,7 +1084,7 @@ define <16 x float> @sitofp_16i1_float(<16 x i32> %a) { ; NODQ-NEXT: retq ; ; DQ-LABEL: sitofp_16i1_float: -; DQ: ## BB#0: +; DQ: # BB#0: ; DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 ; DQ-NEXT: vpmovm2d %k0, %zmm0 @@ -1097,7 +1097,7 @@ define <16 x float> @sitofp_16i1_float(<16 x i32> %a) { define <16 x float> @sitofp_16i8_float(<16 x i8> %a) { ; ALL-LABEL: sitofp_16i8_float: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vpmovsxbd %xmm0, %zmm0 ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 ; ALL-NEXT: retq @@ -1107,7 +1107,7 @@ define <16 x float> @sitofp_16i8_float(<16 x i8> %a) { define <16 x float> @sitofp_16i16_float(<16 x i16> %a) { ; ALL-LABEL: sitofp_16i16_float: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vpmovsxwd %ymm0, %zmm0 ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 ; ALL-NEXT: retq @@ -1117,7 +1117,7 @@ define <16 x float> @sitofp_16i16_float(<16 x i16> %a) { define <8 x double> @sitofp_8i16_double(<8 x i16> %a) { ; ALL-LABEL: sitofp_8i16_double: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vpmovsxwd %xmm0, %ymm0 ; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 ; ALL-NEXT: retq @@ -1127,7 +1127,7 @@ define <8 x 
double> @sitofp_8i16_double(<8 x i16> %a) { define <8 x double> @sitofp_8i8_double(<8 x i8> %a) { ; ALL-LABEL: sitofp_8i8_double: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; ALL-NEXT: vpslld $24, %ymm0, %ymm0 ; ALL-NEXT: vpsrad $24, %ymm0, %ymm0 @@ -1139,7 +1139,7 @@ define <8 x double> @sitofp_8i8_double(<8 x i8> %a) { define <16 x double> @sitofp_16i1_double(<16 x double> %a) { ; NOVLDQ-LABEL: sitofp_16i1_double: -; NOVLDQ: ## BB#0: +; NOVLDQ: # BB#0: ; NOVLDQ-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2 @@ -1152,7 +1152,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) { ; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sitofp_16i1_double: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2 ; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0 ; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1 @@ -1163,7 +1163,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sitofp_16i1_double: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxord %zmm2, %zmm2, %zmm2 ; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2 @@ -1175,7 +1175,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) { ; VLNODQ-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_16i1_double: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2 ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1 @@ -1191,7 +1191,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) { define <8 x double> @sitofp_8i1_double(<8 x double> %a) { ; NOVLDQ-LABEL: sitofp_8i1_double: -; NOVLDQ: ## BB#0: +; NOVLDQ: # BB#0: ; NOVLDQ-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} @@ -1200,7 +1200,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) { ; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sitofp_8i1_double: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1 ; VLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0 @@ -1208,7 +1208,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sitofp_8i1_double: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 @@ -1217,7 +1217,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) { ; VLNODQ-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_8i1_double: -; AVX512DQ: ## BB#0: +; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 @@ -1230,8 +1230,8 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) { define <8 x float> @sitofp_8i1_float(<8 x float> %a) { ; NOVLDQ-LABEL: sitofp_8i1_float: -; NOVLDQ: ## BB#0: -; NOVLDQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; NOVLDQ: # BB#0: +; NOVLDQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; NOVLDQ-NEXT: vxorps %ymm1, %ymm1, %ymm1 ; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} @@ -1240,7 +1240,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) { ; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sitofp_8i1_float: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vxorps %ymm1, 
%ymm1, %ymm1 ; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %k0 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0 @@ -1248,7 +1248,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sitofp_8i1_float: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 @@ -1257,8 +1257,8 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) { ; VLNODQ-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_8i1_float: -; AVX512DQ: ## BB#0: -; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; AVX512DQ-NEXT: vxorps %ymm1, %ymm1, %ymm1 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 @@ -1271,14 +1271,14 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) { define <4 x float> @sitofp_4i1_float(<4 x float> %a) { ; NOVL-LABEL: sitofp_4i1_float: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 ; NOVL-NEXT: retq ; ; VLDQ-LABEL: sitofp_4i1_float: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0 @@ -1286,7 +1286,7 @@ define <4 x float> @sitofp_4i1_float(<4 x float> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sitofp_4i1_float: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 @@ -1300,7 +1300,7 @@ define <4 x float> @sitofp_4i1_float(<4 x float> %a) { define <4 x double> @sitofp_4i1_double(<4 x double> %a) { ; NOVL-LABEL: sitofp_4i1_double: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 ; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; NOVL-NEXT: vpmovqd %zmm0, %ymm0 @@ -1308,7 +1308,7 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) { ; NOVL-NEXT: retq ; ; VLDQ-LABEL: sitofp_4i1_double: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vxorpd %ymm1, %ymm1, %ymm1 ; VLDQ-NEXT: vcmpltpd %ymm0, %ymm1, %k0 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0 @@ -1316,7 +1316,7 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sitofp_4i1_double: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 @@ -1330,14 +1330,14 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) { define <2 x float> @sitofp_2i1_float(<2 x float> %a) { ; NOVL-LABEL: sitofp_2i1_float: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 ; NOVL-NEXT: retq ; ; VLDQ-LABEL: sitofp_2i1_float: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0 @@ -1345,7 +1345,7 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sitofp_2i1_float: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 @@ -1359,7 +1359,7 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) { define <2 x double> @sitofp_2i1_double(<2 x double> %a) { ; NOVL-LABEL: sitofp_2i1_double: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; 
NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -1367,7 +1367,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) { ; NOVL-NEXT: retq ; ; VLDQ-LABEL: sitofp_2i1_double: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0 ; VLDQ-NEXT: vpmovm2q %k0, %xmm0 @@ -1375,7 +1375,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: sitofp_2i1_double: -; VLNODQ: ## BB#0: +; VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 @@ -1393,7 +1393,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) { define <16 x float> @uitofp_16i8(<16 x i8>%a) { ; ALL-LABEL: uitofp_16i8: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 ; ALL-NEXT: retq @@ -1403,7 +1403,7 @@ define <16 x float> @uitofp_16i8(<16 x i8>%a) { define <16 x float> @uitofp_16i16(<16 x i16>%a) { ; ALL-LABEL: uitofp_16i16: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 ; ALL-NEXT: retq @@ -1413,7 +1413,7 @@ define <16 x float> @uitofp_16i16(<16 x i16>%a) { define <16 x float> @uitofp_16i1_float(<16 x i32> %a) { ; ALL-LABEL: uitofp_16i1_float: -; ALL: ## BB#0: +; ALL: # BB#0: ; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} @@ -1426,7 +1426,7 @@ define <16 x float> @uitofp_16i1_float(<16 x i32> %a) { define <16 x double> @uitofp_16i1_double(<16 x i32> %a) { ; NOVL-LABEL: uitofp_16i1_double: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ; NOVL-NEXT: movq {{.*}}(%rip), %rax @@ -1440,7 +1440,7 @@ define <16 x double> @uitofp_16i1_double(<16 x i32> %a) { ; NOVL-NEXT: retq ; ; VL-LABEL: uitofp_16i1_double: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; VL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ; VL-NEXT: movl {{.*}}(%rip), %eax @@ -1457,18 +1457,18 @@ define <16 x double> @uitofp_16i1_double(<16 x i32> %a) { define <8 x float> @uitofp_8i1_float(<8 x i32> %a) { ; NOVL-LABEL: uitofp_8i1_float: -; NOVL: ## BB#0: -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; NOVL: # BB#0: +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; NOVL-NEXT: vpmovqd %zmm0, %ymm0 ; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0 -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; NOVL-NEXT: retq ; ; VL-LABEL: uitofp_8i1_float: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; VL-NEXT: vpcmpgtd %ymm0, 
%ymm1, %k1 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} @@ -1481,8 +1481,8 @@ define <8 x float> @uitofp_8i1_float(<8 x i32> %a) { define <8 x double> @uitofp_8i1_double(<8 x i32> %a) { ; NOVL-LABEL: uitofp_8i1_double: -; NOVL: ## BB#0: -; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def> +; NOVL: # BB#0: +; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> ; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} @@ -1491,7 +1491,7 @@ define <8 x double> @uitofp_8i1_double(<8 x i32> %a) { ; NOVL-NEXT: retq ; ; VL-LABEL: uitofp_8i1_double: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} @@ -1504,7 +1504,7 @@ define <8 x double> @uitofp_8i1_double(<8 x i32> %a) { define <4 x float> @uitofp_4i1_float(<4 x i32> %a) { ; NOVL-LABEL: uitofp_4i1_float: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 @@ -1512,7 +1512,7 @@ define <4 x float> @uitofp_4i1_float(<4 x i32> %a) { ; NOVL-NEXT: retq ; ; VL-LABEL: uitofp_4i1_float: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} @@ -1525,7 +1525,7 @@ define <4 x float> @uitofp_4i1_float(<4 x i32> %a) { define <4 x double> @uitofp_4i1_double(<4 x i32> %a) { ; NOVL-LABEL: uitofp_4i1_double: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vpsrld $31, %xmm0, %xmm0 @@ -1533,7 +1533,7 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) { ; NOVL-NEXT: retq ; ; VL-LABEL: uitofp_4i1_double: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} @@ -1546,7 +1546,7 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) { define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { ; NOVL-LABEL: uitofp_2i1_float: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] @@ -1562,7 +1562,7 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { ; NOVL-NEXT: retq ; ; VL-LABEL: uitofp_2i1_float: -; VL: ## BB#0: +; VL: # BB#0: ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 @@ -1576,7 +1576,7 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { define <2 x double> @uitofp_2i1_double(<2 x i32> %a) { ; NOVL-LABEL: uitofp_2i1_double: -; NOVL: ## BB#0: +; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] @@ -1586,7 +1586,7 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) { ; NOVL-NEXT: retq ; ; VLDQ-LABEL: uitofp_2i1_double: -; VLDQ: ## BB#0: +; VLDQ: # BB#0: ; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 @@ -1595,7 +1595,7 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) { ; VLDQ-NEXT: retq ; ; VLNODQ-LABEL: uitofp_2i1_double: -; VLNODQ: ## BB#0: +; 
VLNODQ: # BB#0: ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; VLNODQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 diff --git a/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll index b13965a30ed8..bbe31c5c2ac5 100644 --- a/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll +++ b/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll @@ -1203,3 +1203,35 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) { ret <8 x double> %res2 } + + +; ALL: .LCPI38 +; ALL-NEXT: .long 4290379776 # 0xffba0000 + +; AVX: .LCPI38 +; AVX-NEXT: .long 4290379776 # float NaN + +define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) { +; ALL32-LABEL: f8xi16_i32_NaN: +; ALL32: # BB#0: +; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1 +; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL32-NEXT: retl +; +; ALL64-LABEL: f8xi16_i32_NaN: +; ALL64: # BB#0: +; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 +; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL64-NEXT: retq +; +; AVX-LABEL: f8xi16_i32_NaN: +; AVX: # BB#0: +; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 + %res1 = add <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %a + %res2 = and <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %res1 + ret <8 x i16> %res2 +} diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll index ae0f4406ba0d..1218b68b1be4 100644 --- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -405,12 +405,7 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { ; ; AVX-LABEL: _clearupper16xi8a: ; AVX: # BB#0: -; AVX-NEXT: vpextrb $0, %xmm0, %eax -; AVX-NEXT: vpextrb $1, %xmm0, %ecx -; AVX-NEXT: vmovd %eax, %xmm1 -; AVX-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] -; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq %x0 = extractelement <16 x i8> %0, i32 0 %x1 = extractelement <16 x i8> %0, i32 1 @@ -575,39 +570,10 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: pand %xmm2, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: _clearupper32xi8a: -; AVX1: # BB#0: -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: vpextrb $1, %xmm0, %ecx -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpextrb $0, %xmm1, %edx -; AVX1-NEXT: vpextrb $1, %xmm1, %esi -; AVX1-NEXT: vmovd %edx, %xmm2 -; AVX1-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7] -; AVX1-NEXT: vmovd %eax, %xmm2 -; AVX1-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: _clearupper32xi8a: -; AVX2: # BB#0: -; AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: vpextrb $1, %xmm0, %ecx -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpextrb $0, %xmm1, %edx -; AVX2-NEXT: vpextrb $1, %xmm1, %esi -; AVX2-NEXT: vmovd %edx, %xmm2 -; AVX2-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7] -; AVX2-NEXT: vmovd %eax, %xmm2 -; AVX2-NEXT: vpinsrb $1, 
%ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper32xi8a:
+; AVX: # BB#0:
+; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
 %x0 = extractelement <32 x i8> %0, i32 0
 %x1 = extractelement <32 x i8> %0, i32 1
 %x2 = extractelement <32 x i8> %0, i32 2
diff --git a/test/CodeGen/X86/scavenger.mir b/test/CodeGen/X86/scavenger.mir
new file mode 100644
index 000000000000..8d97aeb22cb9
--- /dev/null
+++ b/test/CodeGen/X86/scavenger.mir
@@ -0,0 +1,54 @@
+# RUN: llc -mtriple=i386-- -run-pass scavenger-test -verify-machineinstrs -o - %s | FileCheck %s
+---
+# CHECK-LABEL: name: func0
+name: func0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %0 : gr32 = MOV32ri 42
+    %ebp = COPY %0
+...
+---
+# CHECK-LABEL: name: func2
+name: func2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-NOT: %eax = MOV32ri 42
+    ; CHECK: [[REG0:%e[a-z]+]] = MOV32ri 42
+    ; CHECK: %ebp = COPY [[REG0]]
+    %eax = MOV32ri 13
+    %0 : gr32 = MOV32ri 42
+    %ebp = COPY %0
+
+    ; CHECK: [[REG1:%e[a-z]+]] = MOV32ri 23
+    ; CHECK: [[REG2:%e[a-z]+]] = MOV32ri 7
+    ; CHECK: [[REG1]] = ADD32ri8 [[REG1]], 5, implicit-def dead %eflags
+    %1 : gr32 = MOV32ri 23
+    %2 : gr32 = MOV32ri 7
+    %1 = ADD32ri8 %1, 5, implicit-def dead %eflags
+
+    NOOP implicit %ebp
+
+    ; CHECK: NOOP implicit [[REG2]]
+    ; CHECK: NOOP implicit [[REG1]]
+    NOOP implicit %2
+    NOOP implicit %1
+    RETQ %eax
+...
+---
+# Defs without uses are currently broken
+#name: func3
+#tracksRegLiveness: true
+#body: |
+#  bb.0:
+#    dead %0 : gr32 = MOV32ri 42
+...
+---
+# Uses without defs are currently broken (and honestly not that useful).
+#name: func3
+#tracksRegLiveness: true
+#body: |
+#  bb.0:
+#    NOOP undef implicit %0 : gr32
+...
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index 1afef86a5f11..7c2937936313 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -15,6 +15,7 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind { ; CHECK-NEXT: cmovneq %rdi, %rsi ; CHECK-NEXT: movl (%rsi), %eax ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test1: ; MCU: # BB#0: @@ -55,6 +56,7 @@ define i32 @test2() nounwind { ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq ; CHECK-NEXT: LBB1_1: ## %bb90 +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test2: ; MCU: # BB#0: # %entry @@ -100,6 +102,7 @@ define float @test3(i32 %x) nounwind readnone { ; CHECK-NEXT: leaq {{.*}}(%rip), %rcx ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test3: ; MCU: # BB#0: # %entry @@ -123,6 +126,7 @@ define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly { ; CHECK-NEXT: seta %al ; CHECK-NEXT: movsbl (%rdi,%rax,4), %eax ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test4: ; MCU: # BB#0: # %entry @@ -157,6 +161,7 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind { ; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; CHECK-NEXT: movd %xmm0, (%rsi) ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test5: ; MCU: # BB#0: @@ -196,6 +201,7 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-NEXT: mulps %xmm0, %xmm0 ; CHECK-NEXT: movaps %xmm0, (%rsi) ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test6: ; MCU: # BB#0: @@ -267,6 +273,7 @@ define x86_fp80 @test7(i32 %tmp8) nounwind { ; CHECK-NEXT: leaq {{.*}}(%rip), %rcx ; CHECK-NEXT: fldt (%rax,%rcx) ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test7: ; MCU: # BB#0: @@ -319,6 +326,7 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) ; GENERIC-NEXT: movq %xmm0, 16(%rsi) ; GENERIC-NEXT: movdqa %xmm1, (%rsi) ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test8: ; ATOM: ## BB#0: @@ -358,6 +366,7 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) ; ATOM-NEXT: movq %xmm0, 16(%rsi) ; ATOM-NEXT: movdqa %xmm1, (%rsi) ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test8: ; MCU: # BB#0: @@ -448,6 +457,7 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: orq %rsi, %rax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test9: ; ATOM: ## BB#0: @@ -457,6 +467,7 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test9: ; MCU: # BB#0: @@ -483,6 +494,7 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: orq %rsi, %rax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test9a: ; ATOM: ## BB#0: @@ -492,6 +504,7 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test9a: ; MCU: # BB#0: @@ -516,6 +529,7 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: orq %rsi, %rax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: 
test9b: ; ATOM: ## BB#0: @@ -525,6 +539,7 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test9b: ; MCU: # BB#0: @@ -552,6 +567,7 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; GENERIC-NEXT: sbbq %rax, %rax ; GENERIC-NEXT: orq $1, %rax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test10: ; ATOM: ## BB#0: @@ -561,6 +577,7 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test10: ; MCU: # BB#0: @@ -586,6 +603,7 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK-NEXT: notq %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test11: ; MCU: # BB#0: @@ -612,6 +630,7 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK-NEXT: notq %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test11a: ; MCU: # BB#0: @@ -641,6 +660,7 @@ define noalias i8* @test12(i64 %count) nounwind ssp noredzone { ; GENERIC-NEXT: movq $-1, %rdi ; GENERIC-NEXT: cmovnoq %rax, %rdi ; GENERIC-NEXT: jmp __Znam ## TAILCALL +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test12: ; ATOM: ## BB#0: ## %entry @@ -650,6 +670,7 @@ define noalias i8* @test12(i64 %count) nounwind ssp noredzone { ; ATOM-NEXT: movq $-1, %rdi ; ATOM-NEXT: cmovnoq %rax, %rdi ; ATOM-NEXT: jmp __Znam ## TAILCALL +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test12: ; MCU: # BB#0: # %entry @@ -700,6 +721,7 @@ define i32 @test13(i32 %a, i32 %b) nounwind { ; GENERIC-NEXT: cmpl %esi, %edi ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test13: ; ATOM: ## BB#0: @@ -710,6 +732,7 @@ define i32 @test13(i32 %a, i32 %b) nounwind { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test13: ; MCU: # BB#0: @@ -728,6 +751,7 @@ define i32 @test14(i32 %a, i32 %b) nounwind { ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: notl %eax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test14: ; ATOM: ## BB#0: @@ -737,6 +761,7 @@ define i32 @test14(i32 %a, i32 %b) nounwind { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test14: ; MCU: # BB#0: @@ -756,6 +781,7 @@ define i32 @test15(i32 %x) nounwind { ; GENERIC-NEXT: negl %edi ; GENERIC-NEXT: sbbl %eax, %eax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test15: ; ATOM: ## BB#0: ## %entry @@ -766,6 +792,7 @@ define i32 @test15(i32 %x) nounwind { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test15: ; MCU: # BB#0: # %entry @@ -817,6 +844,7 @@ define i16 @test17(i16 %x) nounwind { ; GENERIC-NEXT: negw %di ; GENERIC-NEXT: sbbw %ax, %ax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test17: ; ATOM: ## BB#0: ## %entry @@ -827,6 +855,7 @@ define i16 @test17(i16 %x) nounwind { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test17: ; MCU: # BB#0: # %entry @@ -846,6 +875,7 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; GENERIC-NEXT: cmovgel %edx, %esi ; GENERIC-NEXT: movl %esi, %eax ; GENERIC-NEXT: retq +; GENERIC-NEXT: ## -- 
End function ; ; ATOM-LABEL: test18: ; ATOM: ## BB#0: @@ -855,6 +885,7 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: retq +; ATOM-NEXT: ## -- End function ; ; MCU-LABEL: test18: ; MCU: # BB#0: diff --git a/test/CodeGen/X86/shrink-compare.ll b/test/CodeGen/X86/shrink-compare.ll index 41f5d2d5be23..7f35258377ec 100644 --- a/test/CodeGen/X86/shrink-compare.ll +++ b/test/CodeGen/X86/shrink-compare.ll @@ -1,8 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s declare void @bar() define void @test1(i32* nocapture %X) nounwind minsize { +; CHECK-LABEL: test1: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $47, (%rdi) +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %tmp1 = load i32, i32* %X, align 4 %and = and i32 %tmp1, 255 @@ -15,11 +22,15 @@ if.then: if.end: ret void -; CHECK-LABEL: test1: -; CHECK: cmpb $47, (%{{rdi|rcx}}) } define void @test2(i32 %X) nounwind minsize { +; CHECK-LABEL: test2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $47, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %and = and i32 %X, 255 %cmp = icmp eq i32 %and, 47 @@ -31,11 +42,15 @@ if.then: if.end: ret void -; CHECK-LABEL: test2: -; CHECK: cmpb $47, %{{dil|cl}} } define void @test3(i32 %X) nounwind minsize { +; CHECK-LABEL: test3: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $-1, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %and = and i32 %X, 255 %cmp = icmp eq i32 %and, 255 @@ -47,12 +62,22 @@ if.then: if.end: ret void -; CHECK-LABEL: test3: -; CHECK: cmpb $-1, %{{dil|cl}} } ; PR16083 define i1 @test4(i64 %a, i32 %b) { +; CHECK-LABEL: test4: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: je .LBB3_1 +; CHECK-NEXT: # BB#2: # %lor.end +; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB3_1: # %lor.rhs +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> +; CHECK-NEXT: retq entry: %tobool = icmp ne i32 %b, 0 br i1 %tobool, label %lor.end, label %lor.rhs @@ -71,6 +96,16 @@ lor.end: ; preds = %lor.rhs, %entry ; PR16551 define void @test5(i32 %X) nounwind minsize { +; CHECK-LABEL: test5: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movzbl x+{{.*}}(%rip), %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movzwl x+{{.*}}(%rip), %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: cmpl $1, %ecx +; CHECK-NEXT: jne bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %bf.load = load i56, i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4 %bf.lshr = lshr i56 %bf.load, 32 @@ -84,17 +119,16 @@ if.then: if.end: ret void - -; CHECK-LABEL: test5: -; CHECK-NOT: cmpl $1,{{.*}}x+4 -; CHECK: ret } -; CHECK-LABEL: test2_1: -; CHECK: movzbl -; CHECK: cmpl $256 -; CHECK: je bar define void @test2_1(i32 %X) nounwind minsize { +; CHECK-LABEL: test2_1: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: cmpl $256, %eax # imm = 0x100 +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %and = and i32 %X, 255 %cmp = icmp eq i32 %and, 256 @@ -108,9 +142,13 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_1: -; CHECK: cmpb $1, %{{dil|cl}} define void @test_sext_i8_icmp_1(i8 %x) nounwind minsize { +; CHECK-LABEL: 
test_sext_i8_icmp_1: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $1, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, 1 @@ -124,9 +162,13 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_47: -; CHECK: cmpb $47, %{{dil|cl}} define void @test_sext_i8_icmp_47(i8 %x) nounwind minsize { +; CHECK-LABEL: test_sext_i8_icmp_47: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $47, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, 47 @@ -140,9 +182,13 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_127: -; CHECK: cmpb $127, %{{dil|cl}} define void @test_sext_i8_icmp_127(i8 %x) nounwind minsize { +; CHECK-LABEL: test_sext_i8_icmp_127: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $127, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, 127 @@ -156,9 +202,13 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_neg1: -; CHECK: cmpb $-1, %{{dil|cl}} define void @test_sext_i8_icmp_neg1(i8 %x) nounwind minsize { +; CHECK-LABEL: test_sext_i8_icmp_neg1: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $-1, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, -1 @@ -172,9 +222,13 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_neg2: -; CHECK: cmpb $-2, %{{dil|cl}} define void @test_sext_i8_icmp_neg2(i8 %x) nounwind minsize { +; CHECK-LABEL: test_sext_i8_icmp_neg2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $-2, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, -2 @@ -188,9 +242,13 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_neg127: -; CHECK: cmpb $-127, %{{dil|cl}} define void @test_sext_i8_icmp_neg127(i8 %x) nounwind minsize { +; CHECK-LABEL: test_sext_i8_icmp_neg127: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $-127, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, -127 @@ -204,9 +262,13 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_neg128: -; CHECK: cmpb $-128, %{{dil|cl}} define void @test_sext_i8_icmp_neg128(i8 %x) nounwind minsize { +; CHECK-LABEL: test_sext_i8_icmp_neg128: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cmpb $-128, %dil +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, -128 @@ -220,11 +282,14 @@ if.end: ret void } -; CHECK-LABEL: test_sext_i8_icmp_255: -; CHECK: movb $1, -; CHECK: testb -; CHECK: je bar define void @test_sext_i8_icmp_255(i8 %x) nounwind minsize { +; CHECK-LABEL: test_sext_i8_icmp_255: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je bar # TAILCALL +; CHECK-NEXT: # BB#1: # %if.end +; CHECK-NEXT: retq entry: %sext = sext i8 %x to i32 %cmp = icmp eq i32 %sext, 255 diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index 6d51fb54f8b8..79b949a6ccb1 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -14,6 +14,7 @@ define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind { ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; X64-NEXT: movdqa %xmm0, (%rdi) ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp3 = load <8 x i16>, <8 x i16>* %old %tmp6 = shufflevector <8 x i16> %tmp3, @@ -32,6 +33,7 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; X64-NEXT: andps (%rdi), %xmm0 ; X64-NEXT: orps %xmm1, %xmm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > @@ -48,6 +50,7 @@ define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind { ; X64-NEXT: pandn %xmm1, %xmm2 ; X64-NEXT: por %xmm2, %xmm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 > ret <8 x i16> %tmp } @@ -61,6 +64,7 @@ define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind { ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7] ; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 > ret <8 x i16> %tmp } @@ -73,6 +77,7 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind { ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] ; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7] ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 > ret <8 x i16> %tmp } @@ -83,6 +88,7 @@ define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind { ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 > ret <8 x i16> %tmp } @@ -92,6 +98,7 @@ define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind { ; X64: ## BB#0: ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > ret <8 x i16> %tmp } @@ -102,6 +109,7 @@ define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind { ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7] ; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7] ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 > ret <8 x i16> %tmp } @@ -113,6 +121,7 @@ define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind { ; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7] ; X64-NEXT: movdqa %xmm0, (%rdi) ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = load <2 x i64>, <2 x i64>* %A %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> %tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0 @@ -143,6 +152,7 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind { ; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] ; X64-NEXT: movapd %xmm0, (%rdi) ; X64-NEXT: retq +; X64-NEXT: ## -- End function %tmp = load <4 x float>, <4 x float>* %r %tmp.upgrd.3 = bitcast <2 x i32>* %A to double* %tmp.upgrd.4 = load double, double* %tmp.upgrd.3 @@ -179,6 +189,7 @@ define void @t10() nounwind { ; 
X64-NEXT: movq _g2@{{.*}}(%rip), %rax ; X64-NEXT: movq %xmm0, (%rax) ; X64-NEXT: retq +; X64-NEXT: ## -- End function load <4 x i32>, <4 x i32>* @g1, align 16 bitcast <4 x i32> %1 to <8 x i16> shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef > @@ -196,6 +207,7 @@ define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { ; X64-NEXT: psrld $16, %xmm0 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > ret <8 x i16> %tmp7 @@ -209,6 +221,7 @@ define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3] ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef > ret <8 x i16> %tmp9 @@ -222,6 +235,7 @@ define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7] ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3] ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef > ret <8 x i16> %tmp9 @@ -234,6 +248,7 @@ define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef > ret <8 x i16> %tmp9 @@ -247,6 +262,7 @@ define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7] ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef > ret <8 x i16> %tmp8 @@ -260,6 +276,7 @@ define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone { ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > @@ -275,6 +292,7 @@ define <4 x i32> @t17() nounwind { ; X64-NEXT: pxor %xmm1, %xmm1 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-NEXT: retq +; X64-NEXT: ## -- End function entry: %tmp1 = load <4 x float>, <4 x float>* undef, align 16 %tmp2 = shufflevector <4 x float> %tmp1, <4 x 
float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3> diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll index a00d47bb13e9..f937d484ce0d 100644 --- a/test/CodeGen/X86/stack-folding-fp-avx1.ll +++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll @@ -1926,5 +1926,19 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) { ret <8 x float> %6 } +define <4 x float> @stack_nofold_insertps(<8 x float> %a0, <8 x float> %a1) { +; Cannot fold this without changing the immediate. +; CHECK-LABEL: stack_nofold_insertps +; CHECK: 32-byte Spill +; CHECK: nop +; CHECK: 32-byte Reload +; CHECK: vinsertps $179, {{%xmm., %xmm., %xmm.}} + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %v0 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %v1 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v0, <4 x float> %v1, i8 179) + ret <4 x float> %res +} + attributes #0 = { "unsafe-fp-math"="false" } attributes #1 = { "unsafe-fp-math"="true" } diff --git a/test/CodeGen/X86/statepoint-allocas.ll b/test/CodeGen/X86/statepoint-allocas.ll index 9f5418432abc..b8e5c82913a5 100644 --- a/test/CodeGen/X86/statepoint-allocas.ll +++ b/test/CodeGen/X86/statepoint-allocas.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s ; Check that we can lower a use of an alloca both as a deopt value (where the ; exact meaning is up to the consumer of the stackmap) and as an explicit spill ; slot used for GC. diff --git a/test/CodeGen/X86/statepoint-call-lowering.ll b/test/CodeGen/X86/statepoint-call-lowering.ll index 6e5cdd605122..bd2dd53b654a 100644 --- a/test/CodeGen/X86/statepoint-call-lowering.ll +++ b/test/CodeGen/X86/statepoint-call-lowering.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s ; This file contains a collection of basic tests to ensure we didn't ; screw up normal call lowering when there are no deopt or gc arguments. diff --git a/test/CodeGen/X86/statepoint-far-call.ll b/test/CodeGen/X86/statepoint-far-call.ll index dc49061f6461..9f9b684efae8 100644 --- a/test/CodeGen/X86/statepoint-far-call.ll +++ b/test/CodeGen/X86/statepoint-far-call.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s ; Test to check that Statepoints with X64 far-immediate targets ; are lowered correctly to an indirect call via a scratch register. diff --git a/test/CodeGen/X86/statepoint-forward.ll b/test/CodeGen/X86/statepoint-forward.ll index d97bc0c75602..bee4b5ac884e 100644 --- a/test/CodeGen/X86/statepoint-forward.ll +++ b/test/CodeGen/X86/statepoint-forward.ll @@ -1,5 +1,5 @@ ; RUN: opt -O3 -S < %s | FileCheck --check-prefix=CHECK-OPT %s -; RUN: llc < %s | FileCheck --check-prefix=CHECK-LLC %s +; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LLC %s ; These tests are targeted at making sure we don't retain information ; about memory which contains potential gc references across a statepoint. ; They're carefully written to only outlaw forwarding of references. 
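Every statepoint test in this commit, above and below, receives the same one-line RUN change: the machine-code verifier is switched on for the llc invocation, so a malformed MachineFunction produced during statepoint lowering fails the test instead of slipping through. A minimal sketch of the pattern, with a hypothetical test function @f (the real tests either pin a triple or rely on the host default):

; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; -verify-machineinstrs runs the MachineVerifier after each codegen pass,
; so ill-formed MachineInstrs abort the test immediately.
; CHECK-LABEL: {{_?}}f:
define void @f() gc "statepoint-example" {
entry:
  ret void
}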
diff --git a/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll index 11dbe9e2e6c1..b88ca03805f2 100644 --- a/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll +++ b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s ; This file contains a collection of basic tests to ensure we didn't ; screw up normal call lowering when a statepoint is a GC transition. diff --git a/test/CodeGen/X86/statepoint-invoke.ll b/test/CodeGen/X86/statepoint-invoke.ll index 3e8b8ca49f1d..29f8e3ed4f78 100644 --- a/test/CodeGen/X86/statepoint-invoke.ll +++ b/test/CodeGen/X86/statepoint-invoke.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s 2>&1 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s 2>&1 | FileCheck %s target triple = "x86_64-pc-linux-gnu" diff --git a/test/CodeGen/X86/statepoint-live-in.ll b/test/CodeGen/X86/statepoint-live-in.ll index abe2b0a7acc8..aaa4d7c8422a 100644 --- a/test/CodeGen/X86/statepoint-live-in.ll +++ b/test/CodeGen/X86/statepoint-live-in.ll @@ -1,4 +1,5 @@ -; RUN: llc -O3 < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: llc -verify-machineinstrs -O3 < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" @@ -6,38 +7,70 @@ declare void @bar() #0 declare void @baz() define void @test1(i32 %a) gc "statepoint-example" { +; CHECK-LABEL: test1: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp0: +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +; entry: ; We expect the argument to be passed in an extra register to bar -; CHECK-LABEL: test1 -; CHECK: pushq %rax -; CHECK-NEXT: Lcfi0: -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq _bar %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a) ret void } define void @test2(i32 %a, i32 %b) gc "statepoint-example" { +; CHECK-LABEL: test2: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: Lcfi1: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi2: +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: Lcfi3: +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: Lcfi4: +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: Lcfi5: +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp1: +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +; entry: ; Because the first call clobbers esi, we have to move the values into ; new registers. Note that they stay in the registers for both calls. -; CHECK-LABEL: @test2 -; CHECK: movl %esi, %ebx -; CHECK-NEXT: movl %edi, %ebp -; CHECK-NEXT: callq _bar call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %a, i32 %b) call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %b, i32 %a) ret void } define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) gc "statepoint-example" { +; CHECK-LABEL: test3: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: Lcfi6: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +; entry: -; TODO: We should have folded the reload into the statepoint. -; CHECK-LABEL: @test3 -; CHECK: pushq %rax -; CHECK-NEXT: Lcfi -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq _bar +; We directly reference the argument slot %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 9, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) ret void } @@ -47,25 +80,39 @@ entry: ; also ends up being a good test of whether we can fold loads from immutable ; stack slots into the statepoint. define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" { +; CHECK-LABEL: test4: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: Lcfi7: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +; entry: -; CHECK-LABEL: test4 -; CHECK: pushq %rax -; CHECK-NEXT: Lcfi -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq _bar %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) ret void } ; A live-through gc-value must be spilled even if it is also a live-in deopt ; value. For live-in, we could technically report the register copy, but from -; a code quality perspective it's better to reuse the required stack slot so +; a code quality perspective it's better to reuse the required stack slot so ; as to put less stress on the register allocator for no benefit. define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" { +; CHECK-LABEL: test5: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: Lcfi8: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rsi, (%rsp) +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp5: +; CHECK-NEXT: movq (%rsp), %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +; entry: -; CHECK-LABEL: test5 -; CHECK: movq %rsi, (%rsp) -; CHECK-NEXT: callq _bar %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p) %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 9, i32 9) ret i32 addrspace(1)* %p2 @@ -73,14 +120,27 @@ entry: ; Show the interaction of live-through spilling followed by live-in. 
define void @test6(i32 %a) gc "statepoint-example" { +; CHECK-LABEL: test6: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: Lcfi9: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: Lcfi10: +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: Lcfi11: +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq _baz +; CHECK-NEXT: Ltmp6: +; CHECK-NEXT: callq _bar +; CHECK-NEXT: Ltmp7: +; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +; entry: -; TODO: We could have reused the previous spill slot at zero additional cost. -; CHECK-LABEL: test6 -; CHECK: movl %edi, %ebx -; CHECK: movl %ebx, 12(%rsp) -; CHECK-NEXT: callq _baz -; CHECK-NEXT: Ltmp -; CHECK-NEXT: callq _bar call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 %a) call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a) ret void } diff --git a/test/CodeGen/X86/statepoint-stack-usage.ll b/test/CodeGen/X86/statepoint-stack-usage.ll index 5c27898f284a..b16426eae3d5 100644 --- a/test/CodeGen/X86/statepoint-stack-usage.ll +++ b/test/CodeGen/X86/statepoint-stack-usage.ll @@ -1,4 +1,4 @@ -; RUN: llc -stack-symbol-ordering=0 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 < %s | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll index 0506381b9ec2..966f66815f92 100644 --- a/test/CodeGen/X86/statepoint-stackmap-format.ll +++ b/test/CodeGen/X86/statepoint-stackmap-format.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple="x86_64-pc-linux-gnu" | FileCheck %s -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple="x86_64-pc-unknown-elf" | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -stack-symbol-ordering=0 -mtriple="x86_64-pc-linux-gnu" | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -stack-symbol-ordering=0 -mtriple="x86_64-pc-unknown-elf" | FileCheck %s ; This test is a sanity check to ensure statepoints are generating StackMap ; sections correctly. This is not intended to be a rigorous test of the diff --git a/test/CodeGen/X86/statepoint-uniqueing.ll b/test/CodeGen/X86/statepoint-uniqueing.ll index e791bc6b2333..a5fa1f2d99c9 100644 --- a/test/CodeGen/X86/statepoint-uniqueing.ll +++ b/test/CodeGen/X86/statepoint-uniqueing.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s ; Checks for a crash we had when two gc.relocate calls would ; relocate identical values diff --git a/test/CodeGen/X86/statepoint-vector-bad-spill.ll b/test/CodeGen/X86/statepoint-vector-bad-spill.ll index 848988589cb0..7c55491bb1be 100644 --- a/test/CodeGen/X86/statepoint-vector-bad-spill.ll +++ b/test/CodeGen/X86/statepoint-vector-bad-spill.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -O3 < %s | FileCheck %s ; This is checking for a crash. 
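The test5 and test6 hunks above pin down how a value used both as a deopt argument and as a gc pointer gets spilled. As a standalone sketch, the test5 shape looks like the following; @sketch and @callee are hypothetical, and the statepoint operand layout is copied verbatim from test5, which is what keeps the gc.relocate indices valid (they count statepoint call operands from zero, so index 9 names the final copy of %p):

declare void @callee()
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)

define i32 addrspace(1)* @sketch(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" {
entry:
  ; %p is listed twice, once as the live-in deopt value and once as the
  ; live-through gc value; the (9, 9) base/derived pair selects the gc copy.
  %tok = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @callee, i32 0, i32 2, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p)
  %p.rel = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 9, i32 9)
  ret i32 addrspace(1)* %p.rel
}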
diff --git a/test/CodeGen/X86/statepoint-vector.ll b/test/CodeGen/X86/statepoint-vector.ll index 000e88742880..5bc8f983ff06 100644 --- a/test/CodeGen/X86/statepoint-vector.ll +++ b/test/CodeGen/X86/statepoint-vector.ll @@ -1,4 +1,4 @@ -; RUN: llc -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s ; REQUIRES: asserts target triple = "x86_64-pc-linux-gnu" diff --git a/test/CodeGen/X86/vector-unsigned-cmp.ll b/test/CodeGen/X86/vector-unsigned-cmp.ll new file mode 100644 index 000000000000..fc246669992c --- /dev/null +++ b/test/CodeGen/X86/vector-unsigned-cmp.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 + +; PR33276 - https://bugs.llvm.org/show_bug.cgi?id=33276 +; If both operands of an unsigned icmp are known non-negative, then +; we don't need to flip the sign bits in order to map to signed pcmpgt*. + +define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SSE-LABEL: ugt_v2i64: +; SSE: # BB#0: +; SSE-NEXT: psrlq $1, %xmm0 +; SSE-NEXT: psrlq $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm2 +; SSE-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ugt_v2i64: +; AVX: # BB#0: +; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <2 x i64> %x, <i64 1, i64 1> + %sh2 = lshr <2 x i64> %y, <i64 1, i64 1> + %cmp = icmp ugt <2 x i64> %sh1, %sh2 + ret <2 x i1> %cmp +} + +define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SSE-LABEL: ult_v2i64: +; SSE: # BB#0: +; SSE-NEXT: psrlq $1, %xmm0 +; SSE-NEXT: psrlq $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm2 +; SSE-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ult_v2i64: +; AVX: # BB#0: +; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq 
+ %sh1 = lshr <2 x i64> %x, <i64 1, i64 1> + %sh2 = lshr <2 x i64> %y, <i64 1, i64 1> + %cmp = icmp ult <2 x i64> %sh1, %sh2 + ret <2 x i1> %cmp +} + +define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SSE-LABEL: uge_v2i64: +; SSE: # BB#0: +; SSE-NEXT: psrlq $1, %xmm0 +; SSE-NEXT: psrlq $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm2 +; SSE-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] +; SSE-NEXT: pand %xmm3, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: uge_v2i64: +; AVX: # BB#0: +; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <2 x i64> %x, <i64 1, i64 1> + %sh2 = lshr <2 x i64> %y, <i64 1, i64 1> + %cmp = icmp uge <2 x i64> %sh1, %sh2 + ret <2 x i1> %cmp +} + +define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SSE-LABEL: ule_v2i64: +; SSE: # BB#0: +; SSE-NEXT: psrlq $1, %xmm0 +; SSE-NEXT: psrlq $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm2 +; SSE-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; SSE-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE-NEXT: pand %xmm3, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ule_v2i64: +; AVX: # BB#0: +; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <2 x i64> %x, <i64 1, i64 1> + %sh2 = lshr <2 x i64> %y, <i64 1, i64 1> + %cmp = icmp ule <2 x i64> %sh1, %sh2 + ret <2 x i1> %cmp +} + +define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SSE-LABEL: ugt_v4i32: +; SSE: # BB#0: +; SSE-NEXT: psrld $1, %xmm0 +; SSE-NEXT: psrld $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: ugt_v4i32: +; AVX1: # BB#0: +; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: ugt_v4i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 +; AVX2-NEXT: 
vpxor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq + %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> + %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1> + %cmp = icmp ugt <4 x i32> %sh1, %sh2 + ret <4 x i1> %cmp +} + +define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SSE-LABEL: ult_v4i32: +; SSE: # BB#0: +; SSE-NEXT: psrld $1, %xmm0 +; SSE-NEXT: psrld $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm2 +; SSE-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: ult_v4i32: +; AVX1: # BB#0: +; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: ult_v4i32: +; AVX2: # BB#0: +; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq + %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> + %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1> + %cmp = icmp ult <4 x i32> %sh1, %sh2 + ret <4 x i1> %cmp +} + +define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SSE2-LABEL: uge_v4i32: +; SSE2: # BB#0: +; SSE2-NEXT: psrld $1, %xmm0 +; SSE2-NEXT: psrld $1, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: uge_v4i32: +; SSE41: # BB#0: +; SSE41-NEXT: psrld $1, %xmm0 +; SSE41-NEXT: psrld $1, %xmm1 +; SSE41-NEXT: pmaxud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: uge_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> + %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1> + %cmp = icmp uge <4 x i32> %sh1, %sh2 + ret <4 x i1> %cmp +} + +define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SSE2-LABEL: ule_v4i32: +; SSE2: # BB#0: +; SSE2-NEXT: psrld $1, %xmm0 +; SSE2-NEXT: psrld $1, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: ule_v4i32: +; SSE41: # BB#0: +; SSE41-NEXT: psrld $1, %xmm0 +; SSE41-NEXT: psrld $1, %xmm1 +; SSE41-NEXT: pminud %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: ule_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> + %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1> + %cmp = icmp ule <4 x i32> %sh1, 
%sh2 + ret <4 x i1> %cmp +} + +define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SSE-LABEL: ugt_v8i16: +; SSE: # BB#0: +; SSE-NEXT: psrlw $1, %xmm0 +; SSE-NEXT: psrlw $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pcmpgtw %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ugt_v8i16: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %cmp = icmp ugt <8 x i16> %sh1, %sh2 + ret <8 x i1> %cmp +} + +define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SSE-LABEL: ult_v8i16: +; SSE: # BB#0: +; SSE-NEXT: psrlw $1, %xmm0 +; SSE-NEXT: psrlw $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm2 +; SSE-NEXT: pcmpgtw %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ult_v8i16: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %cmp = icmp ult <8 x i16> %sh1, %sh2 + ret <8 x i1> %cmp +} + +define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SSE2-LABEL: uge_v8i16: +; SSE2: # BB#0: +; SSE2-NEXT: psrlw $1, %xmm0 +; SSE2-NEXT: psrlw $1, %xmm1 +; SSE2-NEXT: psubusw %xmm0, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: uge_v8i16: +; SSE41: # BB#0: +; SSE41-NEXT: psrlw $1, %xmm0 +; SSE41-NEXT: psrlw $1, %xmm1 +; SSE41-NEXT: pmaxuw %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: uge_v8i16: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %cmp = icmp uge <8 x i16> %sh1, %sh2 + ret <8 x i1> %cmp +} + +define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SSE2-LABEL: ule_v8i16: +; SSE2: # BB#0: +; SSE2-NEXT: psrlw $1, %xmm0 +; SSE2-NEXT: psrlw $1, %xmm1 +; SSE2-NEXT: psubusw %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: ule_v8i16: +; SSE41: # BB#0: +; SSE41-NEXT: psrlw $1, %xmm0 +; SSE41-NEXT: psrlw $1, %xmm1 +; SSE41-NEXT: pminuw %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: ule_v8i16: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 +; 
AVX-NEXT: retq + %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %cmp = icmp ule <8 x i16> %sh1, %sh2 + ret <8 x i1> %cmp +} + +define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SSE-LABEL: ugt_v16i8: +; SSE: # BB#0: +; SSE-NEXT: psrlw $1, %xmm0 +; SSE-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE-NEXT: psrlw $1, %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; SSE-NEXT: por %xmm2, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pcmpgtb %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ugt_v16i8: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %cmp = icmp ugt <16 x i8> %sh1, %sh2 + ret <16 x i1> %cmp +} + +define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SSE-LABEL: ult_v16i8: +; SSE: # BB#0: +; SSE-NEXT: psrlw $1, %xmm0 +; SSE-NEXT: psrlw $1, %xmm1 +; SSE-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; SSE-NEXT: por %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm2 +; SSE-NEXT: pcmpgtb %xmm0, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ult_v16i8: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %cmp = icmp ult <16 x i8> %sh1, %sh2 + ret <16 x i1> %cmp +} + +define <16 x i1> @uge_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SSE-LABEL: uge_v16i8: +; SSE: # BB#0: +; SSE-NEXT: psrlw $1, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: psrlw $1, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pmaxub %xmm0, %xmm1 +; SSE-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: uge_v16i8: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 
1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %cmp = icmp uge <16 x i8> %sh1, %sh2 + ret <16 x i1> %cmp +} + +define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SSE-LABEL: ule_v16i8: +; SSE: # BB#0: +; SSE-NEXT: psrlw $1, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: psrlw $1, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pminub %xmm0, %xmm1 +; SSE-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: ule_v16i8: +; AVX: # BB#0: +; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm1 +; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %cmp = icmp ule <16 x i8> %sh1, %sh2 + ret <16 x i1> %cmp +} + diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll index f51f917fbac9..99e03c891c00 100644 --- a/test/CodeGen/X86/wide-fma-contraction.ll +++ b/test/CodeGen/X86/wide-fma-contraction.ll @@ -1,26 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma -mtriple=x86_64-apple-darwin < %s | FileCheck %s ; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA ; CHECK-LABEL: fmafunc ; CHECK-NOFMA-LABEL: fmafunc define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) { +; CHECK-LABEL: fmafunc: +; CHECK: ## BB#0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: Lcfi1: +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: Lcfi2: +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: andl $-32, %esp +; CHECK-NEXT: subl $32, %esp +; CHECK-NEXT: vfmaddps 8(%ebp), %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vfmaddps 40(%ebp), %ymm3, %ymm1, %ymm1 +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; +; CHECK-NOFMA-LABEL: fmafunc: +; CHECK-NOFMA: ## BB#0: +; CHECK-NOFMA-NEXT: pushl %ebp +; CHECK-NOFMA-NEXT: Lcfi0: +; CHECK-NOFMA-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NOFMA-NEXT: Lcfi1: +; CHECK-NOFMA-NEXT: .cfi_offset %ebp, -8 +; CHECK-NOFMA-NEXT: movl %esp, %ebp +; CHECK-NOFMA-NEXT: Lcfi2: +; CHECK-NOFMA-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NOFMA-NEXT: andl $-32, %esp +; CHECK-NOFMA-NEXT: subl $32, %esp +; CHECK-NOFMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 +; CHECK-NOFMA-NEXT: vaddps 8(%ebp), %ymm0, %ymm0 +; CHECK-NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; CHECK-NOFMA-NEXT: vaddps 40(%ebp), %ymm1, %ymm1 +; CHECK-NOFMA-NEXT: movl %ebp, %esp +; CHECK-NOFMA-NEXT: popl %ebp +; CHECK-NOFMA-NEXT: retl -; CHECK-NOT: vmulps -; CHECK-NOT: vaddps -; CHECK: vfmaddps -; CHECK-NOT: vmulps -; CHECK-NOT: vaddps -; CHECK: vfmaddps -; CHECK-NOT: vmulps -; CHECK-NOT: vaddps - -; CHECK-NOFMA-NOT: calll -; CHECK-NOFMA: vmulps -; CHECK-NOFMA: vaddps -; CHECK-NOFMA-NOT: calll -; CHECK-NOFMA: vmulps -; CHECK-NOFMA: vaddps -; CHECK-NOFMA-NOT: calll %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> 
%a, <16 x float> %b, <16 x float> %c) ret <16 x float> %ret diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll index 397e5bc10f5b..cd58dd1e7604 100644 --- a/test/CodeGen/X86/xor-icmp.ll +++ b/test/CodeGen/X86/xor-icmp.ll @@ -1,21 +1,33 @@ -; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64 ; rdar://7367229 define i32 @t(i32 %a, i32 %b) nounwind ssp { +; X32-LABEL: t: +; X32: # BB#0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: xorb {{[0-9]+}}(%esp), %al +; X32-NEXT: testb $64, %al +; X32-NEXT: je .LBB0_1 +; X32-NEXT: # BB#2: # %bb1 +; X32-NEXT: jmp bar # TAILCALL +; X32-NEXT: .LBB0_1: # %bb +; X32-NEXT: jmp foo # TAILCALL +; +; X64-LABEL: t: +; X64: # BB#0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: xorl %esi, %eax +; X64-NEXT: testb $64, %ah +; X64-NEXT: je .LBB0_1 +; X64-NEXT: # BB#2: # %bb1 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp bar # TAILCALL +; X64-NEXT: .LBB0_1: # %bb +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp foo # TAILCALL entry: -; X32-LABEL: t: -; X32: xorb -; X32-NOT: andb -; X32-NOT: shrb -; X32: testb $64 -; X32: je - -; X64-LABEL: t: -; X64-NOT: setne -; X64: xorl -; X64: testb $64 -; X64: je %0 = and i32 %a, 16384 %1 = icmp ne i32 %0, 0 %2 = and i32 %b, 16384 @@ -38,20 +50,32 @@ declare i32 @bar(...) define i32 @t2(i32 %x, i32 %y) nounwind ssp { ; X32-LABEL: t2: -; X32: cmpl -; X32: sete -; X32: cmpl -; X32: sete -; X32-NOT: xor -; X32: je - +; X32: # BB#0: # %entry +; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X32-NEXT: sete %al +; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X32-NEXT: sete %cl +; X32-NEXT: cmpb %al, %cl +; X32-NEXT: je .LBB1_1 +; X32-NEXT: # BB#2: # %bb +; X32-NEXT: jmp foo # TAILCALL +; X32-NEXT: .LBB1_1: # %return +; X32-NEXT: retl +; ; X64-LABEL: t2: -; X64: testl -; X64: sete -; X64: testl -; X64: sete -; X64-NOT: xor -; X64: je +; X64: # BB#0: # %entry +; X64-NEXT: testl %edi, %edi +; X64-NEXT: sete %al +; X64-NEXT: testl %esi, %esi +; X64-NEXT: sete %cl +; X64-NEXT: cmpb %al, %cl +; X64-NEXT: je .LBB1_1 +; X64-NEXT: # BB#2: # %bb +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: jmp foo # TAILCALL +; X64-NEXT: .LBB1_1: # %return +; X64-NEXT: retq + entry: %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] %1 = icmp eq i32 %y, 0 ; <i1> [#uses=1] diff --git a/test/DebugInfo/MIR/AArch64/clobber-sp.mir b/test/DebugInfo/MIR/AArch64/clobber-sp.mir new file mode 100644 index 000000000000..444faee81cb3 --- /dev/null +++ b/test/DebugInfo/MIR/AArch64/clobber-sp.mir @@ -0,0 +1,181 @@ +# RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s \ +# RUN: | llvm-dwarfdump - | FileCheck %s +# CHECK: .debug_info contents: +# CHECK: DW_TAG_formal_parameter +# CHECK: DW_TAG_formal_parameter +# CHECK-NEXT: DW_AT_location [DW_FORM_data4] ([[LOC:.*]]) +# CHECK-NEXT: DW_AT_name {{.*}}"y" +# CHECK: .debug_loc contents: +# CHECK: [[LOC]]: +# CHECK-SAME: Beginning address offset: 0x0000000000000000 +# CHECK-NEXT: Ending address offset: 0x0000000000000014 +# CHECK-NEXT: Location description: 51 +# reg1 +# +# The range of y's [SP+8] location must not be interrupted by the call to h. 
+# CHECK: Beginning address offset: 0x0000000000000014 +# CHECK-NEXT: Ending address offset: 0x0000000000000038 +# CHECK-NEXT: Location description: 8f 08 +# breg31 +8 +--- | + ; Generated at -Os from: + ; struct Rect { + ; double x, y, w, h; + ; }; + ; void g(struct Rect); + ; void h(int *); + ; int f(int x, int y, struct Rect s) { + ; g(s); + ; if (y) + ; h(&x); + ; return 0; + ; } + source_filename = "/tmp/clobber.c" + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios" + + %struct.Rect = type { double, double, double, double } + + ; Function Attrs: nounwind optsize ssp + define i32 @f(i32 %x, i32 %y, [4 x double] %s.coerce) local_unnamed_addr #0 !dbg !7 { + entry: + %x.addr = alloca i32, align 4 + tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !19, metadata !22), !dbg !23 + store i32 %x, i32* %x.addr, align 4, !tbaa !24 + tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !20, metadata !22), !dbg !28 + tail call void @llvm.dbg.declare(metadata %struct.Rect* undef, metadata !21, metadata !22), !dbg !29 + tail call void @g([4 x double] %s.coerce) #4, !dbg !30 + %tobool = icmp eq i32 %y, 0, !dbg !31 + br i1 %tobool, label %if.end, label %if.then, !dbg !33 + + if.then: ; preds = %entry + tail call void @llvm.dbg.value(metadata i32* %x.addr, i64 0, metadata !19, metadata !22), !dbg !23 + call void @h(i32* nonnull %x.addr) #4, !dbg !34 + br label %if.end, !dbg !34 + + if.end: ; preds = %if.then, %entry + ret i32 0, !dbg !35 + } + + declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + declare void @g([4 x double]) local_unnamed_addr #2 + declare void @h(i32*) local_unnamed_addr #2 + declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 + declare void @llvm.stackprotector(i8*, i8**) #3 + + attributes #0 = { nounwind optsize ssp } + attributes #1 = { nounwind readnone speculatable } + attributes #2 = { optsize } + attributes #3 = { nounwind } + attributes #4 = { nounwind optsize } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4, !5} + !llvm.ident = !{!6} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 302682) (llvm/trunk 302683)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) + !1 = !DIFile(filename: "/tmp/clobber.c", directory: "/Volumes/Data/apple-internal/swift") + !2 = !{} + !3 = !{i32 2, !"Dwarf Version", i32 2} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !{i32 1, !"PIC Level", i32 2} + !6 = !{!"clang version 5.0.0 (trunk 302682) (llvm/trunk 302683)"} + !7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 7, type: !8, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !18) + !8 = !DISubroutineType(types: !9) + !9 = !{!10, !10, !10, !11} + !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Rect", file: !1, line: 1, size: 256, elements: !12) + !12 = !{!13, !15, !16, !17} + !13 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !11, file: !1, line: 2, baseType: !14, size: 64) + !14 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) + !15 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !11, file: !1, line: 2, baseType: !14, size: 64, offset: 64) + !16 = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !11, file: !1, line: 2, baseType: !14, size: 64, offset: 128) + !17 = !DIDerivedType(tag: DW_TAG_member, name: 
"h", scope: !11, file: !1, line: 2, baseType: !14, size: 64, offset: 192) + !18 = !{!19, !20, !21} + !19 = !DILocalVariable(name: "x", arg: 1, scope: !7, file: !1, line: 7, type: !10) + !20 = !DILocalVariable(name: "y", arg: 2, scope: !7, file: !1, line: 7, type: !10) + !21 = !DILocalVariable(name: "s", arg: 3, scope: !7, file: !1, line: 7, type: !11) + !22 = !DIExpression() + !23 = !DILocation(line: 7, column: 11, scope: !7) + !24 = !{!25, !25, i64 0} + !25 = !{!"int", !26, i64 0} + !26 = !{!"omnipotent char", !27, i64 0} + !27 = !{!"Simple C/C++ TBAA"} + !28 = !DILocation(line: 7, column: 18, scope: !7) + !29 = !DILocation(line: 7, column: 33, scope: !7) + !30 = !DILocation(line: 8, column: 3, scope: !7) + !31 = !DILocation(line: 9, column: 7, scope: !32) + !32 = distinct !DILexicalBlock(scope: !7, file: !1, line: 9, column: 7) + !33 = !DILocation(line: 9, column: 7, scope: !7) + !34 = !DILocation(line: 10, column: 5, scope: !32) + !35 = !DILocation(line: 12, column: 3, scope: !7) + +... +--- +name: f +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%w0' } + - { reg: '%w1' } + - { reg: '%d0' } + - { reg: '%d1' } + - { reg: '%d2' } + - { reg: '%d3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 32 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +stack: + - { id: 0, name: x.addr, offset: -20, size: 4, alignment: 4, local-offset: -4 } + - { id: 1, type: spill-slot, offset: -24, size: 4, alignment: 4 } + - { id: 2, type: spill-slot, offset: -8, size: 8, alignment: 8, callee-saved-register: '%lr' } + - { id: 3, type: spill-slot, offset: -16, size: 8, alignment: 8, callee-saved-register: '%fp' } +body: | + bb.0.entry: + successors: %bb.2.if.end(0x40000000), %bb.1.if.then(0x40000000) + liveins: %w0, %w1, %d0, %d1, %d2, %d3, %lr + + %sp = frame-setup SUBXri %sp, 32, 0 + frame-setup STPXi killed %fp, killed %lr, %sp, 2 :: (store 8 into %stack.3), (store 8 into %stack.2) + %fp = frame-setup ADDXri %sp, 16, 0 + DBG_VALUE debug-use %w0, debug-use _, !19, !22, debug-location !23 + STURWi killed %w0, %fp, -4 :: (store 4 into %stack.0.x.addr) + DBG_VALUE debug-use %w1, debug-use _, !20, !22, debug-location !28 + STRWui killed %w1, %sp, 2, debug-location !30 :: (store 4 into %stack.1) + DBG_VALUE %sp, 8, !20, !22, debug-location !28 + BL @g, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %d0, implicit killed %d1, implicit killed %d2, implicit killed %d3, implicit-def %sp, debug-location !30 + %w0 = LDRWui %sp, 2, debug-location !33 :: (load 4 from %stack.1) + CBZW killed %w0, %bb.2.if.end, debug-location !33 + + bb.1.if.then: + successors: %bb.2.if.end(0x80000000) + + DBG_VALUE debug-use %sp, 8, !20, !22, debug-location !28 + %x0 = SUBXri %fp, 4, 0 + DBG_VALUE debug-use %x0, debug-use _, !19, !22, debug-location !23 + BL @h, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %x0, debug-location !34 + + bb.2.if.end: + DBG_VALUE debug-use %sp, 8, !20, !22, debug-location !28 + %w8 = MOVZWi 0, 0 + %x0 = ORRXrs %xzr, undef %x8, 0, implicit killed %w8, debug-location !35 + %fp, %lr = LDPXi %sp, 2, debug-location !35 :: (load 8 from %stack.3), (load 8 from %stack.2) + %sp = ADDXri %sp, 32, 0, debug-location !35 + RET undef %lr, implicit killed %w0, 
debug-location !35 + +... diff --git a/test/DebugInfo/MIR/AArch64/lit.local.cfg b/test/DebugInfo/MIR/AArch64/lit.local.cfg new file mode 100644 index 000000000000..cec29af5bbe4 --- /dev/null +++ b/test/DebugInfo/MIR/AArch64/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True + diff --git a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml index 66030020f8f4..d1324d26d8bb 100644 --- a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml +++ b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml @@ -5,39 +5,40 @@ DbiStream: ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj' SourceFiles: - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' - LineInfo: - Checksums: - - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' - Kind: MD5 - Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC - - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h' - Kind: MD5 - Checksum: 1154D69F5B2650196E1FC34F4134E56B - Lines: - - CodeSize: 10 - Flags: [ ] - RelocOffset: 16 - RelocSegment: 1 - Blocks: - - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' - Lines: - - Offset: 0 - LineStart: 5 - IsStatement: true - EndDelta: 0 - - Offset: 3 - LineStart: 6 - IsStatement: true - EndDelta: 0 - - Offset: 8 - LineStart: 7 - IsStatement: true - EndDelta: 0 - Columns: - InlineeLines: - - HasExtraFiles: false - Sites: - - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h' - LineNum: 26950 - Inlinee: 22767 + Subsections: + - !FileChecksums + Checksums: + - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' + Kind: MD5 + Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC + - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h' + Kind: MD5 + Checksum: 1154D69F5B2650196E1FC34F4134E56B + - !Lines + CodeSize: 10 + Flags: [ ] + RelocOffset: 16 + RelocSegment: 1 + Blocks: + - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' + Lines: + - Offset: 0 + LineStart: 5 + IsStatement: true + EndDelta: 0 + - Offset: 3 + LineStart: 6 + IsStatement: true + EndDelta: 0 + - Offset: 8 + LineStart: 7 + IsStatement: true + EndDelta: 0 + Columns: + - !InlineeLines + HasExtraFiles: false + Sites: + - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h' + LineNum: 26950 + Inlinee: 22767 ... 
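
A note on the two location descriptions checked in clobber-sp.mir above, assuming the standard DWARF opcode numbering (DW_OP_reg0 = 0x50, DW_OP_breg0 = 0x70):

  51      DW_OP_reg1        y is still in its argument register (AArch64 DWARF register 1, i.e. w1)
  8f 08   DW_OP_breg31 +8   y lives in memory at [sp + 8] (0x70 + 31 selects SP; 08 is the SLEB128 offset)

The first entry spans offsets 0x0-0x14, while y is still in w1; the second spans 0x14-0x38, the [SP+8] stack-slot range that, per the comment in that test, must not be interrupted by the call to h.
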
diff --git a/test/DebugInfo/PDB/pdbdump-write.test b/test/DebugInfo/PDB/pdbdump-write.test index f56b4fbe3624..393473a53af1 100644 --- a/test/DebugInfo/PDB/pdbdump-write.test +++ b/test/DebugInfo/PDB/pdbdump-write.test @@ -11,10 +11,10 @@ ; (for example if we don't write the entire stream) ; ; RUN: llvm-pdbdump pdb2yaml -stream-metadata -stream-directory \ -; RUN: -pdb-stream -tpi-stream %p/Inputs/empty.pdb > %t.1 +; RUN: -pdb-stream -tpi-stream -dbi-module-syms %p/Inputs/empty.pdb > %t.1 ; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2 %t.1 ; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \ -; RUN: -no-file-headers %p/Inputs/empty.pdb > %t.3 +; RUN: -dbi-module-syms -no-file-headers %p/Inputs/empty.pdb > %t.3 ; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \ -; RUN: -no-file-headers %t.2 > %t.4 +; RUN: -dbi-module-syms -no-file-headers %t.2 > %t.4 ; RUN: diff %t.3 %t.4 diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test index ca7427c0099b..f959805c7474 100644 --- a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test +++ b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test @@ -28,12 +28,8 @@ YAML: - Module: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj' YAML: ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj' YAML: SourceFiles: YAML: - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' -YAML: LineInfo: -YAML: Checksums: -YAML: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' -YAML: Kind: MD5 -YAML: Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC -YAML: Lines: +YAML: Subsections: +YAML: - !Lines YAML: CodeSize: 10 YAML: Flags: [ ] YAML: RelocOffset: 16 @@ -54,6 +50,11 @@ YAML: LineStart: 7 YAML: IsStatement: true YAML: EndDelta: 0 YAML: Columns: +YAML: - !FileChecksums +YAML: Checksums: +YAML: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp' +YAML: Kind: MD5 +YAML: Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC YAML: - Module: '* Linker *' YAML: ObjFile: '' YAML: ...
\ No newline at end of file diff --git a/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll b/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll new file mode 100644 index 000000000000..4df6ffeb5a8c --- /dev/null +++ b/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll @@ -0,0 +1,13 @@ +; Test -sanitizer-coverage-inline-8bit-counters=1 +; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +define void @foo() { +entry: +; CHECK: %0 = load i8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__sancov_gen_, i64 0, i64 0), !nosanitize +; CHECK: %1 = add i8 %0, 1 +; CHECK: store i8 %1, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__sancov_gen_, i64 0, i64 0), !nosanitize + ret void +} +; CHECK: call void @__sanitizer_cov_8bit_counters_init(i8* bitcast (i8** @__start___sancov_counters to i8*), i8* bitcast (i8** @__stop___sancov_counters to i8*)) diff --git a/test/MC/WebAssembly/external-data.ll b/test/MC/WebAssembly/external-data.ll new file mode 100644 index 000000000000..91e05b3f13a6 --- /dev/null +++ b/test/MC/WebAssembly/external-data.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple wasm32-unknown-unknown-wasm -filetype=obj %s -o - | obj2yaml | FileCheck %s +; Verify relocations are correctly generated for addresses of externals +; in the data section. + +declare i32 @f1(...) + +@foo = global i64 7, align 4 +@far = local_unnamed_addr global i32 (...)* @f1, align 4 + +; CHECK: - Type: DATA +; CHECK: Relocations: +; CHECK: - Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32 +; CHECK: Index: 0 +; CHECK: Offset: 0x0000000E +; CHECK: Segments: +; CHECK: - Index: 0 +; CHECK: Offset: +; CHECK: Opcode: I32_CONST +; CHECK: Value: 0 +; CHECK: Content: 0700000000000000FFFFFFFF + diff --git a/test/ThinLTO/X86/deadstrip.ll b/test/ThinLTO/X86/deadstrip.ll index 0c85322eb565..c19ccb01be3c 100644 --- a/test/ThinLTO/X86/deadstrip.ll +++ b/test/ThinLTO/X86/deadstrip.ll @@ -22,6 +22,20 @@ ; RUN: llvm-dis < %t.out.1.3.import.bc | FileCheck %s --check-prefix=CHECK2 ; RUN: llvm-nm %t.out.1 | FileCheck %s --check-prefix=CHECK2-NM +; RUN: llvm-bcanalyzer -dump %t.out.index.bc | FileCheck %s --check-prefix=COMBINED +; Live, NotEligibleForImport, Internal +; COMBINED-DAG: <COMBINED {{.*}} op2=55 +; Live, Internal +; COMBINED-DAG: <COMBINED {{.*}} op2=39 +; Live, External +; COMBINED-DAG: <COMBINED {{.*}} op2=32 +; COMBINED-DAG: <COMBINED {{.*}} op2=32 +; COMBINED-DAG: <COMBINED {{.*}} op2=32 +; (Dead) +; COMBINED-DAG: <COMBINED {{.*}} op2=0 +; COMBINED-DAG: <COMBINED {{.*}} op2=0 +; COMBINED-DAG: <COMBINED {{.*}} op2=0 + ; Dead-stripping on the index allows to internalize these, ; and limit the import of @baz thanks to early pruning. 
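
The op2 values in the COMBINED lines above are consistent with a packed summary-flags word, assuming the linkage sits in the low four bits, NotEligibleToImport in bit 4, and Live in bit 5:

  55 = 32 + 16 + 7    Live | NotEligibleToImport | internal linkage (7)
  39 = 32 + 7         Live | internal linkage
  32 = 32             Live | external linkage (0)
   0                  no bits set: dead

Under that reading, the three op2=0 entries are exactly the dead symbols, with no Live bit and default external linkage, which the following checks expect to be stripped.
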
; CHECK-NOT: available_externally {{.*}} @baz() @@ -35,7 +49,7 @@ ; Make sure we didn't internalize @boo, which is reachable via ; llvm.global_ctors ; CHECK2: define void @boo() -; We should have eventually revoved @baz since it was internalized and unused +; We should have eventually removed @baz since it was internalized and unused ; CHECK2-NM-NOT: _baz ; The final binary should not contain any of the dead functions, diff --git a/test/ThinLTO/X86/newpm-basic.ll b/test/ThinLTO/X86/newpm-basic.ll index d357cbc85d00..bfcc60c6807b 100644 --- a/test/ThinLTO/X86/newpm-basic.ll +++ b/test/ThinLTO/X86/newpm-basic.ll @@ -1,7 +1,7 @@ ; RUN: opt -module-summary %s -o %t1.bc ; RUN: llvm-lto2 run %t1.bc -o %t.o \ ; RUN: -r=%t1.bc,_tinkywinky,pxl \ -; RUN: -lto-use-new-pm +; RUN: -use-new-pm target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/test/Transforms/CodeExtractor/cost.ll b/test/Transforms/CodeExtractor/cost.ll new file mode 100644 index 000000000000..4ac5acee019a --- /dev/null +++ b/test/Transforms/CodeExtractor/cost.ll @@ -0,0 +1,64 @@ +; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s +; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s +define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr { +bb: +; ptr != null is predicted to be true + %tmp = icmp ne i32* %arg, null + br i1 %tmp, label %bb8, label %bb1 + +; bb1 is not likely +bb1: ; preds = %bb + %tmp2 = tail call i32 @foo(i32* nonnull %arg) + %tmp3 = tail call i32 @foo(i32* nonnull %arg) + %tmp4 = tail call i32 @foo(i32* nonnull %arg) + %tmp5 = tail call i32 @foo(i32* nonnull %arg) + %tmp6 = tail call i32 @foo(i32* nonnull %arg) + %tmp7 = tail call i32 @foo(i32* nonnull %arg) + br label %bb8 + +bb8: ; preds = %bb1, %bb + %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp9 +} + +define i32 @outline_region_likely(i32* %arg) local_unnamed_addr { +bb: +; ptr == null is predicted to be false + %tmp = icmp eq i32* %arg, null + br i1 %tmp, label %bb8, label %bb1 + +; bb1 is likely +bb1: ; preds = %bb + %tmp2 = tail call i32 @foo(i32* nonnull %arg) + %tmp3 = tail call i32 @foo(i32* nonnull %arg) + %tmp4 = tail call i32 @foo(i32* nonnull %arg) + %tmp5 = tail call i32 @foo(i32* nonnull %arg) + %tmp6 = tail call i32 @foo(i32* nonnull %arg) + %tmp7 = tail call i32 @foo(i32* nonnull %arg) + br label %bb8 + +bb8: ; preds = %bb1, %bb + %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp9 +} + +declare i32 @foo(i32* %arg) + +define i32 @dummy_caller(i32* %arg) local_unnamed_addr { +; CHECK-LABEL: @dummy_caller + %tmp = call i32 @outline_region_notlikely(i32* %arg) +; CHECK: call void @outline_region_notlikely.2_bb1 + %tmp2 = tail call i32 @outline_region_likely(i32* %arg) +; CHECK: %tmp2 = tail call i32 @outline_region_likely(i32* %arg) + ret i32 %tmp + +} + +; CHECK-LABEL: define internal void @outline_region_notlikely.2_bb1(i32* %arg) { +; CHECK-NEXT: newFuncRoot: + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 304489)"} diff --git a/test/Transforms/CodeExtractor/cost_meta.ll b/test/Transforms/CodeExtractor/cost_meta.ll new file mode 100644 index 000000000000..2e4467a8d0c9 --- /dev/null +++ b/test/Transforms/CodeExtractor/cost_meta.ll @@ -0,0 +1,41 @@ +; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s +; RUN: opt -S < %s -passes=partial-inliner 
-partial-inlining-extra-penalty=2000 | FileCheck %s +define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr { +bb: +; ptr != null is predicted to be true + %tmp = icmp ne i32* %arg, null + br i1 %tmp, label %bb8, label %bb1, !prof !2 + +; bb1 is not likely +bb1: ; preds = %bb + %tmp2 = tail call i32 @foo(i32* nonnull %arg) + %tmp3 = tail call i32 @foo(i32* nonnull %arg) + %tmp4 = tail call i32 @foo(i32* nonnull %arg) + %tmp5 = tail call i32 @foo(i32* nonnull %arg) + %tmp6 = tail call i32 @foo(i32* nonnull %arg) + %tmp7 = tail call i32 @foo(i32* nonnull %arg) + br label %bb8 + +bb8: ; preds = %bb1, %bb + %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp9 +} + +define i32 @dummy_caller(i32* %arg) local_unnamed_addr { +; CHECK-LABEL: @dummy_caller + %tmp = call i32 @outline_region_notlikely(i32* %arg) + ret i32 %tmp + } + + +; CHECK-LABEL: define internal void @outline_region_notlikely.1_bb1(i32* %arg) { +; CHECK-NEXT: newFuncRoot: + +declare i32 @foo(i32 * %arg) + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 304489)"} +!2 = !{!"branch_weights", i32 2000, i32 1} diff --git a/test/Transforms/Coroutines/coro-split-02.ll b/test/Transforms/Coroutines/coro-split-02.ll index 953c25088652..4dc8921cd69a 100644 --- a/test/Transforms/Coroutines/coro-split-02.ll +++ b/test/Transforms/Coroutines/coro-split-02.ll @@ -1,5 +1,6 @@ ; Tests that coro-split can handle the case when a code after coro.suspend uses ; a value produces between coro.save and coro.suspend (%Result.i19) +; and checks whether stray coro.saves are properly removed ; RUN: opt < %s -coro-split -S | FileCheck %s %"struct.std::coroutine_handle" = type { i8* } @@ -24,9 +25,10 @@ entry: i8 1, label %exit ] await.ready: + %StrayCoroSave = call token @llvm.coro.save(i8* null) %val = load i32, i32* %Result.i19 call void @print(i32 %val) - br label %exit + br label %exit exit: call i1 @llvm.coro.end(i8* null, i1 false) ret void @@ -35,6 +37,7 @@ exit: ; CHECK-LABEL: @a.resume( ; CHECK: getelementptr inbounds %a.Frame ; CHECK-NEXT: getelementptr inbounds %"struct.lean_future<int>::Awaiter" +; CHECK-NOT: call token @llvm.coro.save(i8* null) ; CHECK-NEXT: %val = load i32, i32* %Result ; CHECK-NEXT: call void @print(i32 %val) ; CHECK-NEXT: ret void diff --git a/test/Transforms/Inline/AArch64/switch.ll b/test/Transforms/Inline/AArch64/switch.ll index 96d6bf2db682..a530ba734705 100644 --- a/test/Transforms/Inline/AArch64/switch.ll +++ b/test/Transforms/Inline/AArch64/switch.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -inline -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s -; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s +; RUN: opt < %s -inline -inline-threshold=20 -S -mtriple=aarch64-none-linux | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S -mtriple=aarch64-none-linux | FileCheck %s define i32 @callee_range(i32 %a, i32* %P) { switch i32 %a, label %sw.default [ diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll index 6ff0a50318d2..8352c07a816b 100644 --- a/test/Transforms/InstCombine/not.ll +++ b/test/Transforms/InstCombine/not.ll @@ -33,17 +33,46 @@ define i1 @invert_fcmp(float %X, float %Y) { ; PR2298 -define zeroext i8 @test6(i32 %a, i32 %b) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 %b, %a -; CHECK-NEXT: [[RETVAL67:%.*]] = zext 
i1 [[TMP3]] to i8 -; CHECK-NEXT: ret i8 [[RETVAL67]] +define i1 @not_not_cmp(i32 %a, i32 %b) { +; CHECK-LABEL: @not_not_cmp( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %b, %a +; CHECK-NEXT: ret i1 [[CMP]] +; + %nota = xor i32 %a, -1 + %notb = xor i32 %b, -1 + %cmp = icmp slt i32 %nota, %notb + ret i1 %cmp +} + +define <2 x i1> @not_not_cmp_vector(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: @not_not_cmp_vector( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> %b, %a +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %nota = xor <2 x i32> %a, <i32 -1, i32 -1> + %notb = xor <2 x i32> %b, <i32 -1, i32 -1> + %cmp = icmp ugt <2 x i32> %nota, %notb + ret <2 x i1> %cmp +} + +define i1 @not_cmp_constant(i32 %a) { +; CHECK-LABEL: @not_cmp_constant( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %a, -43 +; CHECK-NEXT: ret i1 [[CMP]] +; + %nota = xor i32 %a, -1 + %cmp = icmp ugt i32 %nota, 42 + ret i1 %cmp +} + +define <2 x i1> @not_cmp_constant_vector(<2 x i32> %a) { +; CHECK-LABEL: @not_cmp_constant_vector( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> %a, <i32 -43, i32 -43> +; CHECK-NEXT: ret <2 x i1> [[CMP]] ; - %tmp1not = xor i32 %a, -1 - %tmp2not = xor i32 %b, -1 - %tmp3 = icmp slt i32 %tmp1not, %tmp2not - %retval67 = zext i1 %tmp3 to i8 - ret i8 %retval67 + %nota = xor <2 x i32> %a, <i32 -1, i32 -1> + %cmp = icmp slt <2 x i32> %nota, <i32 42, i32 42> + ret <2 x i1> %cmp } define <2 x i1> @test7(<2 x i32> %A, <2 x i32> %B) { diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index d6f1b634102f..20ebd36991a5 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -1278,3 +1278,19 @@ define void @icmp_slt_sge_or(i32 %Ax, i32 %Bx) { ; CHECK: call void @helper_i1(i1 true) ret void } + +define i1 @constant_fold_inttoptr_null() { +; CHECK-LABEL: @constant_fold_inttoptr_null( +; CHECK-NEXT: ret i1 false +; + %x = icmp eq i32* inttoptr (i64 32 to i32*), null + ret i1 %x +} + +define i1 @constant_fold_null_inttoptr() { +; CHECK-LABEL: @constant_fold_null_inttoptr( +; CHECK-NEXT: ret i1 false +; + %x = icmp eq i32* null, inttoptr (i64 32 to i32*) + ret i1 %x +} diff --git a/test/Transforms/LowerExpectIntrinsic/phi_merge.ll b/test/Transforms/LowerExpectIntrinsic/phi_merge.ll new file mode 100644 index 000000000000..3b407c0f3a5a --- /dev/null +++ b/test/Transforms/LowerExpectIntrinsic/phi_merge.ll @@ -0,0 +1,356 @@ +; RUN: opt -lower-expect -S -o - < %s | FileCheck %s +; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s + +; The C case +; if (__builtin_expect((x > goo() && y > hoo() && z > too()), 1)) +; For the above case, all 3 branches should be annotated. +; +; if (__builtin_expect((x > goo() && y > hoo() && z > too()), 0)) +; For the above case, we don't have enough information, so +; only the last branch is annotated. 
+ +define void @foo(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: !prof [[WEIGHT:![0-9]+]] + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13, {{.*}}!prof [[WEIGHT]] + +bb14: ; preds = %bb10 + %tmp16 = call i32 @too() + %tmp17 = icmp sgt i32 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i1 [ false, %bb10 ], [ false, %bb ], [ %tmp17, %bb14 ] + %tmp20 = xor i1 %tmp19, true + %tmp21 = xor i1 %tmp20, true + %tmp22 = zext i1 %tmp21 to i32 + %tmp23 = sext i32 %tmp22 to i64 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 1) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 + +bb30: ; preds = %bb28, %bb26 + ret void +} + +define void @foo2(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo2 +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: br i1 %tmp9 +; CHECK-NOT: !prof + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13 +; CHECK-NOT: !prof + +bb14: ; preds = %bb10 + %tmp16 = call i32 @too() + %tmp17 = icmp sgt i32 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i1 [ false, %bb10 ], [ false, %bb ], [ %tmp17, %bb14 ] + %tmp20 = xor i1 %tmp19, true + %tmp21 = xor i1 %tmp20, true + %tmp22 = zext i1 %tmp21 to i32 + %tmp23 = sext i32 %tmp22 to i64 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 0) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT2:![0-9]+]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 + +bb30: ; preds = %bb28, %bb26 + ret void +} + +define void @foo_i32(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo_i32 +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: !prof [[WEIGHT]] + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13, {{.*}}!prof [[WEIGHT]] + +bb14: ; preds = %bb10 + %tmp16 = call i32 @too() + %tmp17 = icmp sgt i32 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i32 [ 5, %bb10 ], [ 5, %bb ], [ %tmp16, %bb14 ] + %tmp23 = sext i32 %tmp19 to i64 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 + +bb30: ; preds = %bb28, %bb26 + ret void +} + + +define void @foo_i32_not_unlikely(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo_i32_not_unlikely +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: br i1 %tmp9 +; CHECK-NOT: !prof + +bb10: ; preds = %bb + %tmp12 = 
call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13 +; CHECK-NOT: !prof + +bb14: ; preds = %bb10 + %tmp16 = call i32 @too() + %tmp17 = icmp sgt i32 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i32 [ 4, %bb10 ], [ 4, %bb ], [ %tmp16, %bb14 ] + %tmp23 = sext i32 %tmp19 to i64 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 + +bb30: ; preds = %bb28, %bb26 + ret void +} + +define void @foo_i32_xor(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo_i32_xor +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: br i1 %tmp9,{{.*}}!prof [[WEIGHT]] + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13,{{.*}}!prof [[WEIGHT]] + +bb14: ; preds = %bb10 + %tmp16 = call i32 @too() + %tmp17 = icmp sgt i32 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i32 [ 6, %bb10 ], [ 6, %bb ], [ %tmp16, %bb14 ] + %tmp20 = xor i32 %tmp19, 3 + %tmp23 = sext i32 %tmp20 to i64 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 +bb30: ; preds = %bb28, %bb26 + ret void +} + +define void @foo_i8_sext(i32 %arg, i32 %arg1, i8 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo_i8_sext +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: br i1 %tmp9,{{.*}}!prof [[WEIGHT]] + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13,{{.*}}!prof [[WEIGHT]] + +bb14: ; preds = %bb10 + %tmp16 = call i8 @too8() + %tmp17 = icmp sgt i8 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i8 [ 255, %bb10 ], [ 255, %bb ], [ %tmp16, %bb14 ] + %tmp23 = sext i8 %tmp19 to i64 +; after sign extension, the operand value becomes -1 which does not match 255 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 255) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 +bb30: ; preds = %bb28, %bb26 + ret void +} + +define void @foo_i8_sext_not_unlikely(i32 %arg, i32 %arg1, i8 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo_i8_sext_not_unlikely +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: br i1 %tmp9 +; CHECK-NOT: !prof + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13 +; CHECK-NOT: !prof + +bb14: ; preds = %bb10 + %tmp16 = call i8 @too8() + %tmp17 = icmp sgt i8 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i8 [ 255, %bb10 ], [ 255, %bb 
], [ %tmp16, %bb14 ] + %tmp23 = sext i8 %tmp19 to i64 +; after sign extension, the operand value becomes -1 which matches -1 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 -1) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 +bb30: ; preds = %bb28, %bb26 + ret void +} + + +define void @foo_i32_xor_not_unlikely(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo_i32_xor_not_unlikely +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp sgt i32 %tmp8, %arg + br i1 %tmp9, label %bb10, label %bb18 +; CHECK: br i1 %tmp9 +; CHECK-NOT: !prof + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br i1 %tmp13, label %bb14, label %bb18 +; CHECK: br i1 %tmp13 +; CHECK-NOT: !prof + +bb14: ; preds = %bb10 + %tmp16 = call i32 @too() + %tmp17 = icmp sgt i32 %arg2, %tmp16 + br label %bb18 + +bb18: ; preds = %bb14, %bb10, %bb + %tmp19 = phi i32 [ 6, %bb10 ], [ 6, %bb ], [ %tmp16, %bb14 ] + %tmp20 = xor i32 %tmp19, 2 + %tmp23 = sext i32 %tmp20 to i64 + %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4) + %tmp25 = icmp ne i64 %tmp24, 0 + br i1 %tmp25, label %bb26, label %bb28 +; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]] + +bb26: ; preds = %bb18 + %tmp27 = call i32 @goo() + br label %bb30 + +bb28: ; preds = %bb18 + %tmp29 = call i32 @hoo() + br label %bb30 + +bb30: ; preds = %bb28, %bb26 + ret void +} + +declare i32 @goo() + +declare i32 @hoo() + +declare i32 @too() + +declare i8 @too8() + +; Function Attrs: nounwind readnone +declare i64 @llvm.expect.i64(i64, i64) + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 (trunk 302965)"} +; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 2000, i32 1} +; CHECK: [[WEIGHT2]] = !{!"branch_weights", i32 1, i32 2000} diff --git a/test/Transforms/LowerExpectIntrinsic/phi_or.ll b/test/Transforms/LowerExpectIntrinsic/phi_or.ll new file mode 100644 index 000000000000..849baef3dca8 --- /dev/null +++ b/test/Transforms/LowerExpectIntrinsic/phi_or.ll @@ -0,0 +1,103 @@ +; RUN: opt -lower-expect -S -o - < %s | FileCheck %s +; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s +; +; if (__builtin_expect((x > goo() || y > hoo()), 1)) { +; .. +; } +; For the above case, only the second branch should be +; annotated. +; if (__builtin_expect((x > goo() || y > hoo()), 0)) { +; .. +; } +; For the above case, two branches should be annotated. 
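
The same hypothetical reduction for || shows why the first branch is only annotated in the expect-0 case: the short-circuit edge feeds the constant true into the phi, which agrees with an expected value of 1 (so that branch carries no information) but contradicts an expected value of 0 (so it can be annotated).

define void @sketch_or(i32 %a, i32 %b) {
entry:
  %c1 = icmp sgt i32 %a, 0
  ; for expect 1: not annotated, the true edge already matches the expectation
  ; for expect 0: annotated toward %lor.rhs
  br i1 %c1, label %lor.end, label %lor.rhs

lor.rhs:
  %c2 = icmp sgt i32 %b, 0
  br label %lor.end

lor.end:
  %r = phi i1 [ true, %entry ], [ %c2, %lor.rhs ]
  %conv = zext i1 %r to i64
  %expect = call i64 @llvm.expect.i64(i64 %conv, i64 1)
  %tobool = icmp ne i64 %expect, 0
  ; annotated for either expected value
  br i1 %tobool, label %then, label %end

then:
  br label %end

end:
  ret void
}

declare i64 @llvm.expect.i64(i64, i64)
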
+; Function Attrs: noinline nounwind uwtable +define void @foo(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo +bb: + %tmp8 = call i32 @goo() + %tmp9 = icmp slt i32 %arg, %tmp8 + br i1 %tmp9, label %bb14, label %bb10 +; CHECK: br i1 %tmp9 +; CHECK-NOT: br i1 %tmp9{{.*}}!prof + +bb10: ; preds = %bb + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %arg1, %tmp12 + br label %bb14 + +bb14: ; preds = %bb10, %bb + %tmp15 = phi i1 [ true, %bb ], [ %tmp13, %bb10 ] + %tmp16 = zext i1 %tmp15 to i32 + %tmp17 = sext i32 %tmp16 to i64 + %expect = call i64 @llvm.expect.i64(i64 %tmp17, i64 1) + %tmp18 = icmp ne i64 %expect, 0 + br i1 %tmp18, label %bb19, label %bb21 +; CHECK: br i1 %tmp18{{.*}}!prof [[WEIGHT:![0-9]+]] + +bb19: ; preds = %bb14 + %tmp20 = call i32 @goo() + br label %bb23 + +bb21: ; preds = %bb14 + %tmp22 = call i32 @hoo() + br label %bb23 + +bb23: ; preds = %bb21, %bb19 + ret void +} + +define void @foo2(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) { +; CHECK-LABEL: void @foo2 +bb: + %tmp = alloca i32, align 4 + %tmp4 = alloca i32, align 4 + %tmp5 = alloca i32, align 4 + %tmp6 = alloca i32, align 4 + store i32 %arg, i32* %tmp, align 4 + store i32 %arg1, i32* %tmp4, align 4 + store i32 %arg2, i32* %tmp5, align 4 + store i32 %arg3, i32* %tmp6, align 4 + %tmp7 = load i32, i32* %tmp, align 4 + %tmp8 = call i32 @goo() + %tmp9 = icmp slt i32 %tmp7, %tmp8 + br i1 %tmp9, label %bb14, label %bb10 +; CHECK: br i1 %tmp9{{.*}}!prof [[WEIGHT2:![0-9]+]] + +bb10: ; preds = %bb + %tmp11 = load i32, i32* %tmp5, align 4 + %tmp12 = call i32 @hoo() + %tmp13 = icmp sgt i32 %tmp11, %tmp12 + br label %bb14 + +bb14: ; preds = %bb10, %bb + %tmp15 = phi i1 [ true, %bb ], [ %tmp13, %bb10 ] + %tmp16 = zext i1 %tmp15 to i32 + %tmp17 = sext i32 %tmp16 to i64 + %expect = call i64 @llvm.expect.i64(i64 %tmp17, i64 0) + %tmp18 = icmp ne i64 %expect, 0 + br i1 %tmp18, label %bb19, label %bb21 +; CHECK: br i1 %tmp18{{.*}}!prof [[WEIGHT2]] + +bb19: ; preds = %bb14 + %tmp20 = call i32 @goo() + br label %bb23 + +bb21: ; preds = %bb14 + %tmp22 = call i32 @hoo() + br label %bb23 + +bb23: ; preds = %bb21, %bb19 + ret void +} + +declare i32 @goo() +declare i32 @hoo() +declare i64 @llvm.expect.i64(i64, i64) + + +!llvm.ident = !{!0} + + +!0 = !{!"clang version 5.0.0 (trunk 302965)"} +; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 2000, i32 1} +; CHECK: [[WEIGHT2]] = !{!"branch_weights", i32 1, i32 2000} diff --git a/test/Transforms/LowerExpectIntrinsic/phi_tern.ll b/test/Transforms/LowerExpectIntrinsic/phi_tern.ll new file mode 100644 index 000000000000..3c603d51b438 --- /dev/null +++ b/test/Transforms/LowerExpectIntrinsic/phi_tern.ll @@ -0,0 +1,56 @@ +; RUN: opt -lower-expect -S -o - < %s | FileCheck %s +; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s + +; return __builtin_expect((a > b ? 
1 : goo()), 0);
+;
+; Function Attrs: noinline nounwind uwtable
+define i32 @foo(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: i32 @foo
+bb:
+  %tmp5 = icmp sgt i32 %arg, %arg1
+  br i1 %tmp5, label %bb9, label %bb7
+; CHECK: br i1 %tmp5{{.*}}!prof [[WEIGHT:![0-9]+]]
+
+bb7: ; preds = %bb
+  %tmp8 = call i32 @goo()
+  br label %bb9
+
+bb9: ; preds = %bb7, %bb9
+  %tmp10 = phi i32 [ 1, %bb ], [ %tmp8, %bb7 ]
+  %tmp11 = sext i32 %tmp10 to i64
+  %expect = call i64 @llvm.expect.i64(i64 %tmp11, i64 0)
+  %tmp12 = trunc i64 %expect to i32
+  ret i32 %tmp12
+}
+
+define i32 @foo2(i32 %arg, i32 %arg1) {
+bb:
+  %tmp5 = icmp sgt i32 %arg, %arg1
+  br i1 %tmp5, label %bb6, label %bb7
+; CHECK: br i1 %tmp5{{.*}}!prof [[WEIGHT:![0-9]+]]
+
+bb6: ; preds = %bb
+  br label %bb9
+
+bb7: ; preds = %bb
+  %tmp8 = call i32 @goo()
+  br label %bb9
+
+bb9: ; preds = %bb7, %bb6
+  %tmp10 = phi i32 [ 1, %bb6 ], [ %tmp8, %bb7 ]
+  %tmp11 = sext i32 %tmp10 to i64
+  %expect = call i64 @llvm.expect.i64(i64 %tmp11, i64 0)
+  %tmp12 = trunc i64 %expect to i32
+  ret i32 %tmp12
+}
+
+declare i32 @goo()
+declare i64 @llvm.expect.i64(i64, i64)
+
+
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 302965)"}
+
+; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 1, i32 2000}
diff --git a/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml b/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml
index b7a1d208fc6f..cfac37986bda 100644
--- a/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml
+++ b/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml
@@ -1,7 +1,8 @@
 ---
 GlobalValueMap:
   42:
-  - TypeTests: [123]
+  - Live: true
+    TypeTests: [123]
 TypeIdMap:
   typeid1:
     TTRes:
diff --git a/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml
new file mode 100644
index 000000000000..7baa02ada86c
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml
@@ -0,0 +1,7 @@
+---
+GlobalValueMap:
+  42:
+  - Live: false
+    TypeTests: [14276520915468743435] # guid("typeid1")
+WithGlobalValueDeadStripping: true
+...
diff --git a/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml
index 031b2e8de04e..f30257cfc0d4 100644
--- a/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml
+++ b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml
@@ -1,5 +1,6 @@
 ---
 GlobalValueMap:
   42:
-  - TypeTests: [14276520915468743435, 15427464259790519041] # guid("typeid1"), guid("typeid2")
+  - Live: true
+    TypeTests: [14276520915468743435, 15427464259790519041] # guid("typeid1"), guid("typeid2")
 ...
diff --git a/test/Transforms/LowerTypeTests/export-dead.ll b/test/Transforms/LowerTypeTests/export-dead.ll
new file mode 100644
index 000000000000..265402b34a6e
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/export-dead.ll
@@ -0,0 +1,14 @@
+; The only use of "typeid1" is in a dead function. Export nothing.
+
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-dead.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+@foo = constant i32 42, !type !0
+
+!0 = !{i32 0, !"typeid1"}
+
+; CHECK-NOT: @__typeid_typeid1_global_addr =
+
+; SUMMARY: TypeIdMap:
+; SUMMARY-NEXT: WithGlobalValueDeadStripping: true
+; SUMMARY-NEXT: ...
diff --git a/test/Transforms/LowerTypeTests/export-nothing.ll b/test/Transforms/LowerTypeTests/export-nothing.ll index 9ab41b5f6cb6..8ad331539942 100644 --- a/test/Transforms/LowerTypeTests/export-nothing.ll +++ b/test/Transforms/LowerTypeTests/export-nothing.ll @@ -4,4 +4,5 @@ ; CHECK: --- ; CHECK-NEXT: GlobalValueMap: ; CHECK-NEXT: TypeIdMap: +; CHECK-NEXT: WithGlobalValueDeadStripping: false ; CHECK-NEXT: ... diff --git a/test/Transforms/LowerTypeTests/import-unsat.ll b/test/Transforms/LowerTypeTests/import-unsat.ll index 76b244001986..6cb9b26fb574 100644 --- a/test/Transforms/LowerTypeTests/import-unsat.ll +++ b/test/Transforms/LowerTypeTests/import-unsat.ll @@ -4,7 +4,10 @@ ; SUMMARY: GlobalValueMap: ; SUMMARY-NEXT: 42: -; SUMMARY-NEXT: - TypeTests: [ 123 ] +; SUMMARY-NEXT: - Linkage: 0 +; SUMMARY-NEXT: NotEligibleToImport: false +; SUMMARY-NEXT: Live: true +; SUMMARY-NEXT: TypeTests: [ 123 ] ; SUMMARY-NEXT: TypeIdMap: ; SUMMARY-NEXT: typeid1: ; SUMMARY-NEXT: TTRes: diff --git a/test/Transforms/SROA/address-spaces.ll b/test/Transforms/SROA/address-spaces.ll index 119f2252d95e..8fba30c2720f 100644 --- a/test/Transforms/SROA/address-spaces.ll +++ b/test/Transforms/SROA/address-spaces.ll @@ -83,3 +83,21 @@ define void @pr27557() { store i32 addrspace(3)* @l, i32 addrspace(3)** %3, align 8 ret void } + +; Make sure pre-splitting doesn't try to introduce an illegal bitcast +define float @presplit(i64 addrspace(1)* %p) { +entry: +; CHECK-LABEL: @presplit( +; CHECK: %[[CAST:.*]] = bitcast i64 addrspace(1)* {{.*}} to i32 addrspace(1)* +; CHECK: load i32, i32 addrspace(1)* %[[CAST]] + %b = alloca i64 + %b.cast = bitcast i64* %b to [2 x float]* + %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0 + %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1 + %l = load i64, i64 addrspace(1)* %p + store i64 %l, i64* %b + %f1 = load float, float* %b.gep1 + %f2 = load float, float* %b.gep2 + %ret = fadd float %f1, %f2 + ret float %ret +} diff --git a/test/Transforms/Util/PredicateInfo/condprop.ll b/test/Transforms/Util/PredicateInfo/condprop.ll index 61f59f03e1bc..496bb8385217 100644 --- a/test/Transforms/Util/PredicateInfo/condprop.ll +++ b/test/Transforms/Util/PredicateInfo/condprop.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s -; RUN: opt -print-predicateinfo -analyze -reverse-iterate < %s 2>&1 | FileCheck %s @a = external global i32 ; <i32*> [#uses=7] diff --git a/test/Transforms/Util/PredicateInfo/condprop2.ll b/test/Transforms/Util/PredicateInfo/condprop2.ll new file mode 100644 index 000000000000..415fa7c879e3 --- /dev/null +++ b/test/Transforms/Util/PredicateInfo/condprop2.ll @@ -0,0 +1,474 @@ +; REQUIRES: asserts +; NOTE: The flag -reverse-iterate is present only in a +Asserts build. +; Hence, this test has been split from condprop.ll to test with -reverse-iterate. 
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -print-predicateinfo -analyze -reverse-iterate < %s 2>&1 | FileCheck %s + +@a = external global i32 ; <i32*> [#uses=7] + +define i32 @test1() nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB:%.*]], label [[BB1:%.*]] +; CHECK: bb: +; CHECK-NEXT: br label [[BB8:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 5 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 4 +; CHECK-NEXT: br i1 [[TMP5]], label [[BB4:%.*]], label [[BB5:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 5 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb5: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 5 +; CHECK-NEXT: br i1 [[TMP9]], label [[BB6:%.*]], label [[BB7:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb7: +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: br label [[BB8]] +; CHECK: bb8: +; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP12]], [[BB7]] ], [ [[TMP11]], [[BB6]] ], [ [[TMP7]], [[BB4]] ], [ 4, [[BB2]] ], [ 5, [[BB]] ] +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: return: +; CHECK-NEXT: ret i32 [[DOT0]] +; +entry: + %0 = load i32, i32* @a, align 4 + %1 = icmp eq i32 %0, 4 + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + br label %bb8 + +bb1: ; preds = %entry + %2 = load i32, i32* @a, align 4 + %3 = icmp eq i32 %2, 5 + br i1 %3, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb8 + +bb3: ; preds = %bb1 + %4 = load i32, i32* @a, align 4 + %5 = icmp eq i32 %4, 4 + br i1 %5, label %bb4, label %bb5 + +bb4: ; preds = %bb3 + %6 = load i32, i32* @a, align 4 + %7 = add i32 %6, 5 + br label %bb8 + +bb5: ; preds = %bb3 + %8 = load i32, i32* @a, align 4 + %9 = icmp eq i32 %8, 5 + br i1 %9, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + %10 = load i32, i32* @a, align 4 + %11 = add i32 %10, 4 + br label %bb8 + +bb7: ; preds = %bb5 + %12 = load i32, i32* @a, align 4 + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb4, %bb2, %bb + %.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ] + br label %return + +return: ; preds = %bb8 + ret i32 %.0 +} + +declare void @foo(i1) +declare void @bar(i32) + +define void @test3(i32 %x, i32 %y) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK: [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]]) +; CHECK: [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]]) +; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]]) +; CHECK-NEXT: br i1 [[Z]], label [[BOTH_ZERO:%.*]], label [[NOPE:%.*]] +; CHECK: both_zero: +; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) +; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) +; 
CHECK-NEXT: call void @bar(i32 [[X_0]]) +; CHECK-NEXT: call void @bar(i32 [[Y_0]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[Z_0]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + br i1 %z, label %both_zero, label %nope +both_zero: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +nope: + call void @foo(i1 %z) + ret void +} + +define void @test4(i1 %b, i32 %x) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]] +; CHECK: sw: +; CHECK: i32 0, label [[CASE0:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE0]] +; CHECK-NEXT: i32 3, label [[CASE3]] +; CHECK-NEXT: i32 4, label [[DEFAULT:%.*]] +; CHECK-NEXT: ] Edge: [label [[SW]],label %case1] } +; CHECK-NEXT: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X:%.*]]) +; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT]] [ +; CHECK-NEXT: i32 0, label [[CASE0]] +; CHECK-NEXT: i32 1, label [[CASE1]] +; CHECK-NEXT: i32 2, label [[CASE0]] +; CHECK-NEXT: i32 3, label [[CASE3]] +; CHECK-NEXT: i32 4, label [[DEFAULT]] +; CHECK-NEXT: ] +; CHECK: default: +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: ret void +; CHECK: case0: +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: ret void +; CHECK: case1: +; CHECK-NEXT: call void @bar(i32 [[X_0]]) +; CHECK-NEXT: ret void +; CHECK: case3: +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: ret void +; + br i1 %b, label %sw, label %case3 +sw: + switch i32 %x, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case0 + i32 3, label %case3 + i32 4, label %default + ] +default: + call void @bar(i32 %x) + ret void +case0: + call void @bar(i32 %x) + ret void +case1: + call void @bar(i32 %x) + ret void +case3: + call void @bar(i32 %x) + ret void +} + +define i1 @test5(i32 %x, i32 %y) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK: [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X_0]], [[Y_0]] +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[X_1]], [[Y_1]] +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp ne i32 %x, %y + ret i1 %cmp2 + +different: + %cmp3 = icmp eq i32 %x, %y + ret i1 %cmp3 +} + +define i1 @test6(i32 %x, i32 %y) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = icmp ne i32 %x, %y + %cmp = icmp eq i32 %x, %y + %cmp3 = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +define i1 @test6_fp(float %x, float %y) { +; CHECK-LABEL: @test6_fp( +; CHECK-NEXT: [[CMP2:%.*]] = fcmp une float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float 
[[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = fcmp oeq float [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = fcmp une float %x, %y + %cmp = fcmp oeq float %x, %y + %cmp3 = fcmp oeq float %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +define i1 @test7(i32 %x, i32 %y) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]] +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK: [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X_0]], [[Y_0]] +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X_1]], [[Y_1]] +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp sle i32 %x, %y + ret i1 %cmp2 + +different: + %cmp3 = icmp sgt i32 %x, %y + ret i1 %cmp3 +} + +define i1 @test7_fp(float %x, float %y) { +; CHECK-LABEL: @test7_fp( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]] +; CHECK: [[X_0:%.*]] = call float @llvm.ssa.copy.f32(float [[X]]) +; CHECK: [[X_1:%.*]] = call float @llvm.ssa.copy.f32(float [[X]]) +; CHECK: [[Y_0:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]]) +; CHECK: [[Y_1:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]]) +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X_0]], [[Y_0]] +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X_1]], [[Y_1]] +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = fcmp ule float %x, %y + ret i1 %cmp2 + +different: + %cmp3 = fcmp ogt float %x, %y + ret i1 %cmp3 +} + +define i1 @test8(i32 %x, i32 %y) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = icmp sle i32 %x, %y + %cmp = icmp sgt i32 %x, %y + %cmp3 = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +define i1 @test8_fp(float %x, float %y) { +; CHECK-LABEL: @test8_fp( +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X]], [[Y]] +; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X]], [[Y]] +; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]] +; CHECK: same: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: different: +; CHECK-NEXT: ret i1 [[CMP3]] +; + %cmp2 = fcmp ule float %x, %y + %cmp = fcmp ogt float %x, %y + %cmp3 = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: + ret i1 %cmp2 + +different: + ret i1 %cmp3 +} + +define i32 @test9(i32 %i, i32 %j) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]] 
+; CHECK: [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]]) +; CHECK: [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]]) +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]] +; CHECK-NEXT: ret i32 [[DIFF]] +; CHECK: ret: +; CHECK-NEXT: ret i32 5 +; + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff + +ret: + ret i32 5 +} + +define i32 @test10(i32 %j, i32 %i) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]] +; CHECK: [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]]) +; CHECK: [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]]) +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]] +; CHECK-NEXT: ret i32 [[DIFF]] +; CHECK: ret: +; CHECK-NEXT: ret i32 5 +; + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff + +ret: + ret i32 5 +} + +declare i32 @yogibar() + +define i32 @test11(i32 %x) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[V0:%.*]] = call i32 @yogibar() +; CHECK-NEXT: [[V1:%.*]] = call i32 @yogibar() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V0]], [[V1]] +; CHECK: [[V0_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0]]) +; CHECK: [[V1_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V1]]) +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[NEXT:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: ret i32 [[V1_0]] +; CHECK: next: +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X:%.*]], [[V0_0]] +; CHECK: [[V0_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0_0]]) +; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE2:%.*]], label [[NEXT2:%.*]] +; CHECK: cond_true2: +; CHECK-NEXT: ret i32 [[V0_0_1]] +; CHECK: next2: +; CHECK-NEXT: ret i32 0 +; + %v0 = call i32 @yogibar() + %v1 = call i32 @yogibar() + %cmp = icmp eq i32 %v0, %v1 + br i1 %cmp, label %cond_true, label %next + +cond_true: + ret i32 %v1 + +next: + %cmp2 = icmp eq i32 %x, %v0 + br i1 %cmp2, label %cond_true2, label %next2 + +cond_true2: + ret i32 %v0 + +next2: + ret i32 0 +} + +define i32 @test12(i32 %x) { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CHECK: cond_true: +; CHECK-NEXT: br label [[RET:%.*]] +; CHECK: cond_false: +; CHECK-NEXT: br label [[RET]] +; CHECK: ret: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[X_0]], [[COND_TRUE]] ], [ [[X_1]], [[COND_FALSE]] ] +; CHECK-NEXT: ret i32 [[RES]] +; + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond_true, label %cond_false + +cond_true: + br label %ret + +cond_false: + br label %ret + +ret: + %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ] + ret i32 %res +} diff --git a/test/Transforms/Util/PredicateInfo/testandor.ll b/test/Transforms/Util/PredicateInfo/testandor.ll index 43c508670908..c1048cf6d0f6 100644 --- a/test/Transforms/Util/PredicateInfo/testandor.ll +++ b/test/Transforms/Util/PredicateInfo/testandor.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s -; RUN: opt -print-predicateinfo -reverse-iterate < %s 2>&1 | FileCheck %s declare void @foo(i1) declare void 
@bar(i32) diff --git a/test/Transforms/Util/PredicateInfo/testandor2.ll b/test/Transforms/Util/PredicateInfo/testandor2.ll new file mode 100644 index 000000000000..a03250c2f7a0 --- /dev/null +++ b/test/Transforms/Util/PredicateInfo/testandor2.ll @@ -0,0 +1,214 @@ +; REQUIRES: asserts +; NOTE: The flag -reverse-iterate is present only in a +Asserts build. +; Hence, this test has been split from testandor.ll to test with -reverse-iterate. +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -print-predicateinfo -reverse-iterate < %s 2>&1 | FileCheck %s + +declare void @foo(i1) +declare void @bar(i32) +declare void @llvm.assume(i1) + +define void @testor(i32 %x, i32 %y) { +; CHECK-LABEL: @testor( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]] +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK: [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]]) +; CHECK: [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]]) +; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]]) +; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER:%.*]] +; CHECK: oneof: +; CHECK-NEXT: call void @foo(i1 [[XZ]]) +; CHECK-NEXT: call void @foo(i1 [[YZ]]) +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: call void @bar(i32 [[Y]]) +; CHECK-NEXT: ret void +; CHECK: neither: +; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) +; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) +; CHECK-NEXT: call void @bar(i32 [[X_0]]) +; CHECK-NEXT: call void @bar(i32 [[Y_0]]) +; CHECK-NEXT: call void @foo(i1 [[Z_0]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = or i1 %xz, %yz + br i1 %z, label %oneof, label %neither +oneof: +;; Should not insert on the true edge for or + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +neither: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + call void @foo(i1 %z) + ret void +} +define void @testand(i32 %x, i32 %y) { +; CHECK-LABEL: @testand( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK: [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]]) +; CHECK: [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]]) +; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]]) +; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: both: +; CHECK-NEXT: call void @foo(i1 [[XZ_0]]) +; CHECK-NEXT: call void @foo(i1 [[YZ_0]]) +; CHECK-NEXT: call void @bar(i32 [[X_0]]) +; CHECK-NEXT: call void @bar(i32 [[Y_0]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[XZ]]) +; CHECK-NEXT: call void @foo(i1 [[YZ]]) +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: call void @bar(i32 [[Y]]) +; CHECK-NEXT: call void @foo(i1 [[Z_0]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + br i1 %z, label %both, label %nope +both: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +nope: +;; Should not insert on the false edge for and + call void @foo(i1 %xz) + call void @foo(i1 
%yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + call void @foo(i1 %z) + ret void +} +define void @testandsame(i32 %x, i32 %y) { +; CHECK-LABEL: @testandsame( +; CHECK-NEXT: [[XGT:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[XLT:%.*]] = icmp slt i32 [[X]], 100 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XGT]], [[XLT]] +; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[X_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]]) +; CHECK: [[XGT_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XGT]]) +; CHECK: [[XLT_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XLT]]) +; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]]) +; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: both: +; CHECK-NEXT: call void @foo(i1 [[XGT_0]]) +; CHECK-NEXT: call void @foo(i1 [[XLT_0]]) +; CHECK-NEXT: call void @bar(i32 [[X_0_1]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[XGT]]) +; CHECK-NEXT: call void @foo(i1 [[XLT]]) +; CHECK-NEXT: call void @foo(i1 [[Z_0]]) +; CHECK-NEXT: ret void +; + %xgt = icmp sgt i32 %x, 0 + %xlt = icmp slt i32 %x, 100 + %z = and i1 %xgt, %xlt + br i1 %z, label %both, label %nope +both: + call void @foo(i1 %xgt) + call void @foo(i1 %xlt) + call void @bar(i32 %x) + ret void +nope: + call void @foo(i1 %xgt) + call void @foo(i1 %xlt) + call void @foo(i1 %z) + ret void +} + +define void @testandassume(i32 %x, i32 %y) { +; CHECK-LABEL: @testandassume( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] +; CHECK: [[TMP1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]]) +; CHECK: [[TMP2:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]]) +; CHECK: [[TMP3:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]]) +; CHECK: [[TMP4:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]]) +; CHECK: [[TMP5:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP5]]) +; CHECK: [[DOT0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[TMP1]]) +; CHECK: [[DOT01:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[TMP2]]) +; CHECK: [[DOT02:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP3]]) +; CHECK: [[DOT03:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP4]]) +; CHECK: [[DOT04:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP5]]) +; CHECK-NEXT: br i1 [[TMP5]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: both: +; CHECK-NEXT: call void @foo(i1 [[DOT02]]) +; CHECK-NEXT: call void @foo(i1 [[DOT03]]) +; CHECK-NEXT: call void @bar(i32 [[DOT0]]) +; CHECK-NEXT: call void @bar(i32 [[DOT01]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[DOT04]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + call void @llvm.assume(i1 %z) + br i1 %z, label %both, label %nope +both: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +nope: + call void @foo(i1 %z) + ret void +} + +;; Unlike and/or for branches, assume is *always* true, so we only match and for it +define void @testorassume(i32 %x, i32 %y) { +; +; CHECK-LABEL: @testorassume( +; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]] +; CHECK-NEXT: call void @llvm.assume(i1 [[Z]]) +; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]]) +; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: both: +; CHECK-NEXT: call void @foo(i1 [[XZ]]) +; CHECK-NEXT: call void 
@foo(i1 [[YZ]]) +; CHECK-NEXT: call void @bar(i32 [[X]]) +; CHECK-NEXT: call void @bar(i32 [[Y]]) +; CHECK-NEXT: ret void +; CHECK: nope: +; CHECK-NEXT: call void @foo(i1 [[Z_0]]) +; CHECK-NEXT: ret void +; + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = or i1 %xz, %yz + call void @llvm.assume(i1 %z) + br i1 %z, label %both, label %nope +both: + call void @foo(i1 %xz) + call void @foo(i1 %yz) + call void @bar(i32 %x) + call void @bar(i32 %y) + ret void +nope: + call void @foo(i1 %z) + ret void +} diff --git a/test/Transforms/WholeProgramDevirt/Inputs/export.yaml b/test/Transforms/WholeProgramDevirt/Inputs/export.yaml index 0f6f59de7522..71cf38b216c7 100644 --- a/test/Transforms/WholeProgramDevirt/Inputs/export.yaml +++ b/test/Transforms/WholeProgramDevirt/Inputs/export.yaml @@ -1,7 +1,8 @@ --- GlobalValueMap: 42: - - TypeTestAssumeVCalls: + - Live: true + TypeTestAssumeVCalls: - GUID: 14276520915468743435 # typeid1 Offset: 0 TypeCheckedLoadVCalls: diff --git a/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml b/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml index 1cb3ad3f134c..30159c5012b0 100644 --- a/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml +++ b/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml @@ -1,7 +1,8 @@ --- GlobalValueMap: 42: - - TypeTestAssumeVCalls: + - Live: true + TypeTestAssumeVCalls: - GUID: 123 Offset: 0 - GUID: 456 diff --git a/test/Transforms/WholeProgramDevirt/export-nothing.ll b/test/Transforms/WholeProgramDevirt/export-nothing.ll index e0814efbf9c0..4707eaa17ead 100644 --- a/test/Transforms/WholeProgramDevirt/export-nothing.ll +++ b/test/Transforms/WholeProgramDevirt/export-nothing.ll @@ -4,4 +4,5 @@ ; CHECK: --- ; CHECK-NEXT: GlobalValueMap: ; CHECK-NEXT: TypeIdMap: +; CHECK-NEXT: WithGlobalValueDeadStripping: false ; CHECK-NEXT: ... diff --git a/test/Transforms/WholeProgramDevirt/export-single-impl.ll b/test/Transforms/WholeProgramDevirt/export-single-impl.ll index f4f3fd054c46..15de77381ed1 100644 --- a/test/Transforms/WholeProgramDevirt/export-single-impl.ll +++ b/test/Transforms/WholeProgramDevirt/export-single-impl.ll @@ -38,6 +38,7 @@ ; SUMMARY-NEXT: Kind: SingleImpl ; SUMMARY-NEXT: SingleImplName: 'vf4$merged' ; SUMMARY-NEXT: ResByArg: +; SUMMARY-NEXT: WithGlobalValueDeadStripping: false ; SUMMARY-NEXT: ... 
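The testandor2.ll tests earlier in this hunk pin down where PredicateInfo materializes llvm.ssa.copy calls: on the false edge of an or, on the true edge of an and, and after an assume. As an illustration of how a client walks that information afterwards -- a minimal sketch, assuming the PredicateInfo API as imported here (a constructor taking the function, dominator tree, and assumption cache, plus getPredicateInfoFor(); treat the exact signatures as assumptions, not as part of this commit):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"

// Report every llvm.ssa.copy that PredicateInfo inserted, together with
// the original value it renames on the predicated edge (or after the
// assume). Instructions without predicate info are skipped.
static void dumpPredicatedCopies(llvm::Function &F, llvm::DominatorTree &DT,
                                 llvm::AssumptionCache &AC) {
  llvm::PredicateInfo PI(F, DT, AC);
  for (llvm::BasicBlock &BB : F)
    for (llvm::Instruction &I : BB)
      if (const llvm::PredicateBase *PB = PI.getPredicateInfoFor(&I))
        llvm::errs() << I << " renames " << *PB->OriginalOp << "\n";
}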
; CHECK: @vt1 = constant void (i8*)* @vf1 diff --git a/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll b/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll index 1d7030c41fd0..11b1c5de4d83 100644 --- a/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll +++ b/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll @@ -1,8 +1,7 @@ ; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t -; SUMMARY: - TypeTests: -; SUMMARY-NEXT: TypeTestAssumeVCalls: +; SUMMARY-NOT: TypeTests: ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid4: diff --git a/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll b/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll index 174a573b5b0d..0878d01cce03 100644 --- a/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll +++ b/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll @@ -1,8 +1,7 @@ ; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s ; RUN: FileCheck --check-prefix=SUMMARY %s < %t -; SUMMARY: - TypeTests: -; SUMMARY-NEXT: TypeTestAssumeVCalls: +; SUMMARY-NOT: TypeTests: ; SUMMARY: TypeIdMap: ; SUMMARY-NEXT: typeid3: diff --git a/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll b/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll index 0785ade28570..3132444a9f36 100644 --- a/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll +++ b/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll @@ -1,7 +1,7 @@ ; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s ; RUN: FileCheck %s < %t -; CHECK: - TypeTests: [ 15427464259790519041, 17525413373118030901 ] +; CHECK: TypeTests: [ 15427464259790519041, 17525413373118030901 ] ; CHECK-NEXT: TypeTestAssumeVCalls: @vt1a = constant void (i8*)* @vf1a, !type !0 diff --git a/test/Transforms/WholeProgramDevirt/import-indir.ll b/test/Transforms/WholeProgramDevirt/import-indir.ll index 1de9352eeb22..73c982b17893 100644 --- a/test/Transforms/WholeProgramDevirt/import-indir.ll +++ b/test/Transforms/WholeProgramDevirt/import-indir.ll @@ -4,7 +4,9 @@ ; SUMMARY: GlobalValueMap: ; SUMMARY-NEXT: 42: -; SUMMARY-NEXT: - TypeTests: +; SUMMARY-NEXT: - Linkage: 0 +; SUMMARY-NEXT: NotEligibleToImport: false +; SUMMARY-NEXT: Live: true ; SUMMARY-NEXT: TypeTestAssumeVCalls: ; SUMMARY-NEXT: - GUID: 123 ; SUMMARY-NEXT: Offset: 0 diff --git a/test/tools/llvm-lto2/X86/pipeline.ll b/test/tools/llvm-lto2/X86/pipeline.ll index dbec9ab22527..7effb0c801b9 100644 --- a/test/tools/llvm-lto2/X86/pipeline.ll +++ b/test/tools/llvm-lto2/X86/pipeline.ll @@ -8,7 +8,7 @@ ; Try the new pass manager LTO default pipeline (make sure the option ; is accepted). 
-; RUN: llvm-lto2 run %t1.bc -o %t.o -lto-use-new-pm -r %t1.bc,patatino,px +; RUN: llvm-lto2 run %t1.bc -o %t.o -use-new-pm -r %t1.bc,patatino,px target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 589005943045..e10d112dcf90 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -304,6 +304,9 @@ int main(int argc, char **argv) { initializeScalarizeMaskedMemIntrinPass(*Registry); initializeExpandReductionsPass(*Registry); + // Initialize debugging passes. + initializeScavengerTestPass(*Registry); + // Register the target printer for --version. cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); diff --git a/tools/llvm-config/llvm-config.cpp b/tools/llvm-config/llvm-config.cpp index 888da7143c9f..08b096afb052 100644 --- a/tools/llvm-config/llvm-config.cpp +++ b/tools/llvm-config/llvm-config.cpp @@ -333,7 +333,7 @@ int main(int argc, char **argv) { } else { ActivePrefix = CurrentExecPrefix; ActiveIncludeDir = ActivePrefix + "/include"; - SmallString<PATH_MAX> path(StringRef(LLVM_TOOLS_INSTALL_DIR)); + SmallString<256> path(StringRef(LLVM_TOOLS_INSTALL_DIR)); sys::fs::make_absolute(ActivePrefix, path); ActiveBinDir = path.str(); ActiveLibDir = ActivePrefix + "/lib" + LLVM_LIBDIR_SUFFIX; diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp index 3d2643db85bd..89f85157e1df 100644 --- a/tools/llvm-lto2/llvm-lto2.cpp +++ b/tools/llvm-lto2/llvm-lto2.cpp @@ -99,6 +99,11 @@ static cl::opt<bool> OptRemarksWithHotness( cl::desc("Whether to include hotness informations in the remarks.\n" "Has effect only if -pass-remarks-output is specified.")); +static cl::opt<bool> + UseNewPM("use-new-pm", + cl::desc("Run LTO passes using the new pass manager"), + cl::init(false), cl::Hidden); + static void check(Error E, std::string Msg) { if (!E) return; @@ -196,6 +201,7 @@ static int run(int argc, char **argv) { Conf.AAPipeline = AAPipeline; Conf.OptLevel = OptLevel - '0'; + Conf.UseNewPM = UseNewPM; switch (CGOptLevel) { case '0': Conf.CGOptLevel = CodeGenOpt::None; @@ -351,7 +357,7 @@ int main(int argc, char **argv) { // FIXME: This should use llvm::cl subcommands, but it isn't currently // possible to pass an argument not associated with a subcommand to a - // subcommand (e.g. -lto-use-new-pm). + // subcommand (e.g. -use-new-pm). 
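// The -use-new-pm plumbing above follows the usual cl::opt pattern: a
// hidden, default-false flag declared at file scope whose value is copied
// into the LTO configuration (Conf.UseNewPM = UseNewPM) before the link
// runs, so downstream code consults the config rather than the flag.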
if (argc < 2) return usage(); diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/tools/llvm-pdbdump/LLVMOutputStyle.cpp index d95eca1aeddb..31c342cd0f5a 100644 --- a/tools/llvm-pdbdump/LLVMOutputStyle.cpp +++ b/tools/llvm-pdbdump/LLVMOutputStyle.cpp @@ -483,8 +483,8 @@ Error LLVMOutputStyle::dumpStreamBytes() { if (SI >= File.getNumStreams()) return make_error<RawError>(raw_error_code::no_stream); - auto S = MappedBlockStream::createIndexedStream(File.getMsfLayout(), - File.getMsfBuffer(), SI); + auto S = MappedBlockStream::createIndexedStream( + File.getMsfLayout(), File.getMsfBuffer(), SI, File.getAllocator()); if (!S) continue; DictScope DD(P, "Stream"); @@ -791,7 +791,7 @@ Error LLVMOutputStyle::dumpDbiStream() { if (HasModuleDI && (ShouldDumpSymbols || opts::raw::DumpLineInfo)) { auto ModStreamData = MappedBlockStream::createIndexedStream( File.getMsfLayout(), File.getMsfBuffer(), - Modi.getModuleStreamIndex()); + Modi.getModuleStreamIndex(), File.getAllocator()); ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); if (auto EC = ModS.reload()) @@ -804,7 +804,8 @@ Error LLVMOutputStyle::dumpDbiStream() { auto &Types = *ExpectedTypes; ListScope SS(P, "Symbols"); - codeview::CVSymbolDumper SD(P, Types, nullptr, false); + codeview::CVSymbolDumper SD(P, Types, CodeViewContainer::Pdb, nullptr, + false); bool HadError = false; for (auto S : ModS.symbols(&HadError)) { DictScope LL(P, ""); @@ -830,8 +831,7 @@ Error LLVMOutputStyle::dumpDbiStream() { return ExpectedTypes.takeError(); auto &IpiItems = *ExpectedTypes; C13RawVisitor V(P, File, IpiItems); - if (auto EC = - codeview::visitDebugSubsections(ModS.linesAndChecksums(), V)) + if (auto EC = codeview::visitDebugSubsections(ModS.subsections(), V)) return EC; } } @@ -952,7 +952,7 @@ Error LLVMOutputStyle::dumpPublicsStream() { return ExpectedTypes.takeError(); auto &Tpi = *ExpectedTypes; - codeview::CVSymbolDumper SD(P, Tpi, nullptr, false); + codeview::CVSymbolDumper SD(P, Tpi, CodeViewContainer::Pdb, nullptr, false); bool HadError = false; for (auto S : Publics->getSymbols(&HadError)) { DictScope DD(P, ""); diff --git a/tools/llvm-pdbdump/PdbYaml.cpp b/tools/llvm-pdbdump/PdbYaml.cpp index e288063e2afa..b4a41fbfdb8f 100644 --- a/tools/llvm-pdbdump/PdbYaml.cpp +++ b/tools/llvm-pdbdump/PdbYaml.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h" #include "llvm/DebugInfo/CodeView/TypeSerializer.h" @@ -21,6 +22,7 @@ #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" +#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h" #include "llvm/ObjectYAML/CodeViewYAMLTypes.h" using namespace llvm; @@ -220,6 +222,6 @@ void MappingTraits<PdbDbiModuleInfo>::mapping(IO &IO, PdbDbiModuleInfo &Obj) { IO.mapRequired("Module", Obj.Mod); IO.mapOptional("ObjFile", Obj.Obj, Obj.Mod); IO.mapOptional("SourceFiles", Obj.SourceFiles); - IO.mapOptional("LineInfo", Obj.FileLineInfo); + IO.mapOptional("Subsections", Obj.Subsections); IO.mapOptional("Modi", Obj.Modi); } diff --git a/tools/llvm-pdbdump/PdbYaml.h b/tools/llvm-pdbdump/PdbYaml.h index deb500ec2074..62ed608916fc 100644 --- a/tools/llvm-pdbdump/PdbYaml.h +++ b/tools/llvm-pdbdump/PdbYaml.h @@ -28,6 +28,9 @@ #include <vector> 
namespace llvm { +namespace codeview { +class DebugStringTableSubsection; +} namespace pdb { namespace yaml { @@ -68,7 +71,7 @@ struct PdbDbiModuleInfo { StringRef Obj; StringRef Mod; std::vector<StringRef> SourceFiles; - Optional<CodeViewYAML::SourceFileInfo> FileLineInfo; + std::vector<CodeViewYAML::YAMLDebugSubsection> Subsections; Optional<PdbModiStream> Modi; }; diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/tools/llvm-pdbdump/YAMLOutputStyle.cpp index 18839a7679d3..ee72b90b12d1 100644 --- a/tools/llvm-pdbdump/YAMLOutputStyle.cpp +++ b/tools/llvm-pdbdump/YAMLOutputStyle.cpp @@ -101,117 +101,6 @@ Error YAMLOutputStyle::dump() { return Error::success(); } -namespace { -class C13YamlVisitor : public C13DebugFragmentVisitor { -public: - C13YamlVisitor(CodeViewYAML::SourceFileInfo &Info, PDBFile &F) - : C13DebugFragmentVisitor(F), Info(Info) {} - - Error handleFileChecksums() override { - for (const auto &C : *Checksums) { - CodeViewYAML::SourceFileChecksumEntry Entry; - if (auto Result = getNameFromStringTable(C.FileNameOffset)) - Entry.FileName = *Result; - else - return Result.takeError(); - - Entry.Kind = C.Kind; - Entry.ChecksumBytes.Bytes = C.Checksum; - Info.FileChecksums.push_back(Entry); - } - return Error::success(); - } - - Error handleLines() override { - for (const auto &LF : Lines) { - Info.LineFragments.emplace_back(); - auto &Fragment = Info.LineFragments.back(); - - Fragment.CodeSize = LF.header()->CodeSize; - Fragment.Flags = - static_cast<codeview::LineFlags>(uint16_t(LF.header()->Flags)); - Fragment.RelocOffset = LF.header()->RelocOffset; - Fragment.RelocSegment = LF.header()->RelocSegment; - - for (const auto &L : LF) { - Fragment.Blocks.emplace_back(); - auto &Block = Fragment.Blocks.back(); - - if (auto Result = getNameFromChecksumsBuffer(L.NameIndex)) - Block.FileName = *Result; - else - return Result.takeError(); - - for (const auto &N : L.LineNumbers) { - CodeViewYAML::SourceLineEntry Line; - Line.Offset = N.Offset; - codeview::LineInfo LI(N.Flags); - Line.LineStart = LI.getStartLine(); - Line.EndDelta = LI.getLineDelta(); - Line.IsStatement = LI.isStatement(); - Block.Lines.push_back(Line); - } - - if (LF.hasColumnInfo()) { - for (const auto &C : L.Columns) { - CodeViewYAML::SourceColumnEntry Column; - Column.StartColumn = C.StartColumn; - Column.EndColumn = C.EndColumn; - Block.Columns.push_back(Column); - } - } - } - } - return Error::success(); - } - - Error handleInlineeLines() override { - for (const auto &ILF : InlineeLines) { - Info.Inlinees.emplace_back(); - auto &Inlinee = Info.Inlinees.back(); - - Inlinee.HasExtraFiles = ILF.hasExtraFiles(); - for (const auto &IL : ILF) { - Inlinee.Sites.emplace_back(); - auto &Site = Inlinee.Sites.back(); - if (auto Result = getNameFromChecksumsBuffer(IL.Header->FileID)) - Site.FileName = *Result; - else - return Result.takeError(); - - Site.Inlinee = IL.Header->Inlinee.getIndex(); - Site.SourceLineNum = IL.Header->SourceLineNum; - if (ILF.hasExtraFiles()) { - for (const auto &EF : IL.ExtraFiles) { - if (auto Result = getNameFromChecksumsBuffer(EF)) - Site.ExtraFiles.push_back(*Result); - else - return Result.takeError(); - } - } - } - } - return Error::success(); - } - -private: - CodeViewYAML::SourceFileInfo &Info; -}; -} - -Expected<Optional<CodeViewYAML::SourceFileInfo>> -YAMLOutputStyle::getFileLineInfo(const pdb::ModuleDebugStreamRef &ModS) { - if (!ModS.hasLineInfo()) - return None; - - CodeViewYAML::SourceFileInfo Info; - C13YamlVisitor Visitor(Info, File); - if (auto EC = - 
codeview::visitDebugSubsections(ModS.linesAndChecksums(), Visitor)) - return std::move(EC); - - return Info; -} Error YAMLOutputStyle::dumpFileHeaders() { if (opts::pdb2yaml::NoFileHeaders) @@ -236,14 +125,17 @@ Error YAMLOutputStyle::dumpFileHeaders() { } Error YAMLOutputStyle::dumpStringTable() { - if (!opts::pdb2yaml::StringTable) + bool RequiresStringTable = opts::pdb2yaml::DbiModuleSourceFileInfo || + opts::pdb2yaml::DbiModuleSourceLineInfo; + bool RequestedStringTable = opts::pdb2yaml::StringTable; + if (!RequiresStringTable && !RequestedStringTable) return Error::success(); - Obj.StringTable.emplace(); auto ExpectedST = File.getStringTable(); if (!ExpectedST) return ExpectedST.takeError(); + Obj.StringTable.emplace(); const auto &ST = ExpectedST.get(); for (auto ID : ST.name_ids()) { auto S = ST.getStringForID(ID); @@ -337,17 +229,30 @@ Error YAMLOutputStyle::dumpDbiStream() { continue; auto ModStreamData = msf::MappedBlockStream::createIndexedStream( - File.getMsfLayout(), File.getMsfBuffer(), ModiStream); + File.getMsfLayout(), File.getMsfBuffer(), ModiStream, + File.getAllocator()); pdb::ModuleDebugStreamRef ModS(MI, std::move(ModStreamData)); if (auto EC = ModS.reload()) return EC; - if (opts::pdb2yaml::DbiModuleSourceLineInfo) { - auto ExpectedInfo = getFileLineInfo(ModS); - if (!ExpectedInfo) - return ExpectedInfo.takeError(); - DMI.FileLineInfo = *ExpectedInfo; + auto ExpectedST = File.getStringTable(); + if (!ExpectedST) + return ExpectedST.takeError(); + if (opts::pdb2yaml::DbiModuleSourceLineInfo && + ModS.hasDebugSubsections()) { + auto ExpectedChecksums = ModS.findChecksumsSubsection(); + if (!ExpectedChecksums) + return ExpectedChecksums.takeError(); + + for (const auto &SS : ModS.subsections()) { + auto Converted = + CodeViewYAML::YAMLDebugSubsection::fromCodeViewSubection( + ExpectedST->getStringTable(), *ExpectedChecksums, SS); + if (!Converted) + return Converted.takeError(); + DMI.Subsections.push_back(*Converted); + } } if (opts::pdb2yaml::DbiModuleSyms) { diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.h b/tools/llvm-pdbdump/YAMLOutputStyle.h index 6e4067c48f88..3690e3529d4a 100644 --- a/tools/llvm-pdbdump/YAMLOutputStyle.h +++ b/tools/llvm-pdbdump/YAMLOutputStyle.h @@ -27,9 +27,6 @@ public: Error dump() override; private: - Expected<Optional<CodeViewYAML::SourceFileInfo>> - getFileLineInfo(const pdb::ModuleDebugStreamRef &ModS); - Error dumpStringTable(); Error dumpFileHeaders(); Error dumpStreamMetadata(); diff --git a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp b/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp index 14cd222d138a..5f09416a9ff6 100644 --- a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp +++ b/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp @@ -85,7 +85,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) { for (auto &Modi : DS.modules()) { auto ModStreamData = pdb::MappedBlockStream::createIndexedStream( - Modi.Info.getModuleStreamIndex(), *File); + Modi.Info.getModuleStreamIndex(), *File, File->getAllocator()); if (!ModStreamData) { consumeError(ModStreamData.takeError()); return 0; diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbdump/llvm-pdbdump.cpp index 0b2b766a3c52..4626de9c4440 100644 --- a/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -476,7 +476,6 @@ static void yamlToPdb(StringRef Path) { std::unique_ptr<MemoryBuffer> &Buffer = ErrorOrBuffer.get(); llvm::yaml::Input In(Buffer->getBuffer()); - In.setContext(&Allocator); pdb::yaml::PdbObject 
YamlObj(Allocator); In >> YamlObj; @@ -535,67 +534,16 @@ static void yamlToPdb(StringRef Path) { ExitOnErr(DbiBuilder.addModuleSourceFile(MI.Mod, S)); if (MI.Modi.hasValue()) { const auto &ModiStream = *MI.Modi; - for (auto Symbol : ModiStream.Symbols) - ModiBuilder.addSymbol(Symbol.toCodeViewSymbol(Allocator)); - } - if (MI.FileLineInfo.hasValue()) { - const auto &FLI = *MI.FileLineInfo; - - // File Checksums must be emitted before line information, because line - // info records use offsets into the checksum buffer to reference a file's - // source file name. - auto Checksums = llvm::make_unique<DebugChecksumsSubsection>(Strings); - auto &ChecksumRef = *Checksums; - if (!FLI.FileChecksums.empty()) { - for (auto &FC : FLI.FileChecksums) - Checksums->addChecksum(FC.FileName, FC.Kind, FC.ChecksumBytes.Bytes); - } - ModiBuilder.setC13FileChecksums(std::move(Checksums)); - - for (const auto &Fragment : FLI.LineFragments) { - auto Lines = - llvm::make_unique<DebugLinesSubsection>(ChecksumRef, Strings); - Lines->setCodeSize(Fragment.CodeSize); - Lines->setRelocationAddress(Fragment.RelocSegment, - Fragment.RelocOffset); - Lines->setFlags(Fragment.Flags); - for (const auto &LC : Fragment.Blocks) { - Lines->createBlock(LC.FileName); - if (Lines->hasColumnInfo()) { - for (const auto &Item : zip(LC.Lines, LC.Columns)) { - auto &L = std::get<0>(Item); - auto &C = std::get<1>(Item); - uint32_t LE = L.LineStart + L.EndDelta; - Lines->addLineAndColumnInfo( - L.Offset, LineInfo(L.LineStart, LE, L.IsStatement), - C.StartColumn, C.EndColumn); - } - } else { - for (const auto &L : LC.Lines) { - uint32_t LE = L.LineStart + L.EndDelta; - Lines->addLineInfo(L.Offset, - LineInfo(L.LineStart, LE, L.IsStatement)); - } - } - } - ModiBuilder.addC13Fragment(std::move(Lines)); + for (auto Symbol : ModiStream.Symbols) { + ModiBuilder.addSymbol( + Symbol.toCodeViewSymbol(Allocator, CodeViewContainer::Pdb)); } + } - for (const auto &Inlinee : FLI.Inlinees) { - auto Inlinees = llvm::make_unique<DebugInlineeLinesSubsection>( - ChecksumRef, Inlinee.HasExtraFiles); - for (const auto &Site : Inlinee.Sites) { - Inlinees->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName, - Site.SourceLineNum); - if (!Inlinee.HasExtraFiles) - continue; - - for (auto EF : Site.ExtraFiles) { - Inlinees->addExtraFile(EF); - } - } - ModiBuilder.addC13Fragment(std::move(Inlinees)); - } + auto CodeViewSubsections = + ExitOnErr(CodeViewYAML::convertSubsectionList(MI.Subsections, Strings)); + for (auto &SS : CodeViewSubsections) { + ModiBuilder.addDebugSubsection(std::move(SS)); } } diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 663f7b4c8a82..bc07bd296ad2 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -978,7 +978,8 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection, Subsection.bytes_end()); auto CODD = llvm::make_unique<COFFObjectDumpDelegate>(*this, Section, Obj, SectionContents); - CVSymbolDumper CVSD(W, Types, std::move(CODD), opts::CodeViewSubsectionBytes); + CVSymbolDumper CVSD(W, Types, CodeViewContainer::ObjectFile, std::move(CODD), + opts::CodeViewSubsectionBytes); CVSymbolArray Symbols; BinaryStreamReader Reader(BinaryData, llvm::support::little); if (auto EC = Reader.readArray(Symbols, Reader.getLength())) { diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp index 7367ad470e3a..ca6391024f27 100644 --- a/unittests/ADT/SmallVectorTest.cpp +++ b/unittests/ADT/SmallVectorTest.cpp @@ -424,6 +424,16 @@ 
TYPED_TEST(SmallVectorTest, AssignTest) { this->assertValuesInOrder(this->theVector, 2u, 77, 77); } +// Assign range test +TYPED_TEST(SmallVectorTest, AssignRangeTest) { + SCOPED_TRACE("AssignRangeTest"); + + this->theVector.push_back(Constructable(1)); + int arr[] = {1, 2, 3}; + this->theVector.assign(std::begin(arr), std::end(arr)); + this->assertValuesInOrder(this->theVector, 3u, 1, 2, 3); +} + // Move-assign test TYPED_TEST(SmallVectorTest, MoveAssignTest) { SCOPED_TRACE("MoveAssignTest"); diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt index 40d5ea5f5ad7..8082c54b9c66 100644 --- a/unittests/Analysis/CMakeLists.txt +++ b/unittests/Analysis/CMakeLists.txt @@ -9,17 +9,18 @@ add_llvm_unittest(AnalysisTests AliasAnalysisTest.cpp BlockFrequencyInfoTest.cpp BranchProbabilityInfoTest.cpp + CallGraphTest.cpp CFGTest.cpp CGSCCPassManagerTest.cpp - CallGraphTest.cpp LazyCallGraphTest.cpp LoopInfoTest.cpp MemoryBuiltinsTest.cpp MemorySSA.cpp + OrderedBasicBlockTest.cpp ProfileSummaryInfoTest.cpp ScalarEvolutionTest.cpp - TBAATest.cpp TargetLibraryInfoTest.cpp + TBAATest.cpp UnrollAnalyzer.cpp ValueTrackingTest.cpp ) diff --git a/unittests/Analysis/OrderedBasicBlockTest.cpp b/unittests/Analysis/OrderedBasicBlockTest.cpp new file mode 100644 index 000000000000..b8b9ff04ce7c --- /dev/null +++ b/unittests/Analysis/OrderedBasicBlockTest.cpp @@ -0,0 +1,58 @@ +//===- OrderedBasicBlockTest.cpp - OrderedBasicBlock unit tests -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +namespace llvm { +namespace { + +class OrderedBasicBlockTest : public testing::Test { +protected: + LLVMContext C; + + std::unique_ptr<Module> makeLLVMModule() { + const char *ModuleString = R"(define i32 @f(i32 %x) { + %add = add i32 %x, 42 + ret i32 %add + })"; + SMDiagnostic Err; + auto foo = parseAssemblyString(ModuleString, Err, C); + return foo; + } +}; + +TEST_F(OrderedBasicBlockTest, Basic) { + auto M = makeLLVMModule(); + Function *F = M->getFunction("f"); + BasicBlock::iterator I = F->front().begin(); + Instruction *Add = &*I++; + Instruction *Ret = &*I++; + + OrderedBasicBlock OBB(&F->front()); + // Intentionally duplicated to verify cached and uncached are the same.
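// Note: OrderedBasicBlock numbers the block's instructions lazily, on
// first query, and caches the positions; the duplicated checks below
// therefore cover both the cold path that builds the ordering and the
// warm path that reads it back from the cache.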
+ EXPECT_FALSE(OBB.dominates(Add, Add)); + EXPECT_FALSE(OBB.dominates(Add, Add)); + EXPECT_TRUE(OBB.dominates(Add, Ret)); + EXPECT_TRUE(OBB.dominates(Add, Ret)); + EXPECT_FALSE(OBB.dominates(Ret, Add)); + EXPECT_FALSE(OBB.dominates(Ret, Add)); + EXPECT_FALSE(OBB.dominates(Ret, Ret)); + EXPECT_FALSE(OBB.dominates(Ret, Ret)); +} + +} // end anonymous namespace +} // end namespace llvm diff --git a/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp b/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp index 9d90e265df33..789fe515b018 100644 --- a/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp +++ b/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp @@ -70,6 +70,8 @@ public: return MSFStreamLayout{static_cast<uint32_t>(Data.size()), Blocks}; } + BumpPtrAllocator Allocator; + private: std::vector<support::ulittle32_t> Blocks; MutableArrayRef<uint8_t> Data; @@ -77,7 +79,8 @@ private: TEST(MappedBlockStreamTest, NumBlocks) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); EXPECT_EQ(F.block_size(), S->getBlockSize()); EXPECT_EQ(F.layout().Blocks.size(), S->getNumBlocks()); @@ -87,7 +90,8 @@ TEST(MappedBlockStreamTest, NumBlocks) { // and does not allocate. TEST(MappedBlockStreamTest, ReadBeyondEndOfStreamRef) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); BinaryStreamRef SR; @@ -102,13 +106,14 @@ TEST(MappedBlockStreamTest, ReadBeyondEndOfStreamRef) { // does not fail due to the length of the output buffer. TEST(MappedBlockStreamTest, ReadOntoNonEmptyBuffer) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str = "ZYXWVUTSRQPONMLKJIHGFEDCBA"; EXPECT_NO_ERROR(R.readFixedString(Str, 1)); EXPECT_EQ(Str, StringRef("A")); - EXPECT_EQ(0U, S->getNumBytesCopied()); + EXPECT_EQ(0U, F.Allocator.getBytesAllocated()); } // Tests that a read which crosses a block boundary, but where the subsequent @@ -116,18 +121,18 @@ TEST(MappedBlockStreamTest, ReadOntoNonEmptyBuffer) { // not allocate memory. TEST(MappedBlockStreamTest, ZeroCopyReadContiguousBreak) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), - F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str; EXPECT_NO_ERROR(R.readFixedString(Str, 2)); EXPECT_EQ(Str, StringRef("AB")); - EXPECT_EQ(0U, S->getNumBytesCopied()); + EXPECT_EQ(0U, F.Allocator.getBytesAllocated()); R.setOffset(6); EXPECT_NO_ERROR(R.readFixedString(Str, 4)); EXPECT_EQ(Str, StringRef("GHIJ")); - EXPECT_EQ(0U, S->getNumBytesCopied()); + EXPECT_EQ(0U, F.Allocator.getBytesAllocated()); } // Tests that a read which crosses a block boundary and cannot be referenced @@ -135,62 +140,67 @@ TEST(MappedBlockStreamTest, ZeroCopyReadContiguousBreak) { // requested. 
TEST(MappedBlockStreamTest, CopyReadNonContiguousBreak) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str; EXPECT_NO_ERROR(R.readFixedString(Str, 10)); EXPECT_EQ(Str, StringRef("ABCDEFGHIJ")); - EXPECT_EQ(10U, S->getNumBytesCopied()); + EXPECT_EQ(10U, F.Allocator.getBytesAllocated()); } // Test that an out of bounds read which doesn't cross a block boundary // fails and allocates no memory. TEST(MappedBlockStreamTest, InvalidReadSizeNoBreak) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str; R.setOffset(10); EXPECT_ERROR(R.readFixedString(Str, 1)); - EXPECT_EQ(0U, S->getNumBytesCopied()); + EXPECT_EQ(0U, F.Allocator.getBytesAllocated()); } // Test that an out of bounds read which crosses a contiguous block boundary // fails and allocates no memory. TEST(MappedBlockStreamTest, InvalidReadSizeContiguousBreak) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str; R.setOffset(6); EXPECT_ERROR(R.readFixedString(Str, 5)); - EXPECT_EQ(0U, S->getNumBytesCopied()); + EXPECT_EQ(0U, F.Allocator.getBytesAllocated()); } // Test that an out of bounds read which crosses a discontiguous block // boundary fails and allocates no memory. TEST(MappedBlockStreamTest, InvalidReadSizeNonContiguousBreak) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str; EXPECT_ERROR(R.readFixedString(Str, 11)); - EXPECT_EQ(0U, S->getNumBytesCopied()); + EXPECT_EQ(0U, F.Allocator.getBytesAllocated()); } // Tests that a read which is entirely contained within a single block but // beyond the end of a StreamRef fails. TEST(MappedBlockStreamTest, ZeroCopyReadNoBreak) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str; EXPECT_NO_ERROR(R.readFixedString(Str, 1)); EXPECT_EQ(Str, StringRef("A")); - EXPECT_EQ(0U, S->getNumBytesCopied()); + EXPECT_EQ(0U, F.Allocator.getBytesAllocated()); } // Tests that a read which is not aligned on the same boundary as a previous @@ -198,19 +208,20 @@ TEST(MappedBlockStreamTest, ZeroCopyReadNoBreak) { // previous allocation. 
TEST(MappedBlockStreamTest, UnalignedOverlappingRead) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str1; StringRef Str2; EXPECT_NO_ERROR(R.readFixedString(Str1, 7)); EXPECT_EQ(Str1, StringRef("ABCDEFG")); - EXPECT_EQ(7U, S->getNumBytesCopied()); + EXPECT_EQ(7U, F.Allocator.getBytesAllocated()); R.setOffset(2); EXPECT_NO_ERROR(R.readFixedString(Str2, 3)); EXPECT_EQ(Str2, StringRef("CDE")); EXPECT_EQ(Str1.data() + 2, Str2.data()); - EXPECT_EQ(7U, S->getNumBytesCopied()); + EXPECT_EQ(7U, F.Allocator.getBytesAllocated()); } // Tests that a read which is not aligned on the same boundary as a previous @@ -218,18 +229,19 @@ TEST(MappedBlockStreamTest, UnalignedOverlappingRead) { // still works correctly and allocates again from the shared pool. TEST(MappedBlockStreamTest, UnalignedOverlappingReadFail) { DiscontiguousStream F(BlocksAry, DataAry); - auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F); + auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F, + F.Allocator); BinaryStreamReader R(*S); StringRef Str1; StringRef Str2; EXPECT_NO_ERROR(R.readFixedString(Str1, 6)); EXPECT_EQ(Str1, StringRef("ABCDEF")); - EXPECT_EQ(6U, S->getNumBytesCopied()); + EXPECT_EQ(6U, F.Allocator.getBytesAllocated()); R.setOffset(4); EXPECT_NO_ERROR(R.readFixedString(Str2, 4)); EXPECT_EQ(Str2, StringRef("EFGH")); - EXPECT_EQ(10U, S->getNumBytesCopied()); + EXPECT_EQ(10U, F.Allocator.getBytesAllocated()); } TEST(MappedBlockStreamTest, WriteBeyondEndOfStream) { @@ -241,8 +253,8 @@ TEST(MappedBlockStreamTest, WriteBeyondEndOfStream) { "LargeBuffer is not big enough"); DiscontiguousStream F(BlocksAry, Data); - auto S = WritableMappedBlockStream::createStream( - F.block_size(), F.layout(), F); + auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(), + F, F.Allocator); ArrayRef<uint8_t> Buffer; EXPECT_ERROR(S->writeBytes(0, ArrayRef<uint8_t>(LargeBuffer))); @@ -254,8 +266,8 @@ TEST(MappedBlockStreamTest, WriteBeyondEndOfStream) { TEST(MappedBlockStreamTest, TestWriteBytesNoBreakBoundary) { static uint8_t Data[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'}; DiscontiguousStream F(BlocksAry, Data); - auto S = WritableMappedBlockStream::createStream( - F.block_size(), F.layout(), F); + auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(), + F, F.Allocator); ArrayRef<uint8_t> Buffer; EXPECT_NO_ERROR(S->readBytes(0, 1, Buffer)); @@ -287,8 +299,8 @@ TEST(MappedBlockStreamTest, TestWriteBytesBreakBoundary) { 'T', 'G', '.', '0', '0'}; DiscontiguousStream F(BlocksAry, Data); - auto S = WritableMappedBlockStream::createStream( - F.block_size(), F.layout(), F); + auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(), + F, F.Allocator); ArrayRef<uint8_t> Buffer; EXPECT_NO_ERROR(S->writeBytes(0, TestData)); @@ -306,8 +318,8 @@ TEST(MappedBlockStreamTest, TestWriteThenRead) { const uint32_t Blocks[] = {2, 1, 0, 6, 3, 4, 5, 7, 9, 8}; DiscontiguousStream F(Blocks, Data); - auto S = WritableMappedBlockStream::createStream( - F.block_size(), F.layout(), F); + auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(), + F, F.Allocator); enum class MyEnum : uint32_t { Val1 = 2908234, Val2 = 120891234 }; using support::ulittle32_t; @@ -399,7 +411,7 @@ TEST(MappedBlockStreamTest, TestWriteContiguousStreamRef) { 
DiscontiguousStream F(DestBlocks, DestData); auto DestStream = WritableMappedBlockStream::createStream( - F.block_size(), F.layout(), F); + F.block_size(), F.layout(), F, F.Allocator); // First write "Test Str" into the source stream. MutableBinaryByteStream SourceStream(SrcData, little); @@ -434,9 +446,9 @@ TEST(MappedBlockStreamTest, TestWriteDiscontiguousStreamRef) { DiscontiguousStream SrcF(SrcBlocks, SrcData); auto Dest = WritableMappedBlockStream::createStream( - DestF.block_size(), DestF.layout(), DestF); + DestF.block_size(), DestF.layout(), DestF, DestF.Allocator); auto Src = WritableMappedBlockStream::createStream( - SrcF.block_size(), SrcF.layout(), SrcF); + SrcF.block_size(), SrcF.layout(), SrcF, SrcF.Allocator); // First write "Test Str" into the source stream. BinaryStreamWriter SourceWriter(*Src); @@ -457,4 +469,27 @@ TEST(MappedBlockStreamTest, TestWriteDiscontiguousStreamRef) { EXPECT_EQ(Result, "Test Str"); } +TEST(MappedBlockStreamTest, DataLivesAfterStreamDestruction) { + std::vector<uint8_t> DataBytes(10); + MutableArrayRef<uint8_t> Data(DataBytes); + const uint32_t Blocks[] = {2, 1, 0, 6, 3, 4, 5, 7, 9, 8}; + + StringRef Str[] = {"Zero Str", ""}; + + DiscontiguousStream F(Blocks, Data); + { + auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(), + F, F.Allocator); + + BinaryStreamReader Reader(*S); + BinaryStreamWriter Writer(*S); + ::memset(DataBytes.data(), 0, 10); + EXPECT_NO_ERROR(Writer.writeCString(Str[0])); + EXPECT_NO_ERROR(Reader.readCString(Str[1])); + EXPECT_EQ(Str[0], Str[1]); + } + + EXPECT_EQ(Str[0], Str[1]); +} + } // end anonymous namespace diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp index d13547a842e4..db3d10847cd8 100644 --- a/unittests/Transforms/Utils/Cloning.cpp +++ b/unittests/Transforms/Utils/Cloning.cpp @@ -361,7 +361,7 @@ TEST_F(CloneFunc, NewFunctionCreated) { // Test that a new subprogram entry was added and is pointing to the new // function, while the original subprogram still points to the old one. TEST_F(CloneFunc, Subprogram) { - EXPECT_FALSE(verifyModule(*M)); + EXPECT_FALSE(verifyModule(*M, &errs())); EXPECT_EQ(3U, Finder->subprogram_count()); EXPECT_NE(NewFunc->getSubprogram(), OldFunc->getSubprogram()); } diff --git a/utils/TableGen/X86FoldTablesEmitter.cpp b/utils/TableGen/X86FoldTablesEmitter.cpp index b89cee2ce4bb..34f5fbc6ea31 100644 --- a/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/utils/TableGen/X86FoldTablesEmitter.cpp @@ -101,6 +101,11 @@ const char *const NoFoldSet[] = { "BTS16rr", "BTS32rr", "BTS64rr", "BTS16mr", "BTS32mr", "BTS64mr", + // insertps cannot be folded without adjusting the immediate. There's custom + // code to handle it in X86InstrInfo.cpp, ignore it here. + "INSERTPSrr", "INSERTPSrm", + "VINSERTPSrr", "VINSERTPSrm", "VINSERTPSZrr", "VINSERTPSZrm", + // Memory folding is enabled only when optimizing for size by DAG // patterns only. 
(issue detailed in D28744 review) "VCVTSS2SDrm", "VCVTSS2SDrr", diff --git a/utils/lit/lit/util.py b/utils/lit/lit/util.py index 104e9dac464d..8991588a868d 100644 --- a/utils/lit/lit/util.py +++ b/utils/lit/lit/util.py @@ -267,6 +267,20 @@ def usePlatformSdkOnDarwin(config, lit_config): lit_config.note('using SDKROOT: %r' % sdk_path) config.environment['SDKROOT'] = sdk_path +def findPlatformSdkVersionOnMacOS(config, lit_config): + if 'darwin' in config.target_triple: + try: + cmd = subprocess.Popen(['xcrun', '--show-sdk-version', '--sdk', 'macosx'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = cmd.communicate() + out = out.strip() + res = cmd.wait() + except OSError: + res = -1 + if res == 0 and out: + return out + return None + def killProcessAndChildren(pid): """ This function kills a process with ``pid`` and all its |