| author | Dimitry Andric <dim@FreeBSD.org> | 2015-01-31 19:27:28 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-01-31 19:27:28 +0000 |
| commit | ec304151b74f9254d7029ee4d197ce1f7cbe501a (patch) | |
| tree | 63e4ed55e4fbb581fd4731d44a327a7b3278e0a1 | |
| parent | 67c32a98315f785a9ec9d531c1f571a0196c7463 (diff) | |
| download | src-ec304151b74f9254d7029ee4d197ce1f7cbe501a.tar.gz, src-ec304151b74f9254d7029ee4d197ce1f7cbe501a.zip | |
119 files changed, 2405 insertions, 1149 deletions
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index dd54d45c99c9..f27fb0277085 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -102,6 +102,282 @@ enable handling of case (3).

.. _discussions: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-May/073235.html

Metadata is not a Value
-----------------------

Metadata nodes (``!{...}``) and strings (``!"..."``) are no longer values.
They have no use-lists, no type, cannot RAUW, and cannot be function-local.

Bridges between Value and Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

LLVM intrinsics can reference metadata using the ``metadata`` type, and
metadata nodes can reference constant values.

Function-local metadata is limited to direct arguments to LLVM intrinsics.
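In C++ terms, the boundary is crossed with the ``ValueAsMetadata`` and
``MetadataAsValue`` bridge classes. As a minimal editorial sketch (not part of
the release notes; it assumes the LLVM 3.6 headers, where these classes were
introduced), both directions look like this:

.. code-block:: c++

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Value.h"

    using namespace llvm;

    // A Value can no longer appear directly as a metadata operand; it has
    // to be wrapped as metadata first.
    Metadata *asMetadata(Value *V) {
      return ValueAsMetadata::get(V);
    }

    // Conversely, metadata handed to an intrinsic's ``metadata`` parameter
    // must be wrapped back into a Value.
    Value *asValue(LLVMContext &Ctx, Metadata *MD) {
      return MetadataAsValue::get(Ctx, MD);
    }

The ``extractMDNode()`` helper added to ``lib/IR/Core.cpp`` later in this
commit performs the corresponding unwrapping for the C API.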
Metadata is typeless
^^^^^^^^^^^^^^^^^^^^

The following old IR:

.. code-block:: llvm

    @global = global i32 0

    define void @foo(i32 %v) {
    entry:
      call void @llvm.md(metadata !{i32 %v})
      call void @llvm.md(metadata !{i32* @global})
      call void @llvm.md(metadata !0)
      call void @llvm.md(metadata !{metadata !"string"})
      call void @llvm.md(metadata !{metadata !{metadata !1, metadata !"string"}})
      ret void, !bar !1, !baz !2
    }

    declare void @llvm.md(metadata)

    !0 = metadata !{metadata !1, metadata !2, metadata !3, metadata !"some string"}
    !1 = metadata !{metadata !2, null, metadata !"other", i32* @global, i32 7}
    !2 = metadata !{}
    !3 = metadata !{}

should now be written as:

.. code-block:: llvm

    @global = global i32 0

    define void @foo(i32 %v) {
    entry:
      call void @llvm.md(metadata i32 %v) ; The only legal place for
                                          ; function-local metadata.
      call void @llvm.md(metadata i32* @global)
      call void @llvm.md(metadata !0)
      call void @llvm.md(metadata !{!"string"})
      call void @llvm.md(metadata !{!{!1, !"string"}})
      ret void, !bar !1, !baz !2
    }

    declare void @llvm.md(metadata)

    !0 = !{!1, !2, !3, !"some string"}
    !1 = !{!2, null, !"other", i32* @global, i32 7}
    !2 = !{}
    !3 = !{}

Distinct metadata nodes
^^^^^^^^^^^^^^^^^^^^^^^

Metadata nodes can opt out of uniquing using the keyword ``distinct``.
Distinct nodes are still owned by the context, but are stored in a side
table and not uniqued.

In LLVM 3.5, metadata nodes would drop uniquing if an operand changed to
``null`` during optimizations. This is no longer true. However, if an
operand change causes a uniquing collision, the node becomes ``distinct``.
Unlike in LLVM 3.5, where serializing to assembly or bitcode would re-unique
such nodes, they now remain ``distinct``.

The following IR:

.. code-block:: llvm

    !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}

    !0 = !{}
    !1 = !{}
    !2 = distinct !{}
    !3 = distinct !{}
    !4 = !{!0}
    !5 = distinct !{!0}
    !6 = !{!4, !{}, !5}
    !7 = !{!{!0}, !0, !5}
    !8 = distinct !{!{!0}, !0, !5}

is equivalent to the following:

.. code-block:: llvm

    !named = !{!0, !0, !1, !2, !3, !4, !5, !5, !6}

    !0 = !{}
    !1 = distinct !{}
    !2 = distinct !{}
    !3 = !{!0}
    !4 = distinct !{!0}
    !5 = !{!3, !0, !4}
    !6 = distinct !{!3, !0, !4}

Constructing cyclic graphs
^^^^^^^^^^^^^^^^^^^^^^^^^^

During graph construction, if a metadata node transitively references a
forward declaration, the node itself is considered "unresolved" until the
forward declaration resolves. An unresolved node can RAUW itself to support
uniquing. Nodes automatically resolve once all their operands have resolved.

However, cyclic graphs prevent the nodes from resolving. An API client that
constructs a cyclic graph must call ``resolveCycles()`` to resolve nodes in
the cycle, as sketched below.
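The notes leave the API pattern implicit. A rough editorial sketch of
building and resolving the two-node cycle ``!0 = !{!1}``, ``!1 = !{!0}``,
assuming the LLVM 3.6 C++ API (``MDNode::getTemporary``, RAUW on unresolved
nodes, and ``resolveCycles()``; these names and signatures changed in later
releases):

.. code-block:: c++

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"

    using namespace llvm;

    MDNode *buildCycle(LLVMContext &Ctx) {
      // Temporary node standing in for the not-yet-created !1.
      MDNode *Temp = MDNode::getTemporary(Ctx, ArrayRef<Metadata *>());
      Metadata *Ops0[] = {Temp};
      MDNode *N0 = MDNode::get(Ctx, Ops0);   // !0 = !{<temporary>}
      Metadata *Ops1[] = {N0};
      MDNode *N1 = MDNode::get(Ctx, Ops1);   // !1 = !{!0}
      Temp->replaceAllUsesWith(N1);          // patch the forward reference
      MDNode::deleteTemporary(Temp);
      N0->resolveCycles();                   // nodes in a cycle never
                                             // resolve on their own
      return N0;
    }

The temporary is needed because uniqued nodes are immutable by content:
``!1`` cannot be created until its operand exists, so a forward declaration
stands in and is replaced once the real node is available.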
To save self-references from that burden, self-referencing nodes are
implicitly ``distinct``. So the following IR:

.. code-block:: llvm

    !named = !{!0, !1, !2, !3, !4}

    !0 = !{!0}
    !1 = !{!1}
    !2 = !{!2, !1}
    !3 = !{!2, !1}
    !4 = !{!2, !1}

is equivalent to:

.. code-block:: llvm

    !named = !{!0, !1, !2, !3, !3}

    !0 = distinct !{!0}
    !1 = distinct !{!1}
    !2 = distinct !{!2, !1}
    !3 = !{!2, !1}

MDLocation (aka DebugLoc aka DILocation)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

There's a new first-class metadata construct called ``MDLocation`` (to be
followed in subsequent releases by others). It's used for the locations
referenced by ``!dbg`` metadata attachments.

For example, if an old ``!dbg`` attachment looked like this:

.. code-block:: llvm

    define i32 @foo(i32 %a, i32 %b) {
    entry:
      %add = add i32 %a, %b, !dbg !0
      ret i32 %add, !dbg !1
    }

    !0 = metadata !{i32 10, i32 3, metadata !2, metadata !1}
    !1 = metadata !{i32 20, i32 7, metadata !3}
    !2 = metadata !{...}
    !3 = metadata !{...}

the new attachment looks like this:

.. code-block:: llvm

    define i32 @foo(i32 %a, i32 %b) {
    entry:
      %add = add i32 %a, %b, !dbg !0
      ret i32 %add, !dbg !1
    }

    !0 = !MDLocation(line: 10, column: 3, scope: !2, inlinedAt: !1)
    !1 = !MDLocation(line: 20, column: 7, scope: !3)
    !2 = !{...}
    !3 = !{...}

The fields are named, can be reordered, and have sane defaults if left out
(although ``scope:`` is required).

Alias syntax change
-------------------

The syntax for aliases is now closer to what is used for global variables:

.. code-block:: llvm

    @a = weak global ...
    @b = weak alias ...

Previously, the ``alias`` keyword and the linkage appeared in the opposite
order.

The old JIT has been removed
----------------------------

All users should transition to MCJIT.

object::Binary doesn't own the file buffer
------------------------------------------

It is now just a wrapper, which simplifies using ``object::Binary`` with
other users of the underlying file.

IR in object files is now supported
-----------------------------------

Regular object files can contain IR in a section named ``.llvmbc``.

The gold plugin has been rewritten
----------------------------------

It is now implemented directly on top of ``lib/Linker`` instead of
``lib/LTO``. The API of ``lib/LTO`` is sufficiently different from gold's
view of the linking process that some cases could not be conveniently
implemented.

The new implementation is also lazier and has a ``save-temps`` option.

Change in the representation of lazily loaded functions
-------------------------------------------------------

Lazily loaded functions are now represented in a way that lets
``isDeclaration`` return the correct answer even before the body is read.

The opt option -std-compile-opts was removed
--------------------------------------------

It was effectively an alias of ``-O3``.

Python 2.7 is now required
--------------------------

This was done to simplify compatibility with Python 3.

The leak detector has been removed
----------------------------------

In practice, tools like ASan and Valgrind were finding far more bugs than
the old leak detector, so it was removed.

New comdat syntax
-----------------

The syntax of comdats was changed to:

.. code-block:: llvm

    $c = comdat any
    @g = global i32 0, comdat($c)
    @c = global i32 0, comdat

The version without the parentheses is syntactic sugar for a comdat with the
same name as the global.

Diagnostic infrastructure used by lib/Linker and lib/Bitcode
------------------------------------------------------------

These libraries now use the diagnostic handler to print errors and warnings.
This provides better error messages and simpler error handling.

The PreserveSource linker mode was removed
------------------------------------------

It was fairly broken, so it was removed.

Changes to the ARM Backend
--------------------------

@@ -237,8 +513,35 @@
An exciting aspect of LLVM is that it is used as an enabling technology for
a lot of other language and tools projects. This section lists some of the
projects that have already been updated to work with LLVM 3.6.

Portable Computing Language (pocl)
----------------------------------

In addition to producing an easily portable open source OpenCL
implementation, another major goal of `pocl <http://portablecl.org/>`_
is improving performance portability of OpenCL programs with compiler
optimizations, reducing the need for target-dependent manual optimizations.
An important part of pocl is a set of LLVM passes used to statically
parallelize multiple work-items with the kernel compiler, even in the
presence of work-group barriers. This enables static parallelization of the
fine-grained concurrency in the work-groups in multiple ways.

TTA-based Co-design Environment (TCE)
-------------------------------------

`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
exposed-datapath processors based on the Transport Triggered Architecture
(TTA).

The toolset provides a complete co-design flow from C/C++ programs down to
synthesizable VHDL/Verilog and parallel program binaries. Processor
customization points include the register files, function units, supported
operations, and the interconnection network.

TCE uses Clang and LLVM for C/C++/OpenCL C language support,
target-independent optimizations, and for parts of code generation. It
generates new LLVM-based code generators "on the fly" for the designed
processors and loads them into the compiler backend as runtime libraries to
avoid per-target recompilation of larger parts of the compiler chain.

Additional Information
======================

diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h
index ab500a1dd2a3..7f7889ad5fb3 100644
--- a/include/llvm/IR/LegacyPassManagers.h
+++ b/include/llvm/IR/LegacyPassManagers.h
@@ -195,6 +195,9 @@ public:
   /// then return NULL.
   Pass *findAnalysisPass(AnalysisID AID);

+  /// Retrieve the PassInfo for an analysis.
+  const PassInfo *findAnalysisPassInfo(AnalysisID AID) const;
+
   /// Find analysis usage information for the pass P.
   AnalysisUsage *findAnalysisUsage(Pass *P);

@@ -251,6 +254,12 @@ private:
   SmallVector<ImmutablePass *, 16> ImmutablePasses;

   DenseMap<Pass *, AnalysisUsage *> AnUsageMap;
+
+  /// Collection of PassInfo objects found via analysis IDs and in this top
+  /// level manager. This is used to memoize queries to the pass registry.
+  /// FIXME: This is an egregious hack because querying the pass registry is
+  /// either slow or racy.
+ mutable DenseMap<AnalysisID, const PassInfo *> AnalysisPassInfos; }; diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h index 216271086a40..a6d41392724e 100644 --- a/include/llvm/MC/MCAsmBackend.h +++ b/include/llvm/MC/MCAsmBackend.h @@ -61,6 +61,12 @@ public: /// markers. If not, data region directives will be ignored. bool hasDataInCodeSupport() const { return HasDataInCodeSupport; } + /// doesSectionRequireSymbols - Check whether the given section requires that + /// all symbols (even temporaries) have symbol table entries. + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } + /// @name Target Fixup Interfaces /// @{ diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 31f29eb1f3ff..681a31728799 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -11,7 +11,6 @@ #define LLVM_MC_MCASSEMBLER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" @@ -882,8 +881,6 @@ private: iplist<MCSymbolData> Symbols; - DenseSet<const MCSymbol *> LocalsUsedInReloc; - /// The map of sections to their associated assembler backend data. // // FIXME: Avoid this indirection? @@ -983,9 +980,6 @@ private: MCFragment &F, const MCFixup &Fixup); public: - void addLocalUsedInReloc(const MCSymbol &Sym); - bool isLocalUsedInReloc(const MCSymbol &Sym) const; - /// Compute the effective fragment size assuming it is laid out at the given /// \p SectionAddress and \p FragmentOffset. uint64_t computeFragmentSize(const MCAsmLayout &Layout, diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index e4681c0a3dc3..0c5aa8a18063 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -68,10 +68,12 @@ public: /// @name API /// @{ - virtual void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + virtual void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, + const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) = 0; /// @} @@ -95,14 +97,8 @@ class MachObjectWriter : public MCObjectWriter { /// @name Relocation Data /// @{ - struct RelAndSymbol { - const MCSymbolData *Sym; - MachO::any_relocation_info MRE; - RelAndSymbol(const MCSymbolData *Sym, const MachO::any_relocation_info &MRE) - : Sym(Sym), MRE(MRE) {} - }; - - llvm::DenseMap<const MCSectionData *, std::vector<RelAndSymbol>> Relocations; + llvm::DenseMap<const MCSectionData*, + std::vector<MachO::any_relocation_info> > Relocations; llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; /// @} @@ -217,15 +213,9 @@ public: // - Input errors, where something cannot be correctly encoded. 'as' allows // these through in many cases. - // Add a relocation to be output in the object file. At the time this is - // called, the symbol indexes are not know, so if the relocation refers - // to a symbol it should be passed as \p RelSymbol so that it can be updated - // afterwards. If the relocation doesn't refer to a symbol, nullptr should be - // used. 
- void addRelocation(const MCSymbolData *RelSymbol, const MCSectionData *SD, + void addRelocation(const MCSectionData *SD, MachO::any_relocation_info &MRE) { - RelAndSymbol P(RelSymbol, MRE); - Relocations[SD].push_back(P); + Relocations[SD].push_back(MRE); } void RecordScatteredRelocation(const MCAssembler &Asm, @@ -241,7 +231,7 @@ public: const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue); - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override; diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index 173ef416f2cc..55c828c6c179 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -76,10 +76,12 @@ public: /// post layout binding. The implementation is responsible for storing /// information about the relocation so that it can be emitted during /// WriteObject(). - virtual void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + virtual void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) = 0; + bool &IsPCRel, + uint64_t &FixedValue) = 0; /// \brief Check whether the difference (A - B) between two symbol /// references is fully resolved. diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index d2f096fd1efd..70ef0eb765dd 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/Target/TargetLibraryInfo.h" namespace llvm { class Value; @@ -53,8 +54,10 @@ private: Value *optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B); Value *optimizeMemSetChk(CallInst *CI, IRBuilder<> &B); - Value *optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B); - Value *optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B); + + // Str/Stp cpy are similar enough to be handled in the same functions. + Value *optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc::Func Func); + Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc::Func Func); /// \brief Checks whether the call \p CI to a fortified libcall is foldable /// to the non-fortified version. diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 86852d634ff2..ee4273770555 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -3154,8 +3154,9 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, if (LHS == RHS) return getConstant(LHS->getType(), 0); - // X - Y --> X + -Y - return getAddExpr(LHS, getNegativeSCEV(RHS), Flags); + // X - Y --> X + -Y. + // X -(nsw || nuw) Y --> X + -Y. 
+ return getAddExpr(LHS, getNegativeSCEV(RHS)); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -3461,12 +3462,10 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) Flags = setFlags(Flags, SCEV::FlagNUW); } - } else if (const SubOperator *OBO = - dyn_cast<SubOperator>(BEValueV)) { - if (OBO->hasNoUnsignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (OBO->hasNoSignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNSW); + + // We cannot transfer nuw and nsw flags from subtraction + // operations -- sub nuw X, Y is not the same as add nuw X, -Y + // for instance. } const SCEV *StartVal = getSCEV(StartValueV); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 3d3da2ac1d31..455258e81e6d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -626,10 +626,7 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); - // If this type is not derived from any type or the type is a declaration then - // take conservative approach. - if (!BaseType.isValid() || BaseType.isForwardDecl()) - return Ty.getSizeInBits(); + assert(BaseType.isValid()); // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need @@ -1473,7 +1470,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { uint64_t FieldSize = getBaseTypeSize(DD, DT); uint64_t OffsetInBytes; - if (Size != FieldSize) { + if (FieldSize && Size != FieldSize) { // Handle bitfield, assume bytes are 8 bits. addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 789f2042a073..2f076b6734d9 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -11,11 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" using namespace llvm; Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, @@ -43,15 +51,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { // because CodeGen overloads that to mean preserving the MachineBasicBlock // CFG in addition to the LLVM IR CFG. 
AU.addPreserved<AliasAnalysis>(); - AU.addPreserved("scalar-evolution"); - AU.addPreserved("iv-users"); - AU.addPreserved("memdep"); - AU.addPreserved("live-values"); - AU.addPreserved("domtree"); - AU.addPreserved("domfrontier"); - AU.addPreserved("loops"); - AU.addPreserved("lda"); - AU.addPreserved("stack-protector"); + AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<IVUsers>(); + AU.addPreserved<LoopInfo>(); + AU.addPreserved<MemoryDependenceAnalysis>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<StackProtector>(); FunctionPass::getAnalysisUsage(AU); } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index a25c4d66d3bb..753d9c229eca 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -563,9 +563,23 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) { return nullptr; } -void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) { - MDNode *N = - MD ? cast<MDNode>(unwrap<MetadataAsValue>(MD)->getMetadata()) : nullptr; +// MetadataAsValue uses a canonical format which strips the actual MDNode for +// MDNode with just a single constant value, storing just a ConstantAsMetadata +// This undoes this canonicalization, reconstructing the MDNode. +static MDNode *extractMDNode(MetadataAsValue *MAV) { + Metadata *MD = MAV->getMetadata(); + assert((isa<MDNode>(MD) || isa<ConstantAsMetadata>(MD)) && + "Expected a metadata node or a canonicalized constant"); + + if (MDNode *N = dyn_cast<MDNode>(MD)) + return N; + + return MDNode::get(MAV->getContext(), MD); +} + +void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef Val) { + MDNode *N = Val ? extractMDNode(unwrap<MetadataAsValue>(Val)) : nullptr; + unwrap<Instruction>(Inst)->setMetadata(KindID, N); } @@ -795,7 +809,7 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name, return; if (!Val) return; - N->addOperand(cast<MDNode>(unwrap<MetadataAsValue>(Val)->getMetadata())); + N->addOperand(extractMDNode(unwrap<MetadataAsValue>(Val))); } /*--.. Operations on scalar constants ......................................--*/ diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index b9ab25651fa2..fa8d50ec160c 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -600,8 +600,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { // If P is an analysis pass and it is available then do not // generate the analysis again. Stale analysis info should not be // available at this point. 
- const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo(P->getPassID()); + const PassInfo *PI = findAnalysisPassInfo(P->getPassID()); if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) { delete P; return; @@ -619,7 +618,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { Pass *AnalysisPass = findAnalysisPass(*I); if (!AnalysisPass) { - const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); + const PassInfo *PI = findAnalysisPassInfo(*I); if (!PI) { // Pass P is not in the global PassRegistry @@ -716,8 +715,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { return *I; // If Pass not found then check the interfaces implemented by Immutable Pass - const PassInfo *PassInf = - PassRegistry::getPassRegistry()->getPassInfo(PI); + const PassInfo *PassInf = findAnalysisPassInfo(PI); assert(PassInf && "Expected all immutable passes to be initialized"); const std::vector<const PassInfo*> &ImmPI = PassInf->getInterfacesImplemented(); @@ -731,6 +729,17 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { return nullptr; } +const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const { + const PassInfo *&PI = AnalysisPassInfos[AID]; + if (!PI) + PI = PassRegistry::getPassRegistry()->getPassInfo(AID); + else + assert(PI == PassRegistry::getPassRegistry()->getPassInfo(AID) && + "The pass info pointer changed for an analysis ID!"); + + return PI; +} + // Print passes managed by this top level manager. void PMTopLevelManager::dumpPasses() const { @@ -759,8 +768,7 @@ void PMTopLevelManager::dumpArguments() const { dbgs() << "Pass Arguments: "; for (SmallVectorImpl<ImmutablePass *>::const_iterator I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) - if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) { + if (const PassInfo *PI = findAnalysisPassInfo((*I)->getPassID())) { assert(PI && "Expected all immutable passes to be initialized"); if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); @@ -824,7 +832,7 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) { // This pass is the current implementation of all of the interfaces it // implements as well. - const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI); + const PassInfo *PInf = TPM->findAnalysisPassInfo(PI); if (!PInf) return; const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented(); for (unsigned i = 0, e = II.size(); i != e; ++i) @@ -957,7 +965,7 @@ void PMDataManager::freePass(Pass *P, StringRef Msg, } AnalysisID PI = P->getPassID(); - if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) { + if (const PassInfo *PInf = TPM->findAnalysisPassInfo(PI)) { // Remove the pass itself (if it is not already removed). 
AvailableAnalysis.erase(PI); @@ -1037,7 +1045,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { for (SmallVectorImpl<AnalysisID>::iterator I = ReqAnalysisNotAvailable.begin(), E = ReqAnalysisNotAvailable.end() ;I != E; ++I) { - const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); + const PassInfo *PI = TPM->findAnalysisPassInfo(*I); Pass *AnalysisPass = PI->createPass(); this->addLowerLevelRequiredPass(P, AnalysisPass); } @@ -1142,7 +1150,7 @@ void PMDataManager::dumpPassArguments() const { PMD->dumpPassArguments(); else if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) + TPM->findAnalysisPassInfo((*I)->getPassID())) if (!PI->isAnalysisGroup()) dbgs() << " -" << PI->getPassArgument(); } @@ -1218,7 +1226,7 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; for (unsigned i = 0; i != Set.size(); ++i) { if (i) dbgs() << ','; - const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]); + const PassInfo *PInf = TPM->findAnalysisPassInfo(Set[i]); if (!PInf) { // Some preserved passes, such as AliasAnalysis, may not be initialized by // all drivers. @@ -1658,8 +1666,8 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { OnTheFlyManagers[P] = FPP; } - const PassInfo * RequiredPassPI = - PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID()); + const PassInfo *RequiredPassPI = + TPM->findAnalysisPassInfo(RequiredPass->getPassID()); Pass *FoundPass = nullptr; if (RequiredPassPI && RequiredPassPI->isAnalysis()) { diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 4dcf910e01a7..e2439abaf001 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -219,7 +219,7 @@ class ELFObjectWriter : public MCObjectWriter { const MCSymbolData *SD, uint64_t C, unsigned Type) const; - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override; @@ -789,11 +789,13 @@ static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) { return nullptr; } -void ELFObjectWriter::RecordRelocation(MCAssembler &Asm, +void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + const MCFixup &Fixup, + MCValue Target, + bool &IsPCRel, + uint64_t &FixedValue) { const MCSectionData *FixupSection = Fragment->getParent(); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index f7054902f24c..04cc0ff4a864 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -27,7 +27,22 @@ bool MCAsmInfoDarwin::isSectionAtomizableBySymbols( // contain. // Sections holding 2 byte strings require symbols in order to be atomized. // There is no dedicated section for 4 byte strings. 
- if (SMO.getType() == MachO::S_CSTRING_LITERALS) + if (SMO.getKind().isMergeable1ByteCString()) + return false; + + if (SMO.getSegmentName() == "__TEXT" && + SMO.getSectionName() == "__objc_classname" && + SMO.getType() == MachO::S_CSTRING_LITERALS) + return false; + + if (SMO.getSegmentName() == "__TEXT" && + SMO.getSectionName() == "__objc_methname" && + SMO.getType() == MachO::S_CSTRING_LITERALS) + return false; + + if (SMO.getSegmentName() == "__TEXT" && + SMO.getSectionName() == "__objc_methtype" && + SMO.getType() == MachO::S_CSTRING_LITERALS) return false; if (SMO.getSegmentName() == "__DATA" && SMO.getSectionName() == "__cfstring") diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 45d49fae94bf..e3c2443f4a21 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -425,16 +425,6 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const { return true; } -void MCAssembler::addLocalUsedInReloc(const MCSymbol &Sym) { - assert(Sym.isTemporary()); - LocalsUsedInReloc.insert(&Sym); -} - -bool MCAssembler::isLocalUsedInReloc(const MCSymbol &Sym) const { - assert(Sym.isTemporary()); - return LocalsUsedInReloc.count(&Sym); -} - bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { // Non-temporary labels should always be visible to the linker. if (!Symbol.isTemporary()) @@ -444,10 +434,8 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { if (!Symbol.isInSection()) return false; - if (isLocalUsedInReloc(Symbol)) - return true; - - return false; + // Otherwise, check if the section requires symbols even for temporary labels. + return getBackend().doesSectionRequireSymbols(Symbol.getSection()); } const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 588d424120c4..d3751bd9ba57 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -448,11 +448,14 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand( assert(OS.tell() - Start == Size); } -void MachObjectWriter::RecordRelocation(MCAssembler &Asm, + +void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + const MCFixup &Fixup, + MCValue Target, + bool &IsPCRel, + uint64_t &FixedValue) { TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup, Target, FixedValue); } @@ -613,22 +616,6 @@ void MachObjectWriter::ComputeSymbolTable( ExternalSymbolData[i].SymbolData->setIndex(Index++); for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) UndefinedSymbolData[i].SymbolData->setIndex(Index++); - - for (const MCSectionData &SD : Asm) { - std::vector<RelAndSymbol> &Relocs = Relocations[&SD]; - for (RelAndSymbol &Rel : Relocs) { - if (!Rel.Sym) - continue; - - // Set the Index and the IsExtern bit. - unsigned Index = Rel.Sym->getIndex(); - assert(isInt<24>(Index)); - if (IsLittleEndian) - Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (-1 << 24)) | Index | (1 << 27); - else - Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); - } - } } void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, @@ -675,6 +662,10 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, // Mark symbol difference expressions in variables (from .set or = directives) // as absolute. markAbsoluteVariableSymbols(Asm, Layout); + + // Compute symbol table information and bind symbol indices. 
+ ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); } bool MachObjectWriter:: @@ -758,10 +749,6 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { - // Compute symbol table information and bind symbol indices. - ComputeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, - UndefinedSymbolData); - unsigned NumSections = Asm.size(); const MCAssembler::VersionMinInfoType &VersionInfo = Layout.getAssembler().getVersionMinInfo(); @@ -852,7 +839,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { - std::vector<RelAndSymbol> &Relocs = Relocations[it]; + std::vector<MachO::any_relocation_info> &Relocs = Relocations[it]; unsigned NumRelocs = Relocs.size(); uint64_t SectionStart = SectionDataStart + getSectionAddress(it); WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); @@ -946,10 +933,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, ie = Asm.end(); it != ie; ++it) { // Write the section relocation entries, in reverse order to match 'as' // (approximately, the exact algorithm is more complicated than this). - std::vector<RelAndSymbol> &Relocs = Relocations[it]; + std::vector<MachO::any_relocation_info> &Relocs = Relocations[it]; for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - Write32(Relocs[e - i - 1].MRE.r_word0); - Write32(Relocs[e - i - 1].MRE.r_word1); + Write32(Relocs[e - i - 1].r_word0); + Write32(Relocs[e - i - 1].r_word1); } } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index d8729bdbc4bb..c17f99b9bd7b 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -175,7 +175,7 @@ public: const MCFragment &FB, bool InSet, bool IsPCRel) const override; - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override; @@ -661,9 +661,13 @@ bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( InSet, IsPCRel); } -void WinCOFFObjectWriter::RecordRelocation( - MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { +void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + bool &IsPCRel, + uint64_t &FixedValue) { assert(Target.getSymA() && "Relocation must reference a symbol!"); const MCSymbol &Symbol = Target.getSymA()->getSymbol(); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index f923a9aa87ae..0838e90baaec 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -246,13 +246,21 @@ static Triple::ArchType parseARMArch(StringRef ArchName) { if (ArchName.startswith("armv")) { offset = 3; - arch = Triple::arm; + if (ArchName.endswith("eb")) { + arch = Triple::armeb; + ArchName = ArchName.substr(0, ArchName.size() - 2); + } else + arch = Triple::arm; } else if (ArchName.startswith("armebv")) { offset = 5; arch = Triple::armeb; } else if (ArchName.startswith("thumbv")) { offset = 5; - arch = Triple::thumb; + if (ArchName.endswith("eb")) { + arch = Triple::thumbeb; + ArchName = 
ArchName.substr(0, ArchName.size() - 2); + } else + arch = Triple::thumb; } else if (ArchName.startswith("thumbebv")) { offset = 7; arch = Triple::thumbeb; @@ -271,6 +279,8 @@ static Triple::ArchType parseARMArch(StringRef ArchName) { } static Triple::ArchType parseArch(StringRef ArchName) { + Triple::ArchType ARMArch(parseARMArch(ArchName)); + return StringSwitch<Triple::ArchType>(ArchName) .Cases("i386", "i486", "i586", "i686", Triple::x86) // FIXME: Do we need to support these? @@ -280,9 +290,10 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("powerpc64", "ppu", Triple::ppc64) .Case("powerpc64le", Triple::ppc64le) .Case("xscale", Triple::arm) - .StartsWith("arm", parseARMArch(ArchName)) - .StartsWith("thumb", parseARMArch(ArchName)) - .StartsWith("aarch64", parseARMArch(ArchName)) + .Case("xscaleeb", Triple::armeb) + .StartsWith("arm", ARMArch) + .StartsWith("thumb", ARMArch) + .StartsWith("aarch64", ARMArch) .Case("msp430", Triple::msp430) .Cases("mips", "mipseb", "mipsallegrex", Triple::mips) .Cases("mipsel", "mipsallegrexel", Triple::mipsel) @@ -379,6 +390,9 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) { } static Triple::SubArchType parseSubArch(StringRef SubArchName) { + if (SubArchName.endswith("eb")) + SubArchName = SubArchName.substr(0, SubArchName.size() - 2); + return StringSwitch<Triple::SubArchType>(SubArchName) .EndsWith("v8", Triple::ARMSubArch_v8) .EndsWith("v8a", Triple::ARMSubArch_v8) @@ -1022,6 +1036,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { offset = 5; if (offset != StringRef::npos && MArch.substr(offset, 2) == "eb") offset += 2; + if (MArch.endswith("eb")) + MArch = MArch.substr(0, MArch.size() - 2); if (offset != StringRef::npos) result = llvm::StringSwitch<const char *>(MArch.substr(offset)) .Cases("v2", "v2a", "arm2") diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 1a8040275ca8..a391e766deea 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -204,6 +204,44 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[ [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> ]>; +//===----------------------------------------------------------------------===// +// ARM64 Calling Convention for GHC +//===----------------------------------------------------------------------===// + +// This calling convention is specific to the Glasgow Haskell Compiler. +// The only documentation is the GHC source code, specifically the C header +// file: +// +// https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h +// +// which defines the registers for the Spineless Tagless G-Machine (STG) that +// GHC uses to implement lazy evaluation. The generic STG machine has a set of +// registers which are mapped to appropriate set of architecture specific +// registers for each CPU architecture. +// +// The STG Machine is documented here: +// +// https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode +// +// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI +// register mapping". + +def CC_AArch64_GHC : CallingConv<[ + // Handle all vector types as either f64 or v2f64. 
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; + // FIXME: LR is only callee-saved in the sense that *we* preserve it and are // presumably a callee to someone. External functions may not do so, but this // is currently safe since BL has LR as an implicit-def and what happens after a @@ -249,3 +287,4 @@ def CSR_AArch64_AllRegs (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), (sequence "Q%u", 0, 31))>; +def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>; diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 419fbc8be6c9..ca4e97bd7c03 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -302,6 +302,8 @@ static unsigned getImplicitScaleFactor(MVT VT) { CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { if (CC == CallingConv::WebKit_JS) return CC_AArch64_WebKit_JS; + if (CC == CallingConv::GHC) + return CC_AArch64_GHC; return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 66aa216db2c9..d8e91562e316 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -215,6 +215,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { bool HasFP = hasFP(MF); DebugLoc DL = MBB.findDebugLoc(MBBI); + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { assert(!HasFP && "unexpected function without stack frame but with FP"); @@ -451,6 +456,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + // Initial and residual are named for consitency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. 
uint64_t ArgumentPopSize = 0; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 0d44f992a2ab..19f51ce9450c 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1990,6 +1990,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, llvm_unreachable("Unsupported calling convention."); case CallingConv::WebKit_JS: return CC_AArch64_WebKit_JS; + case CallingConv::GHC: + return CC_AArch64_GHC; case CallingConv::C: case CallingConv::Fast: if (!Subtarget->isTargetDarwin()) diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index d734d436add7..206cdbbe0c5b 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -33,6 +33,10 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "AArch64GenRegisterInfo.inc" +static cl::opt<bool> +ReserveX18("aarch64-reserve-x18", cl::Hidden, + cl::desc("Reserve X18, making it unavailable as GPR")); + AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti) : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {} @@ -40,6 +44,10 @@ AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii, const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); + if (MF->getFunction()->getCallingConv() == CallingConv::GHC) + // GHC set of callee saved regs is empty as all those regs are + // used for passing STG regs around + return CSR_AArch64_NoRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; else @@ -48,6 +56,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t * AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; else @@ -63,7 +74,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { } const uint32_t * -AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { +AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { // This should return a register mask that is the same as that returned by // getCallPreservedMask but that additionally preserves the register used for // the first i64 argument (which must also be the register used to return a @@ -71,6 +82,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { // // In case that the calling convention does not use the same register for // both, the function should return NULL (does not currently apply) + assert(CC != CallingConv::GHC && "should not be GHC calling convention."); return CSR_AArch64_AAPCS_ThisReturn_RegMask; } @@ -90,7 +102,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AArch64::W29); } - if (STI->isTargetDarwin()) { + if (STI->isTargetDarwin() || ReserveX18) { Reserved.set(AArch64::X18); // Platform register Reserved.set(AArch64::W18); } @@ -117,7 +129,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, return true; case AArch64::X18: case AArch64::W18: - return STI->isTargetDarwin(); + return STI->isTargetDarwin() || ReserveX18; case AArch64::FP: case 
AArch64::W29: return TFI->hasFP(MF) || STI->isTargetDarwin(); @@ -379,7 +391,7 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case AArch64::GPR64commonRegClassID: return 32 - 1 // XZR/SP - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - - STI->isTargetDarwin() // X18 reserved as platform register + - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register - hasBasePointer(MF); // X19 case AArch64::FPR8RegClassID: case AArch64::FPR16RegClassID: diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 423da6500c48..27cbac9f12e2 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -317,6 +317,42 @@ public: MachO::CPU_SUBTYPE_ARM64_ALL); } + bool doesSectionRequireSymbols(const MCSection &Section) const override { + // Any section for which the linker breaks things into atoms needs to + // preserve symbols, including assembler local symbols, to identify + // those atoms. These sections are: + // Sections of type: + // + // S_CSTRING_LITERALS (e.g. __cstring) + // S_LITERAL_POINTERS (e.g. objc selector pointers) + // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS + // + // Sections named: + // + // __TEXT,__eh_frame + // __TEXT,__ustring + // __DATA,__cfstring + // __DATA,__objc_classrefs + // __DATA,__objc_catlist + // + // FIXME: It would be better if the compiler used actual linker local + // symbols for each of these sections rather than preserving what + // are ostensibly assembler local symbols. + const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section); + return (SMO.getType() == MachO::S_CSTRING_LITERALS || + SMO.getType() == MachO::S_4BYTE_LITERALS || + SMO.getType() == MachO::S_8BYTE_LITERALS || + SMO.getType() == MachO::S_16BYTE_LITERALS || + SMO.getType() == MachO::S_LITERAL_POINTERS || + (SMO.getSegmentName() == "__TEXT" && + (SMO.getSectionName() == "__eh_frame" || + SMO.getSectionName() == "__ustring")) || + (SMO.getSegmentName() == "__DATA" && + (SMO.getSectionName() == "__cfstring" || + SMO.getSectionName() == "__objc_classrefs" || + SMO.getSectionName() == "__objc_catlist"))); + } + /// \brief Generate the compact unwind encoding from the CFI directives. 
uint32_t generateCompactUnwindEncoding( ArrayRef<MCCFIInstruction> Instrs) const override { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index f6fab5d6b6fe..e12a24be0a67 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -10,7 +10,6 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -34,7 +33,7 @@ public: : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; @@ -113,25 +112,8 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( } } -static bool canUseLocalRelocation(const MCSectionMachO &Section, - const MCSymbol &Symbol, unsigned Log2Size) { - // Debug info sections can use local relocations. - if (Section.hasAttribute(MachO::S_ATTR_DEBUG)) - return true; - - // Otherwise, only pointer sized relocations are supported. - if (Log2Size != 3) - return false; - - // But only if they don't point to a cstring. - if (!Symbol.isInSection()) - return true; - const MCSectionMachO &RefSec = cast<MCSectionMachO>(Symbol.getSection()); - return RefSec.getType() != MachO::S_CSTRING_LITERALS; -} - void AArch64MachObjectWriter::RecordRelocation( - MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, + MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); @@ -141,9 +123,9 @@ void AArch64MachObjectWriter::RecordRelocation( unsigned Log2Size = 0; int64_t Value = 0; unsigned Index = 0; + unsigned IsExtern = 0; unsigned Type = 0; unsigned Kind = Fixup.getKind(); - const MCSymbolData *RelSymbol = nullptr; FixupOffset += Fixup.getOffset(); @@ -189,8 +171,10 @@ void AArch64MachObjectWriter::RecordRelocation( // FIXME: Should this always be extern? // SymbolNum of 0 indicates the absolute section. Type = MachO::ARM64_RELOC_UNSIGNED; + Index = 0; if (IsPCRel) { + IsExtern = 1; Asm.getContext().FatalError(Fixup.getLoc(), "PC relative absolute relocation!"); @@ -214,12 +198,15 @@ void AArch64MachObjectWriter::RecordRelocation( Layout.getSymbolOffset(&B_SD) == Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) { // SymB is the PC, so use a PC-rel pointer-to-GOT relocation. + Index = A_Base->getIndex(); + IsExtern = 1; Type = MachO::ARM64_RELOC_POINTER_TO_GOT; IsPCRel = 1; MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); - Writer->addRelocation(A_Base, Fragment->getParent(), MRE); + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); return; } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) @@ -265,30 +252,25 @@ void AArch64MachObjectWriter::RecordRelocation( ? 
0 : Writer->getSymbolAddress(B_Base, Layout)); + Index = A_Base->getIndex(); + IsExtern = 1; Type = MachO::ARM64_RELOC_UNSIGNED; MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); - Writer->addRelocation(A_Base, Fragment->getParent(), MRE); + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); - RelSymbol = B_Base; + Index = B_Base->getIndex(); + IsExtern = 1; Type = MachO::ARM64_RELOC_SUBTRACTOR; } else { // A + constant const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - const MCSectionMachO &Section = static_cast<const MCSectionMachO &>( - Fragment->getParent()->getSection()); - - bool CanUseLocalRelocation = - canUseLocalRelocation(Section, *Symbol, Log2Size); - if (Symbol->isTemporary() && (Value || !CanUseLocalRelocation)) { - const MCSection &Sec = Symbol->getSection(); - if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec)) - Asm.addLocalUsedInReloc(*Symbol); - } - const MCSymbolData &SD = Asm.getSymbolData(*Symbol); const MCSymbolData *Base = Asm.getAtom(&SD); + const MCSectionMachO &Section = static_cast<const MCSectionMachO &>( + Fragment->getParent()->getSection()); // If the symbol is a variable and we weren't able to get a Base for it // (i.e., it's not in the symbol table associated with a section) resolve @@ -328,13 +310,16 @@ void AArch64MachObjectWriter::RecordRelocation( // sections, and for pointer-sized relocations (.quad), we allow section // relocations. It's code sections that run into trouble. if (Base) { - RelSymbol = Base; + Index = Base->getIndex(); + IsExtern = 1; // Add the local offset, if needed. if (Base != &SD) Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); } else if (Symbol->isInSection()) { - if (!CanUseLocalRelocation) + // Pointer-sized relocations can use a local relocation. Otherwise, + // we have to be in a debug info section. + if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3) Asm.getContext().FatalError( Fixup.getLoc(), "unsupported relocation of local symbol '" + Symbol->getName() + @@ -344,6 +329,7 @@ void AArch64MachObjectWriter::RecordRelocation( const MCSectionData &SymSD = Asm.getSectionData(SD.getSymbol().getSection()); Index = SymSD.getOrdinal() + 1; + IsExtern = 0; Value += Writer->getSymbolAddress(&SD, Layout); if (IsPCRel) @@ -376,16 +362,16 @@ void AArch64MachObjectWriter::RecordRelocation( MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = - (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); - Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); // Now set up the Addend relocation. Type = MachO::ARM64_RELOC_ADDEND; Index = Value; - RelSymbol = nullptr; IsPCRel = 0; Log2Size = 2; + IsExtern = 0; // Put zero into the instruction itself. The addend is in the relocation. 
Value = 0; @@ -397,9 +383,9 @@ void AArch64MachObjectWriter::RecordRelocation( // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = - (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); - Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); } MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS, diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c429ac185211..fda3e815624d 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -567,10 +567,21 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // MOV NewBase, Base // ADDS NewBase, #imm8. if (Base != NewBase && Offset >= 8) { + const ARMSubtarget &Subtarget = MBB.getParent()->getTarget() + .getSubtarget<ARMSubtarget>(); // Need to insert a MOV to the new base first. - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && + !Subtarget.hasV6Ops()) { + // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr + if (Pred != ARMCC::AL) + return false; + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) + .addReg(Base, getKillRegState(BaseKill)); + } else + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. Base = NewBase; BaseKill = false; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 6a00b28f37a7..96f3b4e64326 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -9191,27 +9191,39 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { // FIXME: This is duplicated in getARMFPUFeatures() in // tools/clang/lib/Driver/Tools.cpp static const struct { - const unsigned Fpu; + const unsigned ID; const uint64_t Enabled; const uint64_t Disabled; -} Fpus[] = { - {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, - {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, - {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON}, - {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON}, - {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON}, - {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON}, - {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16, - ARM::FeatureNEON | ARM::FeatureCrypto}, - {ARM::FP_ARMV8, ARM::FeatureFPARMv8, - ARM::FeatureNEON | ARM::FeatureCrypto}, - {ARM::NEON, ARM::FeatureNEON, 0}, - {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0}, - {ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON, - ARM::FeatureCrypto}, - {ARM::CRYPTO_NEON_FP_ARMV8, - ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0}, - {ARM::SOFTVFP, 0, 0}, +} FPUs[] = { + {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON}, + {ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16, + ARM::FeatureNEON}, + {ARM::VFPV4, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4, + ARM::FeatureNEON}, + {ARM::VFPV4_D16, + ARM::FeatureVFP2 | ARM::FeatureVFP3 
| ARM::FeatureVFP4 | ARM::FeatureD16, + ARM::FeatureNEON}, + {ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8 | ARM::FeatureD16, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0}, + {ARM::NEON_VFPV4, + ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON, + 0}, + {ARM::NEON_FP_ARMV8, + ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8 | ARM::FeatureNEON, + ARM::FeatureCrypto}, + {ARM::CRYPTO_NEON_FP_ARMV8, + ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | + ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, + 0}, + {ARM::SOFTVFP, 0, 0}, }; /// parseDirectiveFPU @@ -9229,14 +9241,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { return false; } - for (const auto &Fpu : Fpus) { - if (Fpu.Fpu != ID) + for (const auto &Entry : FPUs) { + if (Entry.ID != ID) continue; // Need to toggle features that should be on but are off and that // should be off but are on. - uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) | - (Fpu.Disabled & STI.getFeatureBits()); + uint64_t Toggle = (Entry.Enabled & ~STI.getFeatureBits()) | + (Entry.Disabled & STI.getFeatureBits()); setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle))); break; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 3187d36f7519..7da500390ed1 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -54,10 +54,10 @@ public: : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, - const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) override; + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) override; }; } @@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value2; - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); } MachO::any_relocation_info MRE; @@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value; - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); } void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, @@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value2; - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); } MachO::any_relocation_info MRE; @@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value; - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); } bool
ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, @@ -351,10 +351,11 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, } void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, - MCAssembler &Asm, + const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, + const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; @@ -400,8 +401,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // See <reloc.h>. uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned Index = 0; + unsigned IsExtern = 0; unsigned Type = 0; - const MCSymbolData *RelSymbol = nullptr; if (Target.isAbsolute()) { // constant // FIXME! @@ -421,7 +422,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Check whether we need an external or internal relocation. if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD, FixedValue)) { - RelSymbol = SD; + IsExtern = 1; + Index = SD->getIndex(); // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was @@ -445,8 +447,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = - (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. @@ -471,10 +476,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, (Log2Size << 25) | (MachO::ARM_RELOC_PAIR << 28)); - Writer->addRelocation(nullptr, Fragment->getParent(), MREPair); + Writer->addRelocation(Fragment->getParent(), MREPair); } - Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); } MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index 2b459a4336b3..0c2407508869 100644 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -15,6 +15,7 @@ #include "Hexagon.h" #include "HexagonTargetMachine.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" @@ -42,7 +43,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineFunctionAnalysis>(); AU.addPreserved<MachineFunctionAnalysis>(); - AU.addPreserved("stack-protector"); + AU.addPreserved<StackProtector>(); FunctionPass::getAnalysisUsage(AU); } }; diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index d4852c4ece40..74796954a528 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -497,14 +497,14 @@ getOpndList(SmallVectorImpl<SDValue> &Ops, SDValue JumpTarget = Callee; // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. + // -relocation-model=pic or it is an indirect call. 
if (IsPICCall || !GlobalOrExternal) { unsigned V0Reg = Mips::V0; if (NeedMips16Helper) { RegsToPass.push_front(std::make_pair(V0Reg, Callee)); JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget); - JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG, + JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG, MipsII::MO_GOT, Chain, FuncInfo->callPtrInfo(S->getSymbol())); } else diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 6d6735b3aae6..185d12ec93fd 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -756,7 +756,7 @@ def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>, ISA_MIPS32R6; def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>, ISA_MIPS32R6; -def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>, +def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>, ISA_MIPS32R6; def : MipsPat<(setne f32:$lhs, f32:$rhs), (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; @@ -776,7 +776,7 @@ def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>, ISA_MIPS32R6; def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>, ISA_MIPS32R6; -def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>, +def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>, ISA_MIPS32R6; def : MipsPat<(setne f64:$lhs, f64:$rhs), (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 99fd739c3ed0..d25f5637f57c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1613,22 +1613,22 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) // %gp_rel relocation - return getAddrGPRel(N, Ty, DAG); + return getAddrGPRel(N, SDLoc(N), Ty, DAG); // %hi/%lo relocation - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(N, SDLoc(N), Ty, DAG); } if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV))) - return getAddrLocal(N, Ty, DAG, + return getAddrLocal(N, SDLoc(N), Ty, DAG, Subtarget.isABI_N32() || Subtarget.isABI_N64()); if (LargeGOT) - return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16, + return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16, DAG.getEntryNode(), MachinePointerInfo::getGOT()); - return getAddrGlobal(N, Ty, DAG, + return getAddrGlobal(N, SDLoc(N), Ty, DAG, (Subtarget.isABI_N32() || Subtarget.isABI_N64()) ? 
MipsII::MO_GOT_DISP : MipsII::MO_GOT16, @@ -1642,9 +1642,9 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !Subtarget.isABI_N64()) - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(N, SDLoc(N), Ty, DAG); - return getAddrLocal(N, Ty, DAG, + return getAddrLocal(N, SDLoc(N), Ty, DAG, Subtarget.isABI_N32() || Subtarget.isABI_N64()); } @@ -1735,9 +1735,9 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !Subtarget.isABI_N64()) - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(N, SDLoc(N), Ty, DAG); - return getAddrLocal(N, Ty, DAG, + return getAddrLocal(N, SDLoc(N), Ty, DAG, Subtarget.isABI_N32() || Subtarget.isABI_N64()); } @@ -1754,12 +1754,12 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const if (TLOF.IsConstantInSmallSection(N->getConstVal(), getTargetMachine())) // %gp_rel relocation - return getAddrGPRel(N, Ty, DAG); + return getAddrGPRel(N, SDLoc(N), Ty, DAG); - return getAddrNonPIC(N, Ty, DAG); + return getAddrNonPIC(N, SDLoc(N), Ty, DAG); } - return getAddrLocal(N, Ty, DAG, + return getAddrLocal(N, SDLoc(N), Ty, DAG, Subtarget.isABI_N32() || Subtarget.isABI_N64()); } @@ -2681,15 +2681,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InternalLinkage = Val->hasInternalLinkage(); if (InternalLinkage) - Callee = getAddrLocal(G, Ty, DAG, + Callee = getAddrLocal(G, DL, Ty, DAG, Subtarget.isABI_N32() || Subtarget.isABI_N64()); else if (LargeGOT) { - Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16, + Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Val)); IsCallReloc = true; } else { - Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain, + Callee = getAddrGlobal(G, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain, FuncInfo->callPtrInfo(Val)); IsCallReloc = true; } @@ -2702,15 +2702,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const char *Sym = S->getSymbol(); if (!Subtarget.isABI_N64() && !IsPIC) // !N64 && static - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), - MipsII::MO_NO_FLAG); + Callee = + DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG); else if (LargeGOT) { - Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16, + Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(Sym)); IsCallReloc = true; } else { // N64 || PIC - Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain, + Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain, FuncInfo->callPtrInfo(Sym)); IsCallReloc = true; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 71f140b7a656..4132de6dbcad 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -272,9 +272,8 @@ namespace llvm { // // (add (load (wrapper $gp, %got(sym)), %lo(sym)) template <class NodeTy> - SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG, + SDValue getAddrLocal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG, bool IsN32OrN64) const { - SDLoc DL(N); unsigned GOTFlag = IsN32OrN64 ? 
MipsII::MO_GOT_PAGE : MipsII::MO_GOT; SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(N, Ty, DAG, GOTFlag)); @@ -291,11 +290,10 @@ namespace llvm { // computing a global symbol's address: // // (load (wrapper $gp, %got(sym))) - template<class NodeTy> - SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG, + template <class NodeTy> + SDValue getAddrGlobal(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG, unsigned Flag, SDValue Chain, const MachinePointerInfo &PtrInfo) const { - SDLoc DL(N); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(N, Ty, DAG, Flag)); return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0); @@ -305,14 +303,13 @@ namespace llvm { // computing a global symbol's address in large-GOT mode: // // (load (wrapper (add %hi(sym), $gp), %lo(sym))) - template<class NodeTy> - SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG, - unsigned HiFlag, unsigned LoFlag, - SDValue Chain, + template <class NodeTy> + SDValue getAddrGlobalLargeGOT(NodeTy *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned HiFlag, + unsigned LoFlag, SDValue Chain, const MachinePointerInfo &PtrInfo) const { - SDLoc DL(N); - SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, - getTargetNode(N, Ty, DAG, HiFlag)); + SDValue Hi = + DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag)); Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty)); SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi, getTargetNode(N, Ty, DAG, LoFlag)); @@ -324,9 +321,9 @@ namespace llvm { // computing a symbol's address in non-PIC mode: // // (add %hi(sym), %lo(sym)) - template<class NodeTy> - SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) const { - SDLoc DL(N); + template <class NodeTy> + SDValue getAddrNonPIC(NodeTy *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG) const { SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI); SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO); return DAG.getNode(ISD::ADD, DL, Ty, @@ -338,9 +335,8 @@ namespace llvm { // computing a symbol's address using gp-relative addressing: // // (add $gp, %gp_rel(sym)) - template<class NodeTy> - SDValue getAddrGPRel(NodeTy *N, EVT Ty, SelectionDAG &DAG) const { - SDLoc DL(N); + template <class NodeTy> + SDValue getAddrGPRel(NodeTy *N, SDLoc DL, EVT Ty, SelectionDAG &DAG) const { assert(Ty == MVT::i32); SDValue GPRel = getTargetNode(N, Ty, DAG, MipsII::MO_GPREL); return DAG.getNode(ISD::ADD, DL, Ty, diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 69fc86e75414..c343980c5014 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" @@ -32,8 +33,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DataLayoutPass>(); - AU.addPreserved("stack-protector"); AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); } const char *getPassName() const override { diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index 8759406a6803..da301d5de62d 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H #include 
"llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" @@ -29,8 +30,8 @@ struct NVPTXLowerAggrCopies : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DataLayoutPass>(); - AU.addPreserved("stack-protector"); AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); } bool runOnFunction(Function &F) override; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index f7259b9a098c..df2f14a91a7e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -41,7 +41,7 @@ public: : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/Is64Bit) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override { @@ -282,7 +282,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( MachO::any_relocation_info MRE; makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR, Log2Size, IsPCRel, Value2); - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered // relocation offset field, so we fall back to using a non-scattered @@ -296,7 +296,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( } MachO::any_relocation_info MRE; makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value); - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); return true; } @@ -331,9 +331,9 @@ void PPCMachObjectWriter::RecordPPCRelocation( // See <reloc.h>. const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup); unsigned Index = 0; + unsigned IsExtern = 0; unsigned Type = RelocType; - const MCSymbolData *RelSymbol = nullptr; if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. // @@ -355,7 +355,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( // Check whether we need an external or internal relocation. if (Writer->doesSymbolRequireExternRelocation(SD)) { - RelSymbol = SD; + IsExtern = 1; + Index = SD->getIndex(); // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was // undefined. This occurs with weak definitions, for example. 
@@ -374,8 +375,9 @@ void PPCMachObjectWriter::RecordPPCRelocation( // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; - makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, false, Type); - Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); + makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern, + Type); + Writer->addRelocation(Fragment->getParent(), MRE); } MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index fcf9eca80e96..c6600550126e 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -77,7 +77,11 @@ extern Target TheGCNTarget; namespace AMDGPU { enum TargetIndex { - TI_CONSTDATA_START + TI_CONSTDATA_START, + TI_SCRATCH_RSRC_DWORD0, + TI_SCRATCH_RSRC_DWORD1, + TI_SCRATCH_RSRC_DWORD2, + TI_SCRATCH_RSRC_DWORD3 }; } diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 8a5ca613dc80..1df4448abf05 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -92,6 +92,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "true", "Support flat address space">; +def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", + "EnableVGPRSpilling", + "true", + "Enable spilling of VGPRs to scratch memory">; + class SubtargetFeatureFetchLimit <string Value> : SubtargetFeature <"fetch"#Value, "TexVTXClauseSize", diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 624f3919b409..6185e367ff50 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -116,7 +116,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; if (STM.isAmdHsaOS()) { - OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); getSIProgramInfo(KernelInfo, MF); EmitAmdKernelCodeT(MF, KernelInfo); OutStreamer.EmitCodeAlignment(2 << (MF.getAlignment() - 1)); @@ -421,6 +420,7 @@ static unsigned getRsrcReg(unsigned ShaderType) { void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo) { + const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); unsigned RsrcReg = getRsrcReg(MFI->getShaderType()); @@ -441,6 +441,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer.EmitIntValue(RsrcReg, 4); OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) | S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4); + if (STM.isVGPRSpillingEnabled(MFI)) { + OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); + OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4); + } } if (MFI->getShaderType() == ShaderType::PIXEL) { @@ -504,6 +508,19 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, header.wavefront_size = STM.getWavefrontSize(); + const MCSectionELF *VersionSection = OutContext.getELFSection(".hsa.version", + ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly()); + OutStreamer.SwitchSection(VersionSection); + OutStreamer.EmitBytes(Twine("HSA Code Unit:" + + Twine(header.hsail_version_major) + "." + + Twine(header.hsail_version_minor) + ":" + + "AMD:" + + Twine(header.amd_code_version_major) + "." 
+ + Twine(header.amd_code_version_minor) + ":" + + "GFX8.1:0").str()); + + OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); + if (isVerbose()) { OutStreamer.emitRawComment("amd_code_version_major = " + Twine(header.amd_code_version_major), false); diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index eaa506db96c3..15112c7e54d4 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -417,6 +417,28 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { N->getValueType(0), Ops); } + case ISD::LOAD: { + // To simplify the TableGen patterns, we replace all i64 loads with + // v2i32 loads. Alternatively, we could promote i64 loads to v2i32 + // during DAG legalization; however, some places (ExpandUnalignedLoad) + // in the DAG legalizer assume that if i64 is legal, so are i64 loads, + // so doing this promotion early can cause problems. + EVT VT = N->getValueType(0); + LoadSDNode *LD = cast<LoadSDNode>(N); + if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) + break; + + SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(), + LD->getBasePtr(), LD->getMemOperand()); + SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SDLoc(N), + MVT::i64, NewLoad); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1)); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast); + SelectCode(NewLoad.getNode()); + N = BitCast.getNode(); + break; + } + case AMDGPUISD::REGISTER_LOAD: { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) break; @@ -962,16 +984,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); - unsigned ScratchPtrReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); unsigned ScratchOffsetReg = TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass, ScratchOffsetReg, MVT::i32); + SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); + SDValue ScratchRsrcDword0 = + SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); - SDValue ScratchPtr = - CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64); + SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); + SDValue ScratchRsrcDword1 = + SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); + + const SDValue RsrcOps[] = { + CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), + ScratchRsrcDword0, + CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + ScratchRsrcDword1, + CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), + }; + SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::v2i32, RsrcOps), 0); Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0); SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); @@ -988,22 +1021,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, } } - // (add FI, n0) - if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && - isa<FrameIndexSDNode>(Addr.getOperand(0))) { - VAddr = Addr.getOperand(1); - ImmOffset = Addr.getOperand(0); - return true; - } - - // (FI) - if (isa<FrameIndexSDNode>(Addr)) { - VAddr = SDValue(CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, - CurDAG->getConstant(0,
MVT::i32)), 0); - ImmOffset = Addr; - return true; - } - // (node) VAddr = Addr; ImmOffset = CurDAG->getTargetConstant(0, MVT::i16); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 206050d54a02..2adcdf1c299e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -187,9 +187,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v2f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); - setOperationAction(ISD::LOAD, MVT::i64, Promote); - AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); - setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index 5beaa6841c94..e34a7b7345f1 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -341,8 +341,39 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { // instead. namespace llvm { namespace AMDGPU { -int getMCOpcode(uint16_t Opcode, unsigned Gen) { +static int getMCOpcode(uint16_t Opcode, unsigned Gen) { return getMCOpcodeGen(Opcode, (enum Subtarget)Gen); } } } + +// This must be kept in sync with the SISubtarget class in SIInstrInfo.td +enum SISubtarget { + SI = 0, + VI = 1 +}; + +enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) { + switch (Gen) { + default: + return SI; + case AMDGPUSubtarget::VOLCANIC_ISLANDS: + return VI; + } +} + +int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { + int MCOp = AMDGPU::getMCOpcode(Opcode, + AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration())); + + // -1 means that Opcode is already a native instruction. + if (MCOp == -1) + return Opcode; + + // (uint16_t)-1 means that Opcode is a pseudo instruction that has + // no encoding in the given subtarget generation. + if (MCOp == (uint16_t)-1) + return -1; + + return MCOp; +} diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index da9833d25a52..e28ce0f03acc 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -135,6 +135,11 @@ public: bool isRegisterStore(const MachineInstr &MI) const; bool isRegisterLoad(const MachineInstr &MI) const; + /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. + /// Return -1 if the target-specific opcode for the pseudo instruction does + /// not exist. If Opcode is not a pseudo instruction, this is identity. + int pseudoToMCOpcode(int Opcode) const; + //===---------------------------------------------------------------------===// // Pure virtual functions to be implemented by sub-classes.
//===---------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 1995ef2b0c9e..03aa32d05ef0 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -39,29 +40,17 @@ AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st): Ctx(ctx), ST(st) { } -enum AMDGPUMCInstLower::SISubtarget -AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned Gen) const { - switch (Gen) { - default: - return AMDGPUMCInstLower::SI; - case AMDGPUSubtarget::VOLCANIC_ISLANDS: - return AMDGPUMCInstLower::VI; - } -} - -unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const { - - int MCOpcode = AMDGPU::getMCOpcode(MIOpcode, - AMDGPUSubtargetToSISubtarget(ST.getGeneration())); - if (MCOpcode == -1) - MCOpcode = MIOpcode; +void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { - return MCOpcode; -} + int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode()); -void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + if (MCOpcode == -1) { + LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext(); + C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " + "a target-specific version: " + Twine(MI->getOpcode())); + } - OutMI.setOpcode(getMCOpcode(MI->getOpcode())); + OutMI.setOpcode(MCOpcode); for (const MachineOperand &MO : MI->explicit_operands()) { MCOperand MCOp; @@ -91,6 +80,12 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::CreateExpr(Expr); break; } + case MachineOperand::MO_ExternalSymbol: { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName())); + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + MCOp = MCOperand::CreateExpr(Expr); + break; + } } OutMI.addOperand(MCOp); } diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h index 0ae4d11bf1d1..d322fe072b2b 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.h +++ b/lib/Target/R600/AMDGPUMCInstLower.h @@ -19,23 +19,9 @@ class MCContext; class MCInst; class AMDGPUMCInstLower { - - // This must be kept in sync with the SISubtarget class in SIInstrInfo.td - enum SISubtarget { - SI = 0, - VI = 1 - }; - MCContext &Ctx; const AMDGPUSubtarget &ST; - /// Convert a member of the AMDGPUSubtarget::Generation enum to the - /// SISubtarget enum. - enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const; - - /// Get the MC opcode for this MachineInstr. 
- unsigned getMCOpcode(unsigned MIOpcode) const; - public: AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST); diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 597e558e6634..b1c7498fc142 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -18,7 +18,9 @@ #include "R600MachineScheduler.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineScheduler.h" using namespace llvm; @@ -78,6 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), + EnableVGPRSpilling(false), DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) @@ -113,3 +116,26 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { case SEA_ISLANDS: return 12; } } + +bool AMDGPUSubtarget::isVGPRSpillingEnabled( + const SIMachineFunctionInfo *MFI) const { + return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling; +} + +void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (getGeneration() >= SOUTHERN_ISLANDS) { + + // Track register pressure so the scheduler can try to decrease + // pressure once register usage is above the threshold defined by + // SIRegisterInfo::getRegPressureSetLimit() + Policy.ShouldTrackPressure = true; + + // Enabling both top down and bottom up scheduling seems to give us fewer + // register spills than just using one of these approaches on its own. + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + } +} diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 90179d79d25d..566b45c1dccc 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -30,6 +30,8 @@ namespace llvm { +class SIMachineFunctionInfo; + class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { public: @@ -63,6 +65,7 @@ private: unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; + bool EnableVGPRSpilling; const DataLayout DL; AMDGPUFrameLowering FrameLowering; @@ -206,6 +209,10 @@ public: return getGeneration() <= NORTHERN_ISLANDS; } + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, MachineInstr *end, + unsigned NumRegionInstrs) const override; + // Helper functions to simplify if statements bool isTargetELF() const { return false; @@ -224,6 +231,15 @@ public: bool isAmdHsaOS() const { return TargetTriple.getOS() == Triple::AMDHSA; } + bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + + unsigned getMaxWavesPerCU() const { + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 10; + + // FIXME: Not sure what this is for other subtargets.
+ llvm_unreachable("do not know max waves per CU for this subtarget."); + } }; } // End namespace llvm diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index d0c634fb7e42..5fb311b3016b 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -29,7 +29,7 @@ public: const MCAsmLayout &Layout) override { //XXX: Implement if necessary. } - void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) override { diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 73a9c73d8e7b..7601794beab8 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -163,4 +163,8 @@ namespace SIOutMods { #define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860 #define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12) +#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8 +#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12) + + #endif diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0a3fa2f930d7..6b2ea0682a43 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -588,6 +588,12 @@ SDValue SITargetLowering::LowerFormalArguments( InVals.push_back(Val); } + + if (Info->getShaderType() != ShaderType::COMPUTE) { + unsigned ScratchIdx = CCInfo.getFirstUnallocated( + AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()); + Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx); + } return Chain; } diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 99a1df36c1f4..09c0cbe8f5c3 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -85,49 +85,41 @@ class Enc64 { let Uses = [EXEC] in { -class VOPCCommon <dag ins, string asm, list<dag> pattern> : - InstSI <(outs VCCReg:$dst), ins, asm, pattern> { +class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> : + InstSI <outs, ins, asm, pattern> { - let DisableEncoding = "$dst"; let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let UseNamedOperandTable = 1; - let VOPC = 1; let VALU = 1; +} + +class VOPCCommon <dag ins, string asm, list<dag> pattern> : + VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> { + + let DisableEncoding = "$dst"; + let VOPC = 1; let Size = 4; } class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> : - InstSI <outs, ins, asm, pattern> { - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; + VOPAnyCommon <outs, ins, asm, pattern> { + let VOP1 = 1; - let VALU = 1; let Size = 4; } class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> : - InstSI <outs, ins, asm, pattern> { + VOPAnyCommon <outs, ins, asm, pattern> { - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; let VOP2 = 1; - let VALU = 1; let Size = 4; } class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> : - InstSI <outs, ins, asm, pattern> { + VOPAnyCommon <outs, ins, asm, pattern> { - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; // Using complex patterns gives VOP3 patterns a very high complexity rating, // but standalone patterns are almost always preferred, so we need to adjust the //
priority lower. The goal is to use a high number to reduce complexity to @@ -135,8 +127,6 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> : let AddedComplexity = -1000; let VOP3 = 1; - let VALU = 1; - int Size = 8; } diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 1a4c0d4e57b0..80b560eb65ae 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -430,15 +430,6 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { return AMDGPU::COPY; } -static bool shouldTryToSpillVGPRs(MachineFunction *MF) { - - SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); - - // FIXME: Implement spilling for other shader types. - return MFI->getShaderType() == ShaderType::COMPUTE; - -} - void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, @@ -462,7 +453,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; } - } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) { + } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { MFI->setHasSpilledVGPRs(); switch(RC->getSize() * 8) { @@ -482,7 +473,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // Place-holder registers, these will be filled in by // SIPrepareScratchRegs. - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } else { LLVMContext &Ctx = MF->getFunction()->getContext(); @@ -499,6 +490,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); + const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); int Opcode = -1; @@ -511,7 +503,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; } - } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) { + } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { switch(RC->getSize() * 8) { case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break; case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break; @@ -528,7 +520,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // Place-holder registers, these will be filled in by // SIPrepareScratchRegs. 
- .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0, RegState::Undef); } else { @@ -615,7 +607,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, .addImm(-1) .addImm(0); - BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e32), + BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64), TIDReg) .addImm(-1) .addReg(TIDReg); @@ -1053,7 +1045,11 @@ bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const { } bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { - return AMDGPU::getVOPe32(Opcode) != -1; + int Op32 = AMDGPU::getVOPe32(Opcode); + if (Op32 == -1) + return false; + + return pseudoToMCOpcode(Op32) != -1; } bool SIInstrInfo::hasModifiers(unsigned Opcode) const { @@ -1126,12 +1122,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, } switch (Desc.OpInfo[i].OperandType) { - case MCOI::OPERAND_REGISTER: { - if (MI->getOperand(i).isImm() && - !isImmOperandLegal(MI, i, MI->getOperand(i))) { - ErrInfo = "Illegal immediate value for operand."; - return false; - } + case MCOI::OPERAND_REGISTER: + if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) { + ErrInfo = "Illegal immediate value for operand."; + return false; + } + break; + case AMDGPU::OPERAND_REG_IMM32: + break; + case AMDGPU::OPERAND_REG_INLINE_C: + if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) { + ErrInfo = "Illegal immediate value for operand."; + return false; } break; case MCOI::OPERAND_IMMEDIATE: @@ -1287,7 +1289,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; case AMDGPU::S_LOAD_DWORDX4_IMM: case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; - case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; + case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; } @@ -2278,7 +2280,7 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist MachineOperand &Dest = Inst->getOperand(0); MachineOperand &Src = Inst->getOperand(1); - const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32); + const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); const TargetRegisterClass *SrcRC = Src.isReg() ? 
MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass; diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index f766dc85e86a..28cd27dd8962 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -325,7 +325,6 @@ namespace AMDGPU { int getVOPe32(uint16_t Opcode); int getCommuteRev(uint16_t Opcode); int getCommuteOrig(uint16_t Opcode); - int getMCOpcode(uint16_t Opcode, unsigned Gen); int getAddr64Inst(uint16_t Opcode); int getAtomicRetOp(uint16_t Opcode); int getAtomicNoRetOp(uint16_t Opcode); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 7cc9588c8e4b..175e11d709cf 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -36,6 +36,12 @@ class vop2 <bits<6> si, bits<6> vi = si> : vop { field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}}; } +// Specify a VOP2 opcode for SI and VOP3 opcode for VI +// that doesn't have VOP2 encoding on VI +class vop23 <bits<6> si, bits<10> vi> : vop2 <si> { + let VI3 = vi; +} + class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop { let SI3 = si; let VI3 = vi; @@ -57,7 +63,7 @@ class sopk <bits<5> si, bits<5> vi = si> { } // Except for the NONE field, this must be kept in sync with the SISubtarget enum -// in AMDGPUMCInstLower.h +// in AMDGPUInstrInfo.cpp def SISubtarget { int NONE = -1; int SI = 0; @@ -731,7 +737,7 @@ class getAsm32 <int NumSrcArgs> { // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. class getAsm64 <int NumSrcArgs, bit HasModifiers> { - string src0 = "$src0_modifiers,"; + string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); string src1 = !if(!eq(NumSrcArgs, 1), "", !if(!eq(NumSrcArgs, 2), " $src1_modifiers", " $src1_modifiers,")); @@ -848,6 +854,16 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : let isPseudo = 1; } +multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, + string opName, string revOpSI> { + def "" : VOP2_Pseudo <outs, ins, pattern, opName>, + VOP2_REV<revOpSI#"_e32", !eq(revOpSI, opName)>; + + def _si : VOP2 <op.SI, outs, ins, opName#asm, []>, + VOP2_REV<revOpSI#"_e32_si", !eq(revOpSI, opName)>, + SIMCInstr <opName#"_e32", SISubtarget.SI>; +} + multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, string opName, string revOpSI, string revOpVI> { def "" : VOP2_Pseudo <outs, ins, pattern, opName>, @@ -889,16 +905,6 @@ class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3e_vi <op>, SIMCInstr <opName#"_e64", SISubtarget.VI>; -// VI only instruction -class VOP3_vi <bits<10> op, string opName, dag outs, dag ins, string asm, - list<dag> pattern, int NumSrcArgs, bit HasMods = 1> : - VOP3Common <outs, ins, asm, pattern>, - VOP <opName>, - VOP3e_vi <op>, - VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1), - !if(!eq(NumSrcArgs, 2), 0, 1), - HasMods>; - multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, int NumSrcArgs, bit HasMods = 1> { @@ -998,6 +1004,23 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm, } } +// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
+multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins, + string asm, list<dag> pattern = []> { + let isPseudo = 1 in { + def "" : VOPAnyCommon <outs, ins, "", pattern>, + SIMCInstr<opName, SISubtarget.NONE>; + } + + def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>, + SIMCInstr <opName, SISubtarget.SI>; + + def _vi : VOP3Common <outs, ins, asm, []>, + VOP3e_vi <op.VI3>, + VOP3DisableFields <1, 0, 0>, + SIMCInstr <opName, SISubtarget.VI>; +} + multiclass VOP1_Helper <vop1 op, string opName, dag outs, dag ins32, string asm32, list<dag> pat32, dag ins64, string asm64, list<dag> pat64, @@ -1089,6 +1112,33 @@ multiclass VOP2bInst <vop2 op, string opName, VOPProfile P, revOp, P.HasModifiers >; +// A VOP2 instruction that is VOP3-only on VI. +multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs, + dag ins32, string asm32, list<dag> pat32, + dag ins64, string asm64, list<dag> pat64, + string revOpSI, string revOpVI, bit HasMods> { + defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOpSI>; + + defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, + revOpSI, revOpVI, HasMods>; +} + +multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P, + SDPatternOperator node = null_frag, + string revOpSI = opName, string revOpVI = revOpSI> + : VOP2_VI3_Helper < + op, opName, P.Outs, + P.Ins32, P.Asm32, [], + P.Ins64, P.Asm64, + !if(P.HasModifiers, + [(set P.DstVT:$dst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, + i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], + [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), + revOpSI, revOpVI, P.HasModifiers +>; + class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOPCCommon <ins, "", pattern>, VOP <opName>, @@ -1224,34 +1274,6 @@ multiclass VOP3Inst <vop3 op, string opName, VOPProfile P, P.NumSrcArgs, P.HasModifiers >; -class VOP3InstVI <bits<10> op, string opName, VOPProfile P, - SDPatternOperator node = null_frag> : VOP3_vi < - op, opName#"_vi", P.Outs, P.Ins64, opName#P.Asm64, - !if(!eq(P.NumSrcArgs, 3), - !if(P.HasModifiers, - [(set P.DstVT:$dst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))], - [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1, - P.Src2VT:$src2))]), - !if(!eq(P.NumSrcArgs, 2), - !if(P.HasModifiers, - [(set P.DstVT:$dst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], - [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]) - /* P.NumSrcArgs == 1 */, - !if(P.HasModifiers, - [(set P.DstVT:$dst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod))))], - [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))), - P.NumSrcArgs, P.HasModifiers ->; - multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc, string opName, list<dag> pattern> : VOP3b_2_m < diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e05b6bb7d0f1..4b1a84662cb5 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1525,25 +1525,25 @@ defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32", } // End Uses = [VCC] } // End isCommutable = 1, Defs = [VCC] -// These instructions only exist on SI and CI -let 
SubtargetPredicate = isSICI in { - -def V_READLANE_B32 : VOP2 < - 0x00000001, +defm V_READLANE_B32 : VOP2SI_3VI_m < + vop3 <0x001, 0x289>, + "v_readlane_b32", (outs SReg_32:$vdst), (ins VGPR_32:$src0, SSrc_32:$vsrc1), - "v_readlane_b32 $vdst, $src0, $vsrc1", - [] + "v_readlane_b32 $vdst, $src0, $vsrc1" >; -def V_WRITELANE_B32 : VOP2 < - 0x00000002, +defm V_WRITELANE_B32 : VOP2SI_3VI_m < + vop3 <0x002, 0x28a>, + "v_writelane_b32", (outs VGPR_32:$vdst), (ins SReg_32:$src0, SSrc_32:$vsrc1), - "v_writelane_b32 $vdst, $src0, $vsrc1", - [] + "v_writelane_b32 $vdst, $src0, $vsrc1" >; +// These instructions only exist on SI and CI +let SubtargetPredicate = isSICI in { + let isCommutable = 1 in { defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32", VOP_F32_F32_F32 @@ -1568,30 +1568,33 @@ defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>; } } // End isCommutable = 1 +} // End let SubtargetPredicate = SICI -defm V_BFM_B32 : VOP2Inst <vop2<0x1e>, "v_bfm_b32", VOP_I32_I32_I32, - AMDGPUbfm>; -defm V_BCNT_U32_B32 : VOP2Inst <vop2<0x22>, "v_bcnt_u32_b32", VOP_I32_I32_I32>; -defm V_MBCNT_LO_U32_B32 : VOP2Inst <vop2<0x23>, "v_mbcnt_lo_u32_b32", +defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32, + AMDGPUbfm +>; +defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32", + VOP_I32_I32_I32 +>; +defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32", VOP_I32_I32_I32 >; -defm V_MBCNT_HI_U32_B32 : VOP2Inst <vop2<0x24>, "v_mbcnt_hi_u32_b32", +defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32", VOP_I32_I32_I32 >; -defm V_LDEXP_F32 : VOP2Inst <vop2<0x2b>, "v_ldexp_f32", +defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp >; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>; ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>; -defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <vop2<0x2f>, "v_cvt_pkrtz_f16_f32", +defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16 >; ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>; ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>; -} // End let SubtargetPredicate = SICI //===----------------------------------------------------------------------===// // VOP3 Instructions //===----------------------------------------------------------------------===// @@ -1656,9 +1659,6 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32", VOP_I32_I32_I32_I32 >; -// Only on SI -defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32", - VOP_F32_F32_F32_F32>; defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32", VOP_F32_F32_F32_F32, AMDGPUfmin3>; @@ -1699,20 +1699,6 @@ defm V_DIV_FIXUP_F64 : VOP3Inst < } // let SchedRW = [WriteDouble] -defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", - VOP_I64_I64_I32, shl ->; - -// Only on SI -defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", - VOP_I64_I64_I32, srl ->; - -// Only on SI -defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", - VOP_I64_I64_I32, sra ->; - let SchedRW = [WriteDouble] in { let isCommutable = 1 in { @@ -1785,6 +1771,26 @@ defm V_TRIG_PREOP_F64 : VOP3Inst < } // let SchedRW = [WriteDouble] +// These instructions only exist on SI and CI +let SubtargetPredicate = isSICI in { + +defm 
V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", + VOP_I64_I64_I32, shl +>; + +defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", + VOP_I64_I64_I32, srl +>; + +defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", + VOP_I64_I64_I32, sra +>; + +defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32", + VOP_F32_F32_F32_F32>; + +} // End SubtargetPredicate = isSICI + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// @@ -1943,14 +1949,14 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> { let UseNamedOperandTable = 1 in { def _SAVE : InstSI < (outs), - (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, + (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; def _RESTORE : InstSI < (outs sgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; } // End UseNamedOperandTable = 1 @@ -1966,14 +1972,14 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> { let UseNamedOperandTable = 1 in { def _SAVE : InstSI < (outs), - (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr, + (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; def _RESTORE : InstSI < (outs vgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; } // End UseNamedOperandTable = 1 @@ -2728,16 +2734,12 @@ def : Pat < (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0)))) >; -let Predicates = [isSICI] in { - def : Pat < (int_SI_tid), - (V_MBCNT_HI_U32_B32_e32 0xffffffff, + (V_MBCNT_HI_U32_B32_e64 0xffffffff, (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0)) >; -} - //===----------------------------------------------------------------------===// // VOP3 Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 71852717d7e6..667da4c8af61 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -50,6 +50,7 @@ public: unsigned NumUserSGPRs; std::map<unsigned, unsigned> LaneVGPRs; unsigned LDSWaveSpillSize; + unsigned ScratchOffsetReg; bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; unsigned getTIDReg() const { return TIDReg; }; void setTIDReg(unsigned Reg) { TIDReg = Reg; } diff --git a/lib/Target/R600/SIPrepareScratchRegs.cpp b/lib/Target/R600/SIPrepareScratchRegs.cpp index f0e7edec6b48..0a57a5bc201a 100644 --- a/lib/Target/R600/SIPrepareScratchRegs.cpp +++ b/lib/Target/R600/SIPrepareScratchRegs.cpp @@ -84,28 +84,10 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { if (!Entry->isLiveIn(ScratchOffsetPreloadReg)) Entry->addLiveIn(ScratchOffsetPreloadReg); - // Load the scratch pointer - unsigned ScratchPtrReg = - TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass); - int ScratchPtrFI = -1; - - if (ScratchPtrReg != AMDGPU::NoRegister) { - // Found an SGPR to use. - MRI.setPhysRegUsed(ScratchPtrReg); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg) - .addReg(ScratchPtrPreloadReg); - } else { - // No SGPR is available, we must spill. 
- ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE)) - .addReg(ScratchPtrPreloadReg) - .addFrameIndex(ScratchPtrFI); - } - // Load the scratch offset. unsigned ScratchOffsetReg = TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass); - int ScratchOffsetFI = ~0; + int ScratchOffsetFI = -1; if (ScratchOffsetReg != AMDGPU::NoRegister) { // Found an SGPR to use @@ -117,7 +99,9 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4); BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE)) .addReg(ScratchOffsetPreloadReg) - .addFrameIndex(ScratchOffsetFI); + .addFrameIndex(ScratchOffsetFI) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) + .addReg(AMDGPU::SGPR0, RegState::Undef); } @@ -125,22 +109,27 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { // add them to all the SI_SPILL_V* instructions. RegScavenger RS; - bool UseRegScavenger = - (ScratchPtrReg == AMDGPU::NoRegister || - ScratchOffsetReg == AMDGPU::NoRegister); + unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4); + RS.addScavengingFrameIndex(ScratchRsrcFI); + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock &MBB = *BI; - if (UseRegScavenger) - RS.enterBasicBlock(&MBB); + // Add the scratch offset reg as a live-in so that the register scavenger + // doesn't re-use it. + if (!MBB.isLiveIn(ScratchOffsetReg) && + ScratchOffsetReg != AMDGPU::NoRegister) + MBB.addLiveIn(ScratchOffsetReg); + RS.enterBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; + RS.forward(I); DebugLoc DL = MI.getDebugLoc(); switch(MI.getOpcode()) { - default: break;; + default: break; case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V128_SAVE: @@ -153,43 +142,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: - // Scratch Pointer - if (ScratchPtrReg == AMDGPU::NoRegister) { - ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0); - BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE), - ScratchPtrReg) - .addFrameIndex(ScratchPtrFI) - .addReg(AMDGPU::NoRegister) - .addReg(AMDGPU::NoRegister); - } else if (!MBB.isLiveIn(ScratchPtrReg)) { - MBB.addLiveIn(ScratchPtrReg); - } + // Scratch resource + unsigned ScratchRsrcReg = + RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); + + uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size + + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) + .addImm(Rsrc & 0xffffffff) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) + .addImm(Rsrc >> 32) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + 
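An aside on the hunk above: the four S_MOV_B32 instructions materialize a 128-bit buffer resource descriptor in an SReg_128. The first two dwords are filled in with the scratch base address via the SCRATCH_RSRC_DWORD0/1 external symbols (resolved later), and the last two dwords come from splitting the 64-bit Rsrc constant. A minimal standalone sketch of that dword split, using placeholder values for the RSRC_* flags since the real encodings live in the backend headers:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Placeholder flag values for illustration only; the actual
      // AMDGPU::RSRC_DATA_FORMAT / RSRC_TID_ENABLE encodings differ.
      const uint64_t RSRC_DATA_FORMAT = UINT64_C(0xf) << 44; // assumed
      const uint64_t RSRC_TID_ENABLE  = UINT64_C(1) << 55;   // assumed

      uint64_t Rsrc = RSRC_DATA_FORMAT | RSRC_TID_ENABLE | 0xffffffff; // Size

      uint32_t Rsrc2 = uint32_t(Rsrc & 0xffffffff); // immediate moved into sub2
      uint32_t Rsrc3 = uint32_t(Rsrc >> 32);        // immediate moved into sub3
      printf("sub2 = 0x%08x, sub3 = 0x%08x\n", (unsigned)Rsrc2, (unsigned)Rsrc3);
      return 0;
    }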
// Scratch Offset if (ScratchOffsetReg == AMDGPU::NoRegister) { ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), ScratchOffsetReg) .addFrameIndex(ScratchOffsetFI) - .addReg(AMDGPU::NoRegister) - .addReg(AMDGPU::NoRegister); + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) + .addReg(AMDGPU::SGPR0, RegState::Undef); } else if (!MBB.isLiveIn(ScratchOffsetReg)) { MBB.addLiveIn(ScratchOffsetReg); } - if (ScratchPtrReg == AMDGPU::NoRegister || + if (ScratchRsrcReg == AMDGPU::NoRegister || ScratchOffsetReg == AMDGPU::NoRegister) { LLVMContext &Ctx = MF.getFunction()->getContext(); Ctx.emitError("ran out of SGPRs for spilling VGPRs"); - ScratchPtrReg = AMDGPU::SGPR0; + ScratchRsrcReg = AMDGPU::SGPR0; ScratchOffsetReg = AMDGPU::SGPR0; } - MI.getOperand(2).setReg(ScratchPtrReg); + MI.getOperand(2).setReg(ScratchRsrcReg); + MI.getOperand(2).setIsKill(true); + MI.getOperand(2).setIsUndef(false); MI.getOperand(3).setReg(ScratchOffsetReg); + MI.getOperand(3).setIsUndef(false); + MI.getOperand(3).setIsKill(false); + MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); break; } - if (UseRegScavenger) - RS.forward(); } } return true; diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index f9feea470f15..0396bf384066 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -23,7 +23,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/Support/Debug.h" using namespace llvm; SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st) @@ -51,9 +50,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - return RC->getNumRegs(); +unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { + + // FIXME: We should adjust the max number of waves based on LDS size. 
+ unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU()); + unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU()); + + for (regclass_iterator I = regclass_begin(), E = regclass_end(); + I != E; ++I) { + + unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1); + unsigned Limit; + + if (isSGPRClass(*I)) { + Limit = SGPRLimit / NumSubRegs; + } else { + Limit = VGPRLimit / NumSubRegs; + } + + const int *Sets = getRegClassPressureSets(*I); + assert(Sets); + for (unsigned i = 0; Sets[i] != -1; ++i) { + if (Sets[i] == (int)Idx) + return Limit; + } + } + return 256; } bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { @@ -98,7 +120,7 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, - unsigned ScratchPtr, + unsigned ScratchRsrcReg, unsigned ScratchOffset, int64_t Offset, RegScavenger *RS) const { @@ -113,33 +135,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, bool RanOutOfSGPRs = false; unsigned SOffset = ScratchOffset; - unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0); - if (RsrcReg == AMDGPU::NoRegister) { - RanOutOfSGPRs = true; - RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; - } - unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); unsigned Size = NumSubRegs * 4; - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64), - getSubReg(RsrcReg, AMDGPU::sub0_sub1)) - .addReg(ScratchPtr) - .addReg(RsrcReg, RegState::ImplicitDefine); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), - getSubReg(RsrcReg, AMDGPU::sub2)) - .addImm(Rsrc & 0xffffffff) - .addReg(RsrcReg, RegState::ImplicitDefine); - - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), - getSubReg(RsrcReg, AMDGPU::sub3)) - .addImm(Rsrc >> 32) - .addReg(RsrcReg, RegState::ImplicitDefine); - if (!isUInt<12>(Offset + Size)) { SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0); if (SOffset == AMDGPU::NoRegister) { @@ -163,9 +161,9 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) .addReg(SubReg, getDefRegState(IsLoad)) - .addReg(RsrcReg, getKillRegState(IsKill)) + .addReg(ScratchRsrcReg, getKillRegState(IsKill)) .addImm(Offset) - .addReg(SOffset, getKillRegState(IsKill)) + .addReg(SOffset) .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe @@ -235,9 +233,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, Ctx.emitError("Ran out of VGPRs for spilling SGPR"); } - if (isM0) { + if (isM0) SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0); - } BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) .addReg(Spill.VGPR) @@ -262,7 +259,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V32_SAVE: buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(), + TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index), RS); MI->eraseFromParent(); @@ -274,7 +271,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V512_RESTORE: { buildScratchLoadStore(MI, 
AMDGPU::BUFFER_LOAD_DWORD_OFFSET, TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(), - TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(), + TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), FrameInfo->getObjectOffset(Index), RS); MI->eraseFromParent(); @@ -289,7 +286,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) .addImm(Offset); - FIOp.ChangeToRegister(TmpReg, false); + FIOp.ChangeToRegister(TmpReg, false, false, true); } } } @@ -446,6 +443,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::TGID_Z: return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2); case SIRegisterInfo::SCRATCH_WAVE_OFFSET: + if (MFI->getShaderType() != ShaderType::COMPUTE) + return MFI->ScratchOffsetReg; return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4); case SIRegisterInfo::SCRATCH_PTR: return AMDGPU::SGPR2_SGPR3; @@ -475,3 +474,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, return AMDGPU::NoRegister; } +unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return 256; + } +} + +unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return 103; + } +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index d14212c2b104..d908ffd12d2c 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -17,6 +17,7 @@ #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "llvm/Support/Debug.h" namespace llvm { @@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const override; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const override; + unsigned getRegPressureSetLimit(unsigned Idx) const override; bool requiresRegisterScavenging(const MachineFunction &Fn) const override; @@ -105,13 +105,21 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { unsigned getPreloadedValue(const MachineFunction &MF, enum PreloadedValue Value) const; + /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount + /// concurrent waves. + unsigned getNumVGPRsAllowed(unsigned WaveCount) const; + + /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount + /// concurrent waves. 
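A note on the getNumVGPRsAllowed/getNumSGPRsAllowed tables above: they encode the occupancy trade-off on SI-class hardware, where one compute unit's register file is shared by all concurrently resident waves, so a higher wave count leaves a smaller per-wave register budget. getRegPressureSetLimit then divides that budget by the width of the register class in 32-bit subregisters. A rough standalone sketch of the arithmetic, with the VGPR table copied from the patch and an assumed wave count of 10 standing in for ST.getMaxWavesPerCU():

    #include <algorithm>
    #include <cstdio>

    // Wave count -> per-wave VGPR budget, copied from the patch above.
    static unsigned numVGPRsAllowed(unsigned WaveCount) {
      switch (WaveCount) {
      case 10: return 24; case 9: return 28; case 8: return 32;
      case 7:  return 36; case 6: return 40; case 5: return 48;
      case 4:  return 64; case 3: return 84; case 2: return 128;
      default: return 256;
      }
    }

    int main() {
      unsigned MaxWaves = 10;   // assumed value of ST.getMaxWavesPerCU()
      unsigned ClassBytes = 16; // e.g. a 128-bit VReg_128 tuple
      unsigned NumSubRegs = std::max((int)ClassBytes / 4, 1);
      unsigned Limit = numVGPRsAllowed(MaxWaves) / NumSubRegs;
      printf("limit: %u four-dword tuples per wave\n", Limit); // 24 / 4 = 6
      return 0;
    }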
+ unsigned getNumSGPRsAllowed(unsigned WaveCount) const; + unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const; private: void buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, unsigned Value, - unsigned ScratchPtr, unsigned ScratchOffset, + unsigned ScratchRsrcReg, unsigned ScratchOffset, int64_t Offset, RegScavenger *RS) const; }; diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp index f91d1177bbae..6a3410688fe7 100644 --- a/lib/Target/R600/SIShrinkInstructions.cpp +++ b/lib/Target/R600/SIShrinkInstructions.cpp @@ -10,6 +10,7 @@ // #include "AMDGPU.h" +#include "AMDGPUMCInstLower.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "llvm/ADT/Statistic.h" @@ -206,13 +207,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { continue; } - int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); - - // Op32 could be -1 here if we started with an instruction that had a + // getVOPe32 could be -1 here if we started with an instruction that had // a 32-bit encoding and then commuted it to an instruction that did not. - if (Op32 == -1) + if (!TII->hasVALU32BitEncoding(MI.getOpcode())) continue; + int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); + if (TII->isVOPC(Op32)) { unsigned DstReg = MI.getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { diff --git a/lib/Target/R600/VIInstructions.td b/lib/Target/R600/VIInstructions.td index 07cfa29ae12b..24e66cea6277 100644 --- a/lib/Target/R600/VIInstructions.td +++ b/lib/Target/R600/VIInstructions.td @@ -11,22 +11,6 @@ let SubtargetPredicate = isVI in { -def V_LDEXP_F32 : VOP3InstVI <0x288, "v_ldexp_f32", VOP_F32_F32_I32, - AMDGPUldexp ->; -def V_BFM_B32 : VOP3InstVI <0x293, "v_bfm_b32", VOP_I32_I32_I32, AMDGPUbfm>; -def V_BCNT_U32_B32 : VOP3InstVI <0x28b, "v_bcnt_u32_b32", VOP_I32_I32_I32>; -def V_MBCNT_LO_U32_B32 : VOP3InstVI <0x28c, "v_mbcnt_lo_u32_b32", - VOP_I32_I32_I32 ->; -def V_MBCNT_HI_U32_B32 : VOP3InstVI <0x28d, "v_mbcnt_hi_u32_b32", - VOP_I32_I32_I32 ->; - -def V_CVT_PKRTZ_F16_F32 : VOP3InstVI <0x296, "v_cvt_pkrtz_f16_f32", - VOP_I32_F32_F32, int_SI_packf16 ->; - defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi < 0x14, "buffer_load_dword", VGPR_32, i32, global_load >; @@ -37,22 +21,13 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi < } // End SubtargetPredicate = isVI -//===----------------------------------------------------------------------===// -// VOP2 Patterns -//===----------------------------------------------------------------------===// - -let Predicates = [isVI] in { - -def : Pat < - (int_SI_tid), - (V_MBCNT_HI_U32_B32 0xffffffff, - (V_MBCNT_LO_U32_B32 0xffffffff, 0)) ->; //===----------------------------------------------------------------------===// // SMEM Patterns //===----------------------------------------------------------------------===// +let Predicates = [isVI] in { + // 1. 
Offset as 8bit DWORD immediate def : Pat < (SIload_constant v4i32:$sbase, IMM20bit:$offset), diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 6767e4b224f8..42690206e8c7 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -42,7 +42,8 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; - UseIntegratedAssembler = true; + if (TheTriple.isOSSolaris() || TheTriple.isOSOpenBSD()) + UseIntegratedAssembler = true; } const MCExpr* diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 719b761084f9..164b4192ae66 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -777,6 +777,19 @@ public: MachO::CPU_TYPE_X86_64, Subtype); } + bool doesSectionRequireSymbols(const MCSection &Section) const override { + // Temporary labels in the string literals sections require symbols. The + // issue is that the x86_64 relocation format does not allow symbol + + // offset, and so the linker does not have enough information to resolve the + // access to the appropriate atom unless an external relocation is used. For + // non-cstring sections, we expect the compiler to use a non-temporary label + // for anything that could have an addend pointing outside the symbol. + // + // See <rdar://problem/4765733>. + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + return SMO.getType() == MachO::S_CSTRING_LITERALS; + } + /// \brief Generate the compact unwind encoding for the CFI instructions. uint32_t generateCompactUnwindEncoding( ArrayRef<MCCFIInstruction> Instrs) const override { diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index be6a8e4a43eb..e8b0b4c5826f 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -222,6 +222,9 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, case MCSymbolRefExpr::VK_GOT: Type = ELF::R_386_GOT32; break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_386_PLT32; + break; case MCSymbolRefExpr::VK_GOTOFF: Type = ELF::R_386_GOTOFF; break; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 7a83f4c64e6d..67b0c8900850 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -10,7 +10,6 @@ #include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -48,21 +47,23 @@ class X86MachObjectWriter : public MCMachObjectTargetWriter { const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue); - void RecordX86_64Relocation(MachObjectWriter *Writer, MCAssembler &Asm, + void RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue); - + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); public: X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) : MCMachObjectTargetWriter(Is64Bit, 
CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/Is64Bit) {} - void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, - const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) override { + void RecordRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) override { if (Writer->is64Bit()) RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); @@ -96,10 +97,13 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { } } -void X86MachObjectWriter::RecordX86_64Relocation( - MachObjectWriter *Writer, MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { +void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); @@ -113,7 +117,6 @@ void X86MachObjectWriter::RecordX86_64Relocation( unsigned Index = 0; unsigned IsExtern = 0; unsigned Type = 0; - const MCSymbolData *RelSymbol = nullptr; Value = Target.getConstant(); @@ -129,6 +132,7 @@ void X86MachObjectWriter::RecordX86_64Relocation( if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. Type = MachO::X86_64_RELOC_UNSIGNED; + Index = 0; // FIXME: I believe this is broken, I don't think the linker can understand // it. I think it would require a local relocation, but I'm not sure if that @@ -189,30 +193,36 @@ void X86MachObjectWriter::RecordX86_64Relocation( Value -= Writer->getSymbolAddress(&B_SD, Layout) - (!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout)); - if (!A_Base) + if (A_Base) { + Index = A_Base->getIndex(); + IsExtern = 1; + } else { Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } Type = MachO::X86_64_RELOC_UNSIGNED; MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = - (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); - Writer->addRelocation(A_Base, Fragment->getParent(), MRE); - - if (B_Base) - RelSymbol = B_Base; - else + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); + + if (B_Base) { + Index = B_Base->getIndex(); + IsExtern = 1; + } else { Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } Type = MachO::X86_64_RELOC_SUBTRACTOR; } else { const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - if (Symbol->isTemporary() && Value) { - const MCSection &Sec = Symbol->getSection(); - if (!Asm.getContext().getAsmInfo()->isSectionAtomizableBySymbols(Sec)) - Asm.addLocalUsedInReloc(*Symbol); - } const MCSymbolData &SD = Asm.getSymbolData(*Symbol); - RelSymbol = Asm.getAtom(&SD); + const MCSymbolData *Base = Asm.getAtom(&SD); // Relocations inside debug sections always use local relocations when // possible. 
This seems to be done because the debugger doesn't fully @@ -222,20 +232,23 @@ void X86MachObjectWriter::RecordX86_64Relocation( const MCSectionMachO &Section = static_cast<const MCSectionMachO&>( Fragment->getParent()->getSection()); if (Section.hasAttribute(MachO::S_ATTR_DEBUG)) - RelSymbol = nullptr; + Base = nullptr; } // x86_64 almost always uses external relocations, except when there is no // symbol to use as a base address (a local symbol with no preceding // non-local symbol). - if (RelSymbol) { + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + // Add the local offset, if needed. - if (RelSymbol != &SD) - Value += - Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(RelSymbol); + if (Base != &SD) + Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); } else if (Symbol->isInSection() && !Symbol->isVariable()) { // The index is the section ordinal (1-based). Index = SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; Value += Writer->getSymbolAddress(&SD, Layout); if (IsPCRel) @@ -334,9 +347,12 @@ void X86MachObjectWriter::RecordX86_64Relocation( // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28); - Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); } bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, @@ -408,7 +424,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value2; - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); } else { // If the offset is more than 24-bits, it won't fit in a scattered // relocation offset field, so we fall back to using a non-scattered @@ -430,7 +446,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value; - Writer->addRelocation(nullptr, Fragment->getParent(), MRE); + Writer->addRelocation(Fragment->getParent(), MRE); return true; } @@ -451,6 +467,7 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, // Get the symbol data. const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); // We're only going to have a second symbol in pic mode and it'll be a // subtraction from the picbase. For 32-bit pic the addend is the difference @@ -473,9 +490,12 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; MRE.r_word0 = Value; - MRE.r_word1 = - (IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28); - Writer->addRelocation(SD_A, Fragment->getParent(), MRE); + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // r_extern + (MachO::GENERIC_RELOC_TLV << 28)); // r_type + Writer->addRelocation(Fragment->getParent(), MRE); } void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, @@ -526,8 +546,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // See <reloc.h>. 
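All of the r_word1 expressions in these hunks pack one Mach-O relocation_info record into 32 bits: a 24-bit symbol or section index, then the 1-bit r_pcrel, 2-bit r_length (log2 of the access size), 1-bit r_extern, and 4-bit r_type fields, per <mach-o/reloc.h>. A self-contained illustration of the packing, with hypothetical field values:

    #include <cstdint>
    #include <cstdio>

    // Same packing as the MRE.r_word1 expressions above.
    static uint32_t packRWord1(uint32_t Index, unsigned IsPCRel,
                               unsigned Log2Size, unsigned IsExtern,
                               unsigned Type) {
      return (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
             (IsExtern << 27) | (Type << 28);
    }

    int main() {
      // Hypothetical values: external, pc-relative, 4-byte access, r_type 2.
      uint32_t W = packRWord1(5, 1, 2, 1, 2);
      printf("r_word1 = 0x%08x (index = %u, extern = %u)\n",
             W, W & 0xffffff, (W >> 27) & 1);
      return 0;
    }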
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned Index = 0; + unsigned IsExtern = 0; unsigned Type = 0; - const MCSymbolData *RelSymbol = nullptr; if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. @@ -548,7 +568,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // Check whether we need an external or internal relocation. if (Writer->doesSymbolRequireExternRelocation(SD)) { - RelSymbol = SD; + IsExtern = 1; + Index = SD->getIndex(); // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was // undefined. This occurs with weak definitions, for example. @@ -570,9 +591,12 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = - (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); - Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); } MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index dab2c4b47ad6..83b4b82311f2 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1376,6 +1376,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); if (!Callee) return false; + // The prototype of thunks are a lie, don't try to directly call such + // functions. + if (Callee->hasFnAttribute("thunk")) + return false; Instruction *Caller = CS.getInstruction(); const AttributeSet &CallerPAL = CS.getAttributes(); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index b814b2525dca..ac13eebf8275 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -2182,9 +2182,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, // Handle the floating point versions of equality comparisons too. if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) || - (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) - Worklist.push_back(std::make_pair(Op0, Op1)); - + (isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) { + // Floating point -0.0 and 0.0 compare equal, so we can't + // propagate a constant based on that comparison. + // FIXME: We should do this optimization if 'no signed zeros' is + // applicable via an instruction-level fast-math-flag or some other + // indicator that relaxed FP semantics are being used. + if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero()) + Worklist.push_back(std::make_pair(Op0, Op1)); + } + // If "A >= B" is known true, replace "A < B" with false everywhere. 
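For context on the GVN guard a few lines up: IEEE 754 defines 0.0 == -0.0 as true, so a known-true x == 0.0 does not pin down the sign of x, and substituting the constant 0.0 for x would change sign-sensitive results. A small demonstration of the hazard, assuming IEEE floating-point semantics:

    #include <cstdio>

    int main() {
      volatile double x = -0.0;
      if (x == 0.0) { // taken: -0.0 compares equal to 0.0
        // Propagating x -> 0.0 here would flip the sign of the result.
        printf("1.0 / x   = %f\n", 1.0 / x);   // -inf
        printf("1.0 / 0.0 = %f\n", 1.0 / 0.0); // +inf
      }
      return 0;
    }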
CmpInst::Predicate NotPred = Cmp->getInversePredicate(); Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 1ac38e0f52a5..d664f85c773d 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" @@ -47,7 +48,7 @@ namespace { AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - AU.addPreserved("scalar-evolution"); + AU.addPreserved<ScalarEvolution>(); AU.addRequired<TargetLibraryInfo>(); } }; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 35cd917330ab..04b91306d3cb 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -46,7 +46,6 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); - AU.addPreserved("mem2reg"); AU.addPreservedID(LowerInvokePassID); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index f8aa1d3eec12..6cb91a154f06 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -278,9 +278,8 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, Value *IVOperand) { // Currently we only handle instructions of the form "add <indvar> <value>" - // and "sub <indvar> <value>". unsigned Op = BO->getOpcode(); - if (!(Op == Instruction::Add || Op == Instruction::Sub)) + if (Op != Instruction::Add) return false; // If BO is already both nuw and nsw then there is nothing left to do @@ -304,15 +303,6 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, if (OtherOpSCEV == SE->getCouldNotCompute()) return false; - if (Op == Instruction::Sub) { - // If the subtraction is of the form "sub <indvar>, <op>", then pretend it - // is "add <indvar>, -<op>" and continue, else bail out. - if (OtherOperandIdx != 1) - return false; - - OtherOpSCEV = SE->getNegativeSCEV(OtherOpSCEV); - } - const SCEV *IVOpSCEV = SE->getSCEV(IVOperand); const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 5b4647ddcb5e..5a0d52e04f9f 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1968,8 +1968,12 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { // Try to further simplify the result. CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI); if (SimplifiedCI && SimplifiedCI->getCalledFunction()) - if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) + if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) { + // If we were able to further simplify, remove the now redundant call. 
+ SimplifiedCI->replaceAllUsesWith(V); + SimplifiedCI->eraseFromParent(); return V; + } return SimplifiedFortifiedCI; } @@ -2218,11 +2222,11 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> & return nullptr; } -Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> &B) { +Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, + IRBuilder<> &B, + LibFunc::Func Func) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - LibFunc::Func Func = - Name.startswith("str") ? LibFunc::strcpy_chk : LibFunc::stpcpy_chk; if (!checkStringCopyLibFuncSignature(Callee, Func, DL)) return nullptr; @@ -2231,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> & *ObjSize = CI->getArgOperand(2); // __stpcpy_chk(x,x,...) -> x+strlen(x) - if (!OnlyLowerUnknownSize && Dst == Src) { + if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { Value *StrLen = EmitStrLen(Src, B, DL, TLI); return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } @@ -2266,11 +2270,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrCpyChk(CallInst *CI, IRBuilder<> & return nullptr; } -Value *FortifiedLibCallSimplifier::optimizeStrNCpyChk(CallInst *CI, IRBuilder<> &B) { +Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, + IRBuilder<> &B, + LibFunc::Func Func) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - LibFunc::Func Func = - Name.startswith("str") ? LibFunc::strncpy_chk : LibFunc::stpncpy_chk; if (!checkStringCopyLibFuncSignature(Callee, Func, DL)) return nullptr; @@ -2310,10 +2314,10 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeMemSetChk(CI, Builder); case LibFunc::stpcpy_chk: case LibFunc::strcpy_chk: - return optimizeStrCpyChk(CI, Builder); + return optimizeStrpCpyChk(CI, Builder, Func); case LibFunc::stpncpy_chk: case LibFunc::strncpy_chk: - return optimizeStrNCpyChk(CI, Builder); + return optimizeStrpNCpyChk(CI, Builder, Func); default: break; } diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index b35a662f17b5..d36283ea3ae1 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -79,6 +79,19 @@ static cl::list<std::string> RewriteMapFiles("rewrite-map-file", namespace llvm { namespace SymbolRewriter { +void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source, + const std::string &Target) { + if (Comdat *CD = GO->getComdat()) { + auto &Comdats = M.getComdatSymbolTable(); + + Comdat *C = M.getOrInsertComdat(Target); + C->setSelectionKind(CD->getSelectionKind()); + GO->setComdat(C); + + Comdats.erase(Comdats.find(Source)); + } +} + template <RewriteDescriptor::Type DT, typename ValueType, ValueType *(llvm::Module::*Get)(StringRef) const> class ExplicitRewriteDescriptor : public RewriteDescriptor { @@ -102,10 +115,14 @@ template <RewriteDescriptor::Type DT, typename ValueType, bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) { bool Changed = false; if (ValueType *S = (M.*Get)(Source)) { + if (GlobalObject *GO = dyn_cast<GlobalObject>(S)) + rewriteComdat(M, GO, Source, Target); + if (Value *T = (M.*Get)(Target)) S->setValueName(T->getValueName()); else S->setName(Target); + Changed = true; } return Changed; @@ -145,6 +162,12 @@ performOnModule(Module &M) { report_fatal_error("unable to transforn " + C.getName() + " in " + M.getModuleIdentifier() + 
": " + Error); + if (C.getName() == Name) + continue; + + if (GlobalObject *GO = dyn_cast<GlobalObject>(&C)) + rewriteComdat(M, GO, C.getName(), Name); + if (Value *V = (M.*Get)(Name)) C.setValueName(V->getValueName()); else diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 0c2fc0a972b5..7e00a80989dc 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -35,7 +35,6 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); // This is a cluster of orthogonal Transforms - AU.addPreserved("mem2reg"); AU.addPreservedID(LowerSwitchID); } diff --git a/test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll b/test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll new file mode 100644 index 000000000000..41b07d5cd537 --- /dev/null +++ b/test/Analysis/ScalarEvolution/nw-sub-is-not-nw-add.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +; Check that SCEV does not assume sub nuw X Y == add nuw X, -Y +define void @f(i32* %loc) { +; CHECK-LABEL: @f + entry: + br label %loop + + loop: + %idx = phi i32 [ 6, %entry ], [ %idx.dec, %loop ] + store i32 %idx, i32* %loc + %idx.dec = sub nuw i32 %idx, 1 + %cond = icmp uge i32 %idx.dec, 5 + br i1 %cond, label %loop, label %exit +; CHECK-NOT: br i1 true, label %loop, label %exit + + exit: + ret void +} + +declare void @use_i1(i1) + +; Check that SCEV does not assume sub nsw X Y == add nsw X, -Y +define void @g(i32 %lim) { +; CHECK-LABEL: @g + entry: + br label %loop + + loop: + %idx = phi i32 [ -1, %entry ], [ %idx.dec, %loop ] + %t = icmp sgt i32 %idx, 0 +; CHECK-NOT: call void @use_i1(i1 false) +; CHECK: call void @use_i1(i1 %t) + call void @use_i1(i1 %t) + %idx.dec = sub nsw i32 %idx, -2147483648 + %cond = icmp eq i32 %idx.dec, %lim + br i1 %cond, label %loop, label %exit + + exit: + ret void +} diff --git a/test/Bindings/llvm-c/add_named_metadata_operand.ll b/test/Bindings/llvm-c/add_named_metadata_operand.ll new file mode 100644 index 000000000000..fcc833d6af03 --- /dev/null +++ b/test/Bindings/llvm-c/add_named_metadata_operand.ll @@ -0,0 +1,2 @@ +; RUN: llvm-c-test --add-named-metadata-operand < /dev/null +; This used to trigger an assertion diff --git a/test/Bindings/llvm-c/set_metadata.ll b/test/Bindings/llvm-c/set_metadata.ll new file mode 100644 index 000000000000..0a65b8c47dee --- /dev/null +++ b/test/Bindings/llvm-c/set_metadata.ll @@ -0,0 +1,2 @@ +; RUN: llvm-c-test --set-metadata < /dev/null +; This used to trigger an assertion diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll index 651c793f73a4..b0d3ee0ff8a3 100644 --- a/test/CodeGen/AArch64/arm64-platform-reg.ll +++ b/test/CodeGen/AArch64/arm64-platform-reg.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-DARWIN +; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s ; x18 is reserved as a platform register on Darwin but not on other @@ -16,11 +17,11 @@ define void @keep_live() { ; CHECK: ldr x18 ; CHECK: str x18 -; CHECK-DARWIN-NOT: ldr fp -; CHECK-DARWIN-NOT: ldr x18 -; CHECK-DARWIN: Spill -; CHECK-DARWIN-NOT: ldr fp -; CHECK-DARWIN-NOT: 
ldr x18 -; CHECK-DARWIN: ret +; CHECK-RESERVE-X18-NOT: ldr fp +; CHECK-RESERVE-X18-NOT: ldr x18 +; CHECK-RESERVE-X18: Spill +; CHECK-RESERVE-X18-NOT: ldr fp +; CHECK-RESERVE-X18-NOT: ldr x18 +; CHECK-RESERVE-X18: ret ret void } diff --git a/test/CodeGen/AArch64/ghc-cc.ll b/test/CodeGen/AArch64/ghc-cc.ll new file mode 100644 index 000000000000..505bd5fca66d --- /dev/null +++ b/test/CodeGen/AArch64/ghc-cc.ll @@ -0,0 +1,89 @@ +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; Check the GHC call convention works (aarch64) + +@base = external global i64 ; assigned to register: r19 +@sp = external global i64 ; assigned to register: r20 +@hp = external global i64 ; assigned to register: r21 +@r1 = external global i64 ; assigned to register: r22 +@r2 = external global i64 ; assigned to register: r23 +@r3 = external global i64 ; assigned to register: r24 +@r4 = external global i64 ; assigned to register: r25 +@r5 = external global i64 ; assigned to register: r26 +@r6 = external global i64 ; assigned to register: r27 +@splim = external global i64 ; assigned to register: r28 + +@f1 = external global float ; assigned to register: s8 +@f2 = external global float ; assigned to register: s9 +@f3 = external global float ; assigned to register: s10 +@f4 = external global float ; assigned to register: s11 + +@d1 = external global double ; assigned to register: d12 +@d2 = external global double ; assigned to register: d13 +@d3 = external global double ; assigned to register: d14 +@d4 = external global double ; assigned to register: d15 + +define ghccc i64 @addtwo(i64 %x, i64 %y) nounwind { +entry: + ; CHECK-LABEL: addtwo + ; CHECK: add x0, x19, x20 + ; CHECK-NEXT: ret + %0 = add i64 %x, %y + ret i64 %0 +} + +define void @zap(i64 %a, i64 %b) nounwind { +entry: + ; CHECK-LABEL: zap + ; CHECK-NOT: mov {{x[0-9]+}}, sp + ; CHECK: bl addtwo + ; CHECK-NEXT: bl foo + %0 = call ghccc i64 @addtwo(i64 %a, i64 %b) + call void @foo() nounwind + ret void +} + +define ghccc void @foo_i64 () nounwind { +entry: + ; CHECK-LABEL: foo_i64 + ; CHECK: adrp {{x[0-9]+}}, base + ; CHECK-NEXT: ldr x19, [{{x[0-9]+}}, :lo12:base] + ; CHECK-NEXT: bl bar_i64 + ; CHECK-NEXT: ret + + %0 = load i64* @base + tail call ghccc void @bar_i64( i64 %0 ) nounwind + ret void +} + +define ghccc void @foo_float () nounwind { +entry: + ; CHECK-LABEL: foo_float + ; CHECK: adrp {{x[0-9]+}}, f1 + ; CHECK-NEXT: ldr s8, [{{x[0-9]+}}, :lo12:f1] + ; CHECK-NEXT: bl bar_float + ; CHECK-NEXT: ret + + %0 = load float* @f1 + tail call ghccc void @bar_float( float %0 ) nounwind + ret void +} + +define ghccc void @foo_double () nounwind { +entry: + ; CHECK-LABEL: foo_double + ; CHECK: adrp {{x[0-9]+}}, d1 + ; CHECK-NEXT: ldr d12, [{{x[0-9]+}}, :lo12:d1] + ; CHECK-NEXT: bl bar_double + ; CHECK-NEXT: ret + + %0 = load double* @d1 + tail call ghccc void @bar_double( double %0 ) nounwind + ret void +} + +declare ghccc void @foo () + +declare ghccc void @bar_i64 (i64) +declare ghccc void @bar_float (float) +declare ghccc void @bar_double (double) diff --git a/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll new file mode 100644 index 000000000000..f3cc3d82121f --- /dev/null +++ b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T +; RUN: llc -mtriple=thumbv6m-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V6M + +; CHECK-LABEL: foo +define i32 @foo(i32 
%z, ...) #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i32, align 4 + %e = alloca i32, align 4 + %f = alloca i32, align 4 + %g = alloca i32, align 4 + %h = alloca i32, align 4 + + store i32 1, i32* %a, align 4 + store i32 2, i32* %b, align 4 + store i32 3, i32* %c, align 4 + store i32 4, i32* %d, align 4 + store i32 5, i32* %e, align 4 + store i32 6, i32* %f, align 4 + store i32 7, i32* %g, align 4 + store i32 8, i32* %h, align 4 + + %0 = load i32* %a, align 4 + %1 = load i32* %b, align 4 + %2 = load i32* %c, align 4 + %3 = load i32* %d, align 4 + %4 = load i32* %e, align 4 + %5 = load i32* %f, align 4 + %6 = load i32* %g, align 4 + %7 = load i32* %h, align 4 + + %add = add nsw i32 %0, %1 + %add4 = add nsw i32 %add, %2 + %add5 = add nsw i32 %add4, %3 + %add6 = add nsw i32 %add5, %4 + %add7 = add nsw i32 %add6, %5 + %add8 = add nsw i32 %add7, %6 + %add9 = add nsw i32 %add8, %7 + + %addz = add nsw i32 %add9, %z + call void @llvm.va_start(i8* null) + ret i32 %addz + +; CHECK: sub sp, #40 +; CHECK-NEXT: add [[BASE:r[0-9]]], sp, #8 + +; CHECK-V4T: movs [[NEWBASE:r[0-9]]], [[BASE]] +; CHECK-V6M: mov [[NEWBASE:r[0-9]]], [[BASE]] +; CHECK-NEXT: adds [[NEWBASE]], #8 +; CHECK-NEXT: ldm [[NEWBASE]], +} + +declare void @llvm.va_start(i8*) nounwind diff --git a/test/CodeGen/Mips/fcmp.ll b/test/CodeGen/Mips/fcmp.ll index b7759831c5a2..8e83b0064ed9 100644 --- a/test/CodeGen/Mips/fcmp.ll +++ b/test/CodeGen/Mips/fcmp.ll @@ -781,3 +781,93 @@ define i32 @true_f64(double %a, double %b) nounwind { %2 = zext i1 %1 to i32 ret i32 %2 } + +; The optimizers sometimes produce setlt instead of setolt/setult. +define float @bug1_f32(float %angle, float %at) #0 { +entry: +; ALL-LABEL: bug1_f32: + +; 32-C-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12 +; 32-C-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)( +; 32-C-DAG: c.ole.s $[[T0]], $[[T1]] +; 32-C-DAG: bc1t + +; 32-CMP-DAG: add.s $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %lo($CPI32_0)( +; 32-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 32-CMP-DAG: bnez $[[T4]], + +; 64-C-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12 +; 64-C-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)( +; 64-C-DAG: c.ole.s $[[T0]], $[[T1]] +; 64-C-DAG: bc1t + +; 64-CMP-DAG: add.s $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: lwc1 $[[T1:f[0-9]+]], %got_ofst($CPI32_0)( +; 64-CMP-DAG: cmp.le.s $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 64-CMP-DAG: bnez $[[T4]], + + %add = fadd fast float %at, %angle + %cmp = fcmp ogt float %add, 1.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %sub = fadd fast float %add, -1.000000e+00 + br label %if.end + +if.end: + %theta.0 = phi float [ %sub, %if.then ], [ %add, %entry ] + ret float %theta.0 +} + +; The optimizers sometimes produce setlt instead of setolt/setult. 
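Before the f64 twin of this test below, some background on that remark: in SelectionDAG condition codes, SETOLT is an ordered less-than (false if either operand is NaN), SETULT is unordered (true on NaN), and a plain SETLT leaves the NaN behaviour unspecified, so the backend has to select correct code for all three forms. A quick C++ illustration of the ordered/unordered split, assuming IEEE semantics:

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = NAN, b = 1.0;
      bool olt = std::isless(a, b); // ordered:   false when a is NaN
      bool ult = !(a >= b);         // unordered: true  when a is NaN
      printf("olt = %d, ult = %d\n", olt, ult);
      return 0;
    }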
+define double @bug1_f64(double %angle, double %at) #0 { +entry: +; ALL-LABEL: bug1_f64: + +; 32-C-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12 +; 32-C-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)( +; 32-C-DAG: c.ole.d $[[T0]], $[[T1]] +; 32-C-DAG: bc1t + +; 32-CMP-DAG: add.d $[[T0:f[0-9]+]], $f14, $f12 +; 32-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %lo($CPI33_0)( +; 32-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 32-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 32-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 32-CMP-DAG: bnez $[[T4]], + +; 64-C-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12 +; 64-C-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)( +; 64-C-DAG: c.ole.d $[[T0]], $[[T1]] +; 64-C-DAG: bc1t + +; 64-CMP-DAG: add.d $[[T0:f[0-9]+]], $f13, $f12 +; 64-CMP-DAG: ldc1 $[[T1:f[0-9]+]], %got_ofst($CPI33_0)( +; 64-CMP-DAG: cmp.le.d $[[T2:f[0-9]+]], $[[T0]], $[[T1]] +; 64-CMP-DAG: mfc1 $[[T3:[0-9]+]], $[[T2]] +; FIXME: This instruction is redundant. +; 64-CMP-DAG: andi $[[T4:[0-9]+]], $[[T3]], 1 +; 64-CMP-DAG: bnez $[[T4]], + + %add = fadd fast double %at, %angle + %cmp = fcmp ogt double %add, 1.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %sub = fadd fast double %add, -1.000000e+00 + br label %if.end + +if.end: + %theta.0 = phi double [ %sub, %if.then ], [ %add, %entry ] + ret double %theta.0 +} + +attributes #0 = { nounwind readnone "no-nans-fp-math"="true" } diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll index 72737ae273e6..9d0509b38d8a 100644 --- a/test/CodeGen/R600/basic-loop.ll +++ b/test/CodeGen/R600/basic-loop.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_loop: diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index a47bc876cb96..c64f443ad697 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -24,8 +24,7 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; XXX - Why 0 in register? 
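On the "Why 0 in register?" question above: v_bcnt_u32_b32 computes popcount(src0) + src1, and src1 of the 32-bit VOP2 (e32) encoding must be a VGPR, so the old code needed a v_mov_b32 of 0 first; the VOP3 (e64) form checked for below accepts the inline constant 0 directly. The add operand is also what lets two bcnts chain into a 64-bit population count, as in the ctpop64 test further down. A rough C++ model of that chaining, over an arbitrary value (the helper mimics the instruction, it is not the hardware definition):

    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    // Model of v_bcnt_u32_b32 dst, src0, src1: dst = popcount(src0) + src1.
    static uint32_t v_bcnt(uint32_t src0, uint32_t src1) {
      return (uint32_t)std::bitset<32>(src0).count() + src1;
    }

    int main() {
      uint64_t val = 0xf0f0f0f00000ffffULL;
      uint32_t lo = (uint32_t)val;
      uint32_t hi = (uint32_t)(val >> 32);
      uint32_t mid = v_bcnt(lo, 0);       // e64 form: inline 0, no v_mov
      uint32_t result = v_bcnt(hi, mid);  // chain the partial count
      printf("popcount = %u\n", result);  // 32
      return 0;
    }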
; FUNC-LABEL: {{^}}v_ctpop_i32: ; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -40,8 +39,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: ; SI: buffer_load_dword [[VAL0:v[0-9]+]], ; SI: buffer_load_dword [[VAL1:v[0-9]+]], -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]] +; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -73,8 +71,8 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace( } ; FUNC-LABEL: {{^}}v_ctpop_v2i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -87,10 +85,10 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v4i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -105,14 +103,14 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v8i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT @@ -131,22 +129,22 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs } ; FUNC-LABEL: {{^}}v_ctpop_v16i32: -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 -; SI: v_bcnt_u32_b32_e32 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 +; SI: v_bcnt_u32_b32_e64 ; SI: s_endpgm ; EG: BCNT_INT diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll index 8dfe571d3477..9758ac96ea9b 100644 --- a/test/CodeGen/R600/ctpop64.ll +++ b/test/CodeGen/R600/ctpop64.ll @@ -21,8 +21,7 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind { ; FUNC-LABEL: {{^}}v_ctpop_i64: ; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0 -; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]] +; SI: 
v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll index 24834af20404..efd875e93176 100644 --- a/test/CodeGen/R600/ds_read2st64.ll +++ b/test/CodeGen/R600/ds_read2st64.ll @@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { @@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index 35cfb03d39b4..d76e8a341c6f 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -1,16 +1,27 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC +declare float @llvm.fabs.f32(float) #0 + ; FUNC-LABEL: {{^}}fp_to_sint_i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; SI: v_cvt_i32_f32_e32 ; SI: s_endpgm -define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) { +define void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) { %conv = fptosi float %in to i32 store i32 %conv, i32 addrspace(1)* %out ret void } +; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs: +; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}} +define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) { + %in.fabs = call float @llvm.fabs.f32(float %in) #0 + %conv = fptosi float %in.fabs to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}fp_to_sint_v2i32: ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} @@ -214,3 +225,5 @@ define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { store <4 x i64> %conv, <4 x i64> addrspace(1)* %out ret void } + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll index 2e79866362ac..5ce3beaa16c0 100644 --- a/test/CodeGen/R600/hsa.ll +++ b/test/CodeGen/R600/hsa.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s ; HSA: {{^}}simple: +; HSA: .section .hsa.version +; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0" ; Make sure we are setting the ATC bit: ; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll new file mode 100644 
index 000000000000..6290ca09d502
--- /dev/null
+++ b/test/CodeGen/R600/misaligned-load.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI: @byte_aligned_load64
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: s_endpgm
+define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
+entry:
+  %0 = load i64 addrspace(3)* %in, align 1
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll
new file mode 100644
index 000000000000..740328a495da
--- /dev/null
+++ b/test/CodeGen/R600/scratch-buffer.ll
@@ -0,0 +1,86 @@
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
+
+; When a frame index offset is more than 12 bits, make sure we don't store
+; it in mubuf's offset field.
+
+; Also, make sure we use the same register for storing the scratch buffer address
+; for both stores. This register is allocated by the register scavenger, so we
+; should be able to reuse the same register for each scratch buffer access.
+
+; CHECK-LABEL: {{^}}legal_offset_fi:
+; CHECK: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0{{$}}
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; CHECK: v_mov_b32_e32 [[OFFSET]], 0x8000
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+
+define void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) {
+entry:
+  %scratch0 = alloca [8192 x i32]
+  %scratch1 = alloca [8192 x i32]
+
+  %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0
+  store i32 1, i32* %scratchptr0
+
+  %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0
+  store i32 2, i32* %scratchptr1
+
+  %cmp = icmp eq i32 %cond, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+  %if_value = load i32* %if_ptr
+  br label %done
+
+else:
+  %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+  %else_value = load i32* %else_ptr
+  br label %done
+
+done:
+  %value = phi i32 [%if_value, %if], [%else_value, %else]
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+
+  ret void
+
+}
+
+; CHECK-LABEL: {{^}}legal_offset_fi_offset
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
+; CHECK: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; CHECK: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
+
+define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
+entry:
+  %scratch0 = alloca [8192 x i32]
+  %scratch1 = alloca [8192 x i32]
+
+  %offset0 = load i32 addrspace(1)* %offsets
+  %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0
+  store i32 %offset0, i32* %scratchptr0
+
+  %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1
+  %offset1 = load i32 addrspace(1)* %offsetptr1
+  %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1
+  store i32 %offset1, i32* %scratchptr1
+
+  %cmp = icmp eq i32 %cond, 0
+  br i1 %cmp, label %if, label %else
+
+if:
+  %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+  %if_value = load i32* %if_ptr
+  br label %done
+
+else:
+  %else_ptr = 
getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset + %else_value = load i32* %else_ptr + br label %done + +done: + %value = phi i32 [%if_value, %if], [%else_value, %else] + store i32 %value, i32 addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll index b2f4a9ff05e1..f6dcb388248a 100644 --- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll +++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll @@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI: buffer_store_dword ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: buffer_store_dword define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll index 373a1967307a..e8315f17ebb6 100644 --- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll +++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=sparc -no-integrated-as +; RUN: llc < %s -march=sparc ; PR 1557 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" diff --git a/test/CodeGen/SPARC/inlineasm.ll b/test/CodeGen/SPARC/inlineasm.ll index 526cde8de8b4..2650533b7fec 100644 --- a/test/CodeGen/SPARC/inlineasm.ll +++ b/test/CodeGen/SPARC/inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s +; RUN: llc -march=sparc <%s | FileCheck %s ; CHECK-LABEL: test_constraint_r ; CHECK: add %o1, %o0, %o0 diff --git a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll index 6a67616d53be..6013b17d9372 100644 --- a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll +++ b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=sparc -no-integrated-as +; RUN: llc < %s -march=sparc ; ModuleID = 'mult-alt-generic.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32" target triple = "sparc" diff --git a/test/DebugInfo/Mips/fn-call-line.ll b/test/DebugInfo/Mips/fn-call-line.ll new file mode 100644 index 000000000000..14cd8c9d987f --- /dev/null +++ b/test/DebugInfo/Mips/fn-call-line.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=mips-linux-gnu -filetype=asm -asm-verbose=0 -O0 < %s | FileCheck %s +; RUN: llc -mtriple=mips-linux-gnu -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=line - | FileCheck %s --check-prefix=INT + +; Mips used to generate 'jumpy' debug line info around calls. The address +; calculation for each call to f1() would share the same line info so it would +; emit output of the form: +; .loc $first_call_location +; .. address calculation .. +; .. function call .. +; .. address calculation .. +; .loc $second_call_location +; .. function call .. +; .loc $first_call_location +; .. address calculation .. +; .loc $third_call_location +; .. function call .. +; ... +; which would cause confusing stepping behaviour for the end user. +; +; This test checks that we emit more user friendly debug line info of the form: +; .loc $first_call_location +; .. address calculation .. 
+; .. function call .. +; .loc $second_call_location +; .. address calculation .. +; .. function call .. +; .loc $third_call_location +; .. address calculation .. +; .. function call .. +; ... +; +; Generated with clang from fn-call-line.c: +; void f1(); +; void f2() { +; f1(); +; f1(); +; } + +; CHECK: .loc 1 3 3 +; CHECK-NOT: .loc +; CHECK: %call16(f1) +; CHECK-NOT: .loc +; CHECK: .loc 1 4 3 +; CHECK-NOT: .loc +; CHECK: %call16(f1) + +; INT: {{^}}Address +; INT: ----- +; INT-NEXT: 2 0 1 0 0 is_stmt{{$}} +; INT-NEXT: 3 3 1 0 0 is_stmt prologue_end{{$}} +; INT-NEXT: 4 3 1 0 0 is_stmt{{$}} + + +; Function Attrs: nounwind uwtable +define void @f2() #0 { +entry: + call void (...)* @f1(), !dbg !11 + call void (...)* @f1(), !dbg !12 + ret void, !dbg !13 +} + +declare void @f1(...) #1 + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = !{!"0x11\0012\00clang version 3.7.0 (trunk 226641)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/fn-call-line.c] [DW_LANG_C99] +!1 = !{!"fn-call-line.c", !"/tmp/dbginfo"} +!2 = !{} +!3 = !{!4} +!4 = !{!"0x2e\00f2\00f2\00\002\000\001\000\000\000\000\002", !1, !5, !6, null, void ()* @f2, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [f2] +!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/fn-call-line.c] +!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = !{null} +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 2} +!10 = !{!"clang version 3.7.0 (trunk 226641)"} +!11 = !MDLocation(line: 3, column: 3, scope: !4) +!12 = !MDLocation(line: 4, column: 3, scope: !4) +!13 = !MDLocation(line: 5, column: 1, scope: !4) diff --git a/test/DebugInfo/X86/decl-derived-member.ll b/test/DebugInfo/X86/decl-derived-member.ll index 8c15c53e8141..c4f3dd90ffd5 100644 --- a/test/DebugInfo/X86/decl-derived-member.ll +++ b/test/DebugInfo/X86/decl-derived-member.ll @@ -7,8 +7,9 @@ ; struct base { ; virtual ~base(); ; }; +; typedef base base_type; ; struct foo { -; base b; +; base_type b; ; }; ; foo f; @@ -20,40 +21,47 @@ %struct.foo = type { %struct.base } %struct.base = type { i32 (...)** } + +$_ZN3fooC2Ev = comdat any + +$_ZN3fooD2Ev = comdat any + +$_ZN4baseC2Ev = comdat any + @f = global %struct.foo zeroinitializer, align 8 @__dso_handle = external global i8 @_ZTV4base = external unnamed_addr constant [4 x i8*] -@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }] +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_decl_derived_member.cpp, i8* null }] define internal void @__cxx_global_var_init() section ".text.startup" { entry: - call void @_ZN3fooC2Ev(%struct.foo* @f) #2, !dbg !35 - %0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.foo*)* @_ZN3fooD2Ev to void (i8*)*), i8* bitcast (%struct.foo* @f 
to i8*), i8* @__dso_handle) #2, !dbg !35 - ret void, !dbg !35 + call void @_ZN3fooC2Ev(%struct.foo* @f) #2, !dbg !33 + %0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.foo*)* @_ZN3fooD2Ev to void (i8*)*), i8* bitcast (%struct.foo* @f to i8*), i8* @__dso_handle) #2, !dbg !33 + ret void, !dbg !33 } ; Function Attrs: inlinehint nounwind uwtable -define linkonce_odr void @_ZN3fooC2Ev(%struct.foo* %this) unnamed_addr #0 align 2 { +define linkonce_odr void @_ZN3fooC2Ev(%struct.foo* %this) unnamed_addr #0 comdat align 2 { entry: %this.addr = alloca %struct.foo*, align 8 store %struct.foo* %this, %struct.foo** %this.addr, align 8 - call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !36, metadata !{!"0x102"}), !dbg !38 + call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !34, metadata !36), !dbg !37 %this1 = load %struct.foo** %this.addr - %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !39 - call void @_ZN4baseC2Ev(%struct.base* %b) #2, !dbg !39 - ret void, !dbg !39 + %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !38 + call void @_ZN4baseC2Ev(%struct.base* %b) #2, !dbg !38 + ret void, !dbg !38 } ; Function Attrs: inlinehint uwtable -define linkonce_odr void @_ZN3fooD2Ev(%struct.foo* %this) unnamed_addr #1 align 2 { +define linkonce_odr void @_ZN3fooD2Ev(%struct.foo* %this) unnamed_addr #1 comdat align 2 { entry: %this.addr = alloca %struct.foo*, align 8 store %struct.foo* %this, %struct.foo** %this.addr, align 8 - call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !40, metadata !{!"0x102"}), !dbg !41 + call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !39, metadata !36), !dbg !40 %this1 = load %struct.foo** %this.addr - %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !42 - call void @_ZN4baseD1Ev(%struct.base* %b), !dbg !42 - ret void, !dbg !44 + %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !41 + call void @_ZN4baseD1Ev(%struct.base* %b), !dbg !41 + ret void, !dbg !43 } ; Function Attrs: nounwind @@ -62,24 +70,24 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #2 ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata, metadata) #3 -declare void @_ZN4baseD1Ev(%struct.base*) #4 - ; Function Attrs: inlinehint nounwind uwtable -define linkonce_odr void @_ZN4baseC2Ev(%struct.base* %this) unnamed_addr #0 align 2 { +define linkonce_odr void @_ZN4baseC2Ev(%struct.base* %this) unnamed_addr #0 comdat align 2 { entry: %this.addr = alloca %struct.base*, align 8 store %struct.base* %this, %struct.base** %this.addr, align 8 - call void @llvm.dbg.declare(metadata %struct.base** %this.addr, metadata !45, metadata !{!"0x102"}), !dbg !47 + call void @llvm.dbg.declare(metadata %struct.base** %this.addr, metadata !44, metadata !36), !dbg !46 %this1 = load %struct.base** %this.addr - %0 = bitcast %struct.base* %this1 to i8***, !dbg !48 - store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4base, i64 0, i64 2), i8*** %0, !dbg !48 - ret void, !dbg !48 + %0 = bitcast %struct.base* %this1 to i32 (...)***, !dbg !47 + store i32 (...)** bitcast (i8** getelementptr inbounds ([4 x i8*]* @_ZTV4base, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, !dbg !47 + ret void, !dbg !47 } -define internal void @_GLOBAL__I_a() section ".text.startup" { +declare void @_ZN4baseD1Ev(%struct.base*) #4 + +define internal void @_GLOBAL__sub_I_decl_derived_member.cpp() section ".text.startup" { entry: - call void 
@__cxx_global_var_init(), !dbg !49 - ret void, !dbg !49 + call void @__cxx_global_var_init(), !dbg !48 + ret void } attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } @@ -89,56 +97,55 @@ attributes #3 = { nounwind readnone } attributes #4 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!32, !33} -!llvm.ident = !{!34} +!llvm.module.flags = !{!30, !31} +!llvm.ident = !{!32} -!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)\000\00\000\00\001", !1, !2, !3, !8, !30, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cc] [DW_LANG_C_plus_plus] -!1 = !{!"foo.cc", !"/usr/local/google/home/echristo"} +!0 = !{!"0x11\004\00clang version 3.7.0 (trunk 227104) (llvm/trunk 227103)\000\00\000\00\001", !1, !2, !3, !9, !28, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/decl-derived-member.cpp] [DW_LANG_C_plus_plus] +!1 = !{!"decl-derived-member.cpp", !"/tmp/dbginfo"} !2 = !{} -!3 = !{!4, !7} +!3 = !{!4, !8} !4 = !{!"0x13\00foo\005\0064\0064\000\000\000", !1, null, null, !5, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 5, size 64, align 64, offset 0] [def] [from ] !5 = !{!6} -!6 = !{!"0xd\00b\006\0064\0064\000\000", !1, !"_ZTS3foo", !"_ZTS4base"} ; [ DW_TAG_member ] [b] [line 6, size 64, align 64, offset 0] [from _ZTS4base] -!7 = !{!"0x13\00base\001\000\000\000\004\000", !1, null, null, null, null, null, !"_ZTS4base"} ; [ DW_TAG_structure_type ] [base] [line 1, size 0, align 0, offset 0] [decl] [from ] -!8 = !{!9, !13, !19, !22, !28} -!9 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\009\001\001\000\006\00256\000\009", !1, !10, !11, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [__cxx_global_var_init] -!10 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/foo.cc] -!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!12 = !{null} -!13 = !{!"0x2e\00~foo\00~foo\00_ZN3fooD2Ev\005\000\001\000\006\00320\000\005", !1, !"_ZTS3foo", !14, null, void (%struct.foo*)* @_ZN3fooD2Ev, null, !17, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [~foo] -!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!15 = !{null, !16} -!16 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo] -!17 = !{!"0x2e\00~foo\00~foo\00\000\000\000\000\006\00320\000\000", null, !"_ZTS3foo", !14, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 0] [~foo] -!18 = !{i32 786468} -!19 = !{!"0x2e\00foo\00foo\00_ZN3fooC2Ev\005\000\001\000\006\00320\000\005", !1, !"_ZTS3foo", !14, null, void (%struct.foo*)* @_ZN3fooC2Ev, null, !20, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [foo] -!20 = !{!"0x2e\00foo\00foo\00\000\000\000\000\006\00320\000\000", null, !"_ZTS3foo", !14, null, null, null, i32 0, !21} ; [ DW_TAG_subprogram ] [line 0] [foo] 
-!21 = !{i32 786468} -!22 = !{!"0x2e\00base\00base\00_ZN4baseC2Ev\001\000\001\000\006\00320\000\001", !1, !"_ZTS4base", !23, null, void (%struct.base*)* @_ZN4baseC2Ev, null, !26, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [base] -!23 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !24, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!24 = !{null, !25} -!25 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4base] -!26 = !{!"0x2e\00base\00base\00\000\000\000\000\006\00320\000\000", null, !"_ZTS4base", !23, null, null, null, i32 0, !27} ; [ DW_TAG_subprogram ] [line 0] [base] -!27 = !{i32 786468} -!28 = !{!"0x2e\00\00\00_GLOBAL__I_a\001\001\001\000\006\0064\000\001", !1, !10, !29, null, void ()* @_GLOBAL__I_a, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [local] [def] -!29 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!30 = !{!31} -!31 = !{!"0x34\00f\00f\00\009\000\001", null, !10, !4, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 9] [def] -!32 = !{i32 2, !"Dwarf Version", i32 4} -!33 = !{i32 1, !"Debug Info Version", i32 2} -!34 = !{!"clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)"} -!35 = !MDLocation(line: 9, scope: !9) -!36 = !{!"0x101\00this\0016777216\001088", !19, null, !37} ; [ DW_TAG_arg_variable ] [this] [line 0] -!37 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo] -!38 = !MDLocation(line: 0, scope: !19) -!39 = !MDLocation(line: 5, scope: !19) -!40 = !{!"0x101\00this\0016777216\001088", !13, null, !37} ; [ DW_TAG_arg_variable ] [this] [line 0] -!41 = !MDLocation(line: 0, scope: !13) -!42 = !MDLocation(line: 5, scope: !43) -!43 = !{!"0xb\005\000\000", !1, !13} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cc] -!44 = !MDLocation(line: 5, scope: !13) -!45 = !{!"0x101\00this\0016777216\001088", !22, null, !46} ; [ DW_TAG_arg_variable ] [this] [line 0] -!46 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4base] -!47 = !MDLocation(line: 0, scope: !22) -!48 = !MDLocation(line: 1, scope: !22) -!49 = !MDLocation(line: 1, scope: !28) +!6 = !{!"0xd\00b\006\0064\0064\000\000", !1, !"_ZTS3foo", !7} ; [ DW_TAG_member ] [b] [line 6, size 64, align 64, offset 0] [from base_type] +!7 = !{!"0x16\00base_type\004\000\000\000\000", !1, null, !"_ZTS4base"} ; [ DW_TAG_typedef ] [base_type] [line 4, size 0, align 0, offset 0] [from _ZTS4base] +!8 = !{!"0x13\00base\001\000\000\000\004\000", !1, null, null, null, null, null, !"_ZTS4base"} ; [ DW_TAG_structure_type ] [base] [line 1, size 0, align 0, offset 0] [decl] [from ] +!9 = !{!10, !14, !19, !24, !26} +!10 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\008\001\001\000\000\00256\000\008", !1, !11, !12, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 8] [local] [def] [__cxx_global_var_init] +!11 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/decl-derived-member.cpp] +!12 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!13 = !{null} +!14 = !{!"0x2e\00foo\00foo\00_ZN3fooC2Ev\005\000\001\000\000\00320\000\005", !1, 
!"_ZTS3foo", !15, null, void (%struct.foo*)* @_ZN3fooC2Ev, null, !18, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [foo] +!15 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!16 = !{null, !17} +!17 = !{!"0xf\00\000\0064\0064\000\001088\00", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo] +!18 = !{!"0x2e\00foo\00foo\00\000\000\000\000\000\00320\000\000", null, !"_ZTS3foo", !15, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 0] [foo] +!19 = !{!"0x2e\00base\00base\00_ZN4baseC2Ev\001\000\001\000\000\00320\000\001", !1, !"_ZTS4base", !20, null, void (%struct.base*)* @_ZN4baseC2Ev, null, !23, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [base] +!20 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!21 = !{null, !22} +!22 = !{!"0xf\00\000\0064\0064\000\001088\00", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4base] +!23 = !{!"0x2e\00base\00base\00\000\000\000\000\000\00320\000\000", null, !"_ZTS4base", !20, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 0] [base] +!24 = !{!"0x2e\00~foo\00~foo\00_ZN3fooD2Ev\005\000\001\000\000\00320\000\005", !1, !"_ZTS3foo", !15, null, void (%struct.foo*)* @_ZN3fooD2Ev, null, !25, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [~foo] +!25 = !{!"0x2e\00~foo\00~foo\00\000\000\000\000\000\00320\000\000", null, !"_ZTS3foo", !15, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 0] [~foo] +!26 = !{!"0x2e\00\00\00_GLOBAL__sub_I_decl_derived_member.cpp\000\001\001\000\000\0064\000\000", !1, !11, !27, null, void ()* @_GLOBAL__sub_I_decl_derived_member.cpp, null, null, !2} ; [ DW_TAG_subprogram ] [line 0] [local] [def] +!27 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!28 = !{!29} +!29 = !{!"0x34\00f\00f\00\008\000\001", null, !11, !"_ZTS3foo", %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 8] [def] +!30 = !{i32 2, !"Dwarf Version", i32 4} +!31 = !{i32 2, !"Debug Info Version", i32 2} +!32 = !{!"clang version 3.7.0 (trunk 227104) (llvm/trunk 227103)"} +!33 = !MDLocation(line: 8, column: 5, scope: !10) +!34 = !{!"0x101\00this\0016777216\001088", !14, null, !35} ; [ DW_TAG_arg_variable ] [this] [line 0] +!35 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo] +!36 = !{!"0x102"} ; [ DW_TAG_expression ] +!37 = !MDLocation(line: 0, scope: !14) +!38 = !MDLocation(line: 5, column: 8, scope: !14) +!39 = !{!"0x101\00this\0016777216\001088", !24, null, !35} ; [ DW_TAG_arg_variable ] [this] [line 0] +!40 = !MDLocation(line: 0, scope: !24) +!41 = !MDLocation(line: 5, column: 8, scope: !42) +!42 = !{!"0xb\005\008\002", !1, !24} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/decl-derived-member.cpp] +!43 = !MDLocation(line: 5, column: 8, scope: !24) +!44 = !{!"0x101\00this\0016777216\001088", !19, null, !45} ; [ DW_TAG_arg_variable ] [this] [line 0] +!45 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4base] +!46 = !MDLocation(line: 0, scope: !19) +!47 = !MDLocation(line: 1, column: 8, scope: !19) +!48 = !MDLocation(line: 0, 
scope: !26) diff --git a/test/MC/ARM/pr22395.s b/test/MC/ARM/pr22395.s new file mode 100644 index 000000000000..5da5d964298c --- /dev/null +++ b/test/MC/ARM/pr22395.s @@ -0,0 +1,63 @@ +@ RUN: llvm-mc -triple armv4t-eabi -filetype asm -o - %s 2>&1 | FileCheck %s + + .text + .thumb + + .p2align 2 + + .fpu neon + vldmia r0, {d16-d31} + +@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu vfpv3 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu vfpv3-d16 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu vfpv4 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu vfpv4-d16 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu fpv5-d16 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu fp-armv8 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu fp-armv8 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu neon + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu neon-vfpv4 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + + .fpu crypto-neon-fp-armv8 + vadd.f32 s1, s2, s3 +@ CHECK: vadd.f32 s1, s2, s3 +@ CHECK-NOT: error: instruction requires: VFP2 + diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s index ba12df0d3b63..83c524b9d07b 100644 --- a/test/MC/ELF/relocation-386.s +++ b/test/MC/ELF/relocation-386.s @@ -63,6 +63,8 @@ // Relocation 28 (und_symbol-bar2) is of type R_386_PC8 // CHECK-NEXT: 0xA0 R_386_PC8 und_symbol 0x0 // CHECK-NEXT: 0xA3 R_386_GOTOFF und_symbol 0x0 +// Relocation 29 (zed@PLT) is of type R_386_PLT32 and uses the symbol +// CHECK-NEXT: 0xA9 R_386_PLT32 zed 0x0 // CHECK-NEXT: } // CHECK-NEXT: ] @@ -129,6 +131,7 @@ bar2: .byte und_symbol-bar2 leal 1 + und_symbol@GOTOFF, %edi + movl zed@PLT(%eax), %eax .section zedsec,"awT",@progbits zed: diff --git a/test/MC/MachO/AArch64/mergeable.s b/test/MC/MachO/AArch64/mergeable.s deleted file mode 100644 index fc6ec04f37bf..000000000000 --- a/test/MC/MachO/AArch64/mergeable.s +++ /dev/null @@ -1,59 +0,0 @@ -// RUN: llvm-mc -triple aarch64-apple-darwin14 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s - -// Test that we "S + K" produce a relocation with a symbol, but just S produces -// a relocation with the section.
- - .section __TEXT,__literal4,4byte_literals -L0: - .long 42 - - .section __TEXT,__cstring,cstring_literals -L1: - .asciz "42" - - .section __DATA,__data - .quad L0 - .quad L0 + 1 - .quad L1 - .quad L1 + 1 - -// CHECK: Relocations [ -// CHECK-NEXT: Section __data { -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x18 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 1 -// CHECK-NEXT: Type: ARM64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: L1 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x10 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 1 -// CHECK-NEXT: Type: ARM64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: L1 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x8 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 1 -// CHECK-NEXT: Type: ARM64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: L0 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x0 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 0 -// CHECK-NEXT: Type: ARM64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: 0x2 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: ] diff --git a/test/MC/MachO/x86_64-mergeable.s b/test/MC/MachO/x86_64-mergeable.s deleted file mode 100644 index 972477693ed2..000000000000 --- a/test/MC/MachO/x86_64-mergeable.s +++ /dev/null @@ -1,59 +0,0 @@ -// RUN: llvm-mc -triple x86_64-apple-darwin14 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s - -// Test that we "S + K" produce a relocation with a symbol, but just S produces -// a relocation with the section. - - .section __TEXT,__literal4,4byte_literals -L0: - .long 42 - - .section __TEXT,__cstring,cstring_literals -L1: - .asciz "42" - - .section __DATA,__data - .quad L0 - .quad L0 + 1 - .quad L1 - .quad L1 + 1 - -// CHECK: Relocations [ -// CHECK-NEXT: Section __data { -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x18 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 1 -// CHECK-NEXT: Type: X86_64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: L1 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x10 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 0 -// CHECK-NEXT: Type: X86_64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: 0x3 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x8 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 1 -// CHECK-NEXT: Type: X86_64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: L0 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: Relocation { -// CHECK-NEXT: Offset: 0x0 -// CHECK-NEXT: PCRel: 0 -// CHECK-NEXT: Length: 3 -// CHECK-NEXT: Extern: 0 -// CHECK-NEXT: Type: X86_64_RELOC_UNSIGNED (0) -// CHECK-NEXT: Symbol: 0x2 -// CHECK-NEXT: Scattered: 0 -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: ] diff --git a/test/MC/MachO/x86_64-symbols.s b/test/MC/MachO/x86_64-symbols.s index f40183df853c..e2dcc440fb5f 100644 --- a/test/MC/MachO/x86_64-symbols.s +++ b/test/MC/MachO/x86_64-symbols.s @@ -121,12 +121,6 @@ D38: //L39: //D39: - .section foo, bar - .long L4 + 1 - .long L35 + 1 - .long L36 + 1 - .long L37 + 1 - .long L38 + 1 // CHECK: Symbols [ // CHECK-NEXT: Symbol { diff --git a/test/SymbolRewriter/rewrite.ll b/test/SymbolRewriter/rewrite.ll 
index 716fff9f284c..e8a0db6d606c 100644 --- a/test/SymbolRewriter/rewrite.ll +++ b/test/SymbolRewriter/rewrite.ll @@ -28,12 +28,40 @@ entry: ret void } +$source_comdat_function = comdat any +define dllexport void @source_comdat_function() comdat($source_comdat_function) { +entry: + ret void +} + +$source_comdat_function_1 = comdat exactmatch +define dllexport void @source_comdat_function_1() comdat($source_comdat_function_1) { +entry: + ret void +} + +$source_comdat_variable = comdat largest +@source_comdat_variable = global i32 32, comdat($source_comdat_variable) + +$source_comdat_variable_1 = comdat noduplicates +@source_comdat_variable_1 = global i32 64, comdat($source_comdat_variable_1) + +; CHECK: $target_comdat_function = comdat any +; CHECK: $target_comdat_function_1 = comdat exactmatch +; CHECK: $target_comdat_variable = comdat largest +; CHECK: $target_comdat_variable_1 = comdat noduplicates + ; CHECK: @target_variable = external global i32 ; CHECK-NOT: @source_variable = external global i32 ; CHECK: @target_pattern_variable = external global i32 ; CHECK-NOT: @source_pattern_variable = external global i32 ; CHECK: @target_pattern_multiple_variable_matches = external global i32 ; CHECK-NOT: @source_pattern_multiple_variable_matches = external global i32 +; CHECK: @target_comdat_variable = global i32 32, comdat +; CHECK-NOT: @source_comdat_variable = global i32 32, comdat +; CHECK: @target_comdat_variable_1 = global i32 64, comdat +; CHECK-NOT: @source_comdat_variable_1 = global i32 64, comdat + ; CHECK: declare void @target_function() ; CHECK-NOT: declare void @source_function() ; CHECK: declare void @target_pattern_function() @@ -57,3 +85,8 @@ entry: ; CHECK: ret i32 %res ; CHECK: } +; CHECK: define dllexport void @target_comdat_function() comdat +; CHECK-NOT: define dllexport void @source_comdat_function() comdat +; CHECK: define dllexport void @target_comdat_function_1() comdat +; CHECK-NOT: define dllexport void @source_comdat_function_1() comdat + diff --git a/test/SymbolRewriter/rewrite.map b/test/SymbolRewriter/rewrite.map index ef6dfc8ca2b9..8094939d088d 100644 --- a/test/SymbolRewriter/rewrite.map +++ b/test/SymbolRewriter/rewrite.map @@ -44,3 +44,23 @@ global alias: { target: _ZN1SD1Ev, } +function: { + source: source_comdat_function, + target: target_comdat_function, +} + +function: { + source: source_comdat_function_(.*), + transform: target_comdat_function_\1, +} + +global variable: { + source: source_comdat_variable, + target: target_comdat_variable, +} + +global variable: { + source: source_comdat_variable_(.*), + transform: target_comdat_variable_\1, +} + diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll index 1dc285ec3d7c..f28a76bb42d6 100644 --- a/test/Transforms/GVN/edge.ll +++ b/test/Transforms/GVN/edge.ll @@ -69,11 +69,11 @@ if: br label %return return: - %retval.0 = phi double [ %div, %if ], [ %x, %entry ] - ret double %retval.0 + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval ; CHECK-LABEL: define double @fcmp_oeq( -; CHECK: %div = fdiv double %x, 2.000000e+00 +; CHECK: %div = fdiv double %x, 2.0 } define double @fcmp_une(double %x, double %y) { @@ -86,10 +86,46 @@ else: br label %return return: - %retval.0 = phi double [ %div, %else ], [ %x, %entry ] - ret double %retval.0 + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval ; CHECK-LABEL: define double @fcmp_une( -; CHECK: %div = fdiv double %x, 2.000000e+00 +; CHECK: %div = fdiv double %x, 2.0 } +; PR22376 - We can't propagate zero 
constants because -0.0 +; compares equal to 0.0. If %y is -0.0 in this test case, +; we would produce the wrong sign on the infinity return value. +define double @fcmp_oeq_zero(double %x, double %y) { +entry: + %cmp = fcmp oeq double %y, 0.0 + br i1 %cmp, label %if, label %return + +if: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_oeq_zero( +; CHECK: %div = fdiv double %x, %y +} + +define double @fcmp_une_zero(double %x, double %y) { +entry: + %cmp = fcmp une double %y, -0.0 + br i1 %cmp, label %return, label %else + +else: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_une_zero( +; CHECK: %div = fdiv double %x, %y +} diff --git a/test/Transforms/IndVarSimplify/pr22222.ll b/test/Transforms/IndVarSimplify/pr22222.ll new file mode 100644 index 000000000000..ccdfe538dfa5 --- /dev/null +++ b/test/Transforms/IndVarSimplify/pr22222.ll @@ -0,0 +1,46 @@ +; RUN: opt -indvars -S < %s | FileCheck %s + +@b = common global i32 0, align 4 +@c = common global i32 0, align 4 +@a = common global i32 0, align 4 + +declare void @abort() #1 + +; Function Attrs: nounwind ssp uwtable +define i32 @main() { +entry: + %a.promoted13 = load i32* @a, align 4 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.end + %or.lcssa14 = phi i32 [ %a.promoted13, %entry ], [ %or.lcssa, %for.end ] + %d.010 = phi i32 [ 1, %entry ], [ 0, %for.end ] + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %inc12 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ] + %or11 = phi i32 [ %or.lcssa14, %for.cond1.preheader ], [ %or, %for.body3 ] +; CHECK-NOT: sub nuw i32 %inc12, %d.010 +; CHECK: sub i32 %inc12, %d.010 + %add = sub i32 %inc12, %d.010 + %or = or i32 %or11, %add + %inc = add i32 %inc12, 1 + br i1 false, label %for.body3, label %for.end + +for.end: ; preds = %for.body3 + %or.lcssa = phi i32 [ %or, %for.body3 ] + br i1 false, label %for.cond1.preheader, label %for.end6 + +for.end6: ; preds = %for.end + %or.lcssa.lcssa = phi i32 [ %or.lcssa, %for.end ] + store i32 %or.lcssa.lcssa, i32* @a, align 4 + %cmp7 = icmp eq i32 %or.lcssa.lcssa, -1 + br i1 %cmp7, label %if.end, label %if.then + +if.then: ; preds = %for.end6 + tail call void @abort() #2 + unreachable + +if.end: ; preds = %for.end6 + ret i32 0 +} diff --git a/test/Transforms/IndVarSimplify/strengthen-overflow.ll b/test/Transforms/IndVarSimplify/strengthen-overflow.ll index 07e489e03382..2bafe96e1ccc 100644 --- a/test/Transforms/IndVarSimplify/strengthen-overflow.ll +++ b/test/Transforms/IndVarSimplify/strengthen-overflow.ll @@ -52,58 +52,6 @@ define i32 @test.signed.add.1(i32* %array, i32 %length, i32 %init) { ret i32 42 } -define i32 @test.signed.sub.0(i32* %array, i32 %length, i32 %init) { -; CHECK-LABEL: @test.signed.sub.0 - entry: - %upper = icmp sgt i32 %init, %length - br i1 %upper, label %loop, label %exit - - loop: -; CHECK-LABEL: loop - %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ] - %civ.inc = sub i32 %civ, 1 -; CHECK: %civ.inc = sub nsw i32 %civ, 1 - %cmp = icmp slt i32 %civ.inc, %length - br i1 %cmp, label %latch, label %break - - latch: - store i32 0, i32* %array - %check = icmp sgt i32 %civ.inc, %length - br i1 %check, label %loop, label %break - - break: - ret i32 %civ.inc - - exit: - ret i32 42 -} - -define i32 @test.signed.sub.1(i32* %array, 
i32 %length, i32 %init) { -; CHECK-LABEL: @test.signed.sub.1 - entry: - %upper = icmp sgt i32 %init, %length - br i1 %upper, label %loop, label %exit - - loop: -; CHECK-LABEL: loop - %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ] - %civ.inc = sub i32 %civ, 1 -; CHECK: %civ.inc = sub i32 %civ, 1 - %cmp = icmp slt i32 %civ.inc, %length - br i1 %cmp, label %latch, label %break - - latch: - store i32 0, i32* %array - %check = icmp sge i32 %civ.inc, %length - br i1 %check, label %loop, label %break - - break: - ret i32 %civ.inc - - exit: - ret i32 42 -} - define i32 @test.unsigned.add.0(i32* %array, i32 %length, i32 %init) { ; CHECK-LABEL: @test.unsigned.add.0 entry: @@ -156,59 +104,5 @@ define i32 @test.unsigned.add.1(i32* %array, i32 %length, i32 %init) { ret i32 42 } -define i32 @test.unsigned.sub.0(i32* %array, i32* %length_ptr, i32 %init) { -; CHECK-LABEL: @test.unsigned.sub.0 - entry: - %length = load i32* %length_ptr, !range !0 - %upper = icmp ult i32 %init, %length - br i1 %upper, label %loop, label %exit - - loop: -; CHECK-LABEL: loop - %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ] - %civ.inc = sub i32 %civ, 2 -; CHECK: %civ.inc = sub nuw i32 %civ, 2 - %cmp = icmp slt i32 %civ.inc, %length - br i1 %cmp, label %latch, label %break - - latch: - store i32 0, i32* %array - %check = icmp ult i32 %civ.inc, %length - br i1 %check, label %loop, label %break - - break: - ret i32 %civ.inc - - exit: - ret i32 42 -} - -define i32 @test.unsigned.sub.1(i32* %array, i32* %length_ptr, i32 %init) { -; CHECK-LABEL: @test.unsigned.sub.1 - entry: - %length = load i32* %length_ptr, !range !1 - %upper = icmp ult i32 %init, %length - br i1 %upper, label %loop, label %exit - - loop: -; CHECK-LABEL: loop - %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ] - %civ.inc = sub i32 %civ, 2 -; CHECK: %civ.inc = sub i32 %civ, 2 - %cmp = icmp slt i32 %civ.inc, %length - br i1 %cmp, label %latch, label %break - - latch: - store i32 0, i32* %array - %check = icmp ult i32 %civ.inc, %length - br i1 %check, label %loop, label %break - - break: - ret i32 %civ.inc - - exit: - ret i32 42 -} - !0 = !{i32 0, i32 2} !1 = !{i32 0, i32 42} diff --git a/test/Transforms/InstCombine/call-cast-target.ll b/test/Transforms/InstCombine/call-cast-target.ll index b82dd99db36f..4a5c94961e29 100644 --- a/test/Transforms/InstCombine/call-cast-target.ll +++ b/test/Transforms/InstCombine/call-cast-target.ll @@ -61,3 +61,14 @@ entry: %call = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a) ret i32 %call } + +declare i32 @fn4(i32) "thunk" + +define i32 @test4(i32* %a) { +; CHECK-LABEL: @test4 +; CHECK: %[[call:.*]] = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a) +; CHECK-NEXT: ret i32 %[[call]] +entry: + %call = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a) + ret i32 %call +} diff --git a/test/Transforms/InstCombine/memcpy_chk-1.ll b/test/Transforms/InstCombine/memcpy_chk-1.ll index 008b838201ed..ddaaf82a8e2d 100644 --- a/test/Transforms/InstCombine/memcpy_chk-1.ll +++ b/test/Transforms/InstCombine/memcpy_chk-1.ll @@ -15,46 +15,50 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Check cases where dstlen >= len. 
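; __memcpy_chk is the fortified form of memcpy: its fourth argument carries
; the statically known size of the destination object. A minimal sketch of
; the fold these tests exercise, assuming the usual fortify semantics (the
; constants are taken from test_simplify1 below; %dst and %src stand in for
; the bitcast globals and are illustrative):
;
;   %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
;
; Because the object size (4th argument) provably covers the copy length
; (3rd argument), the guarded call is folded to the plain intrinsic and the
; call's result is replaced by the destination pointer:
;
;   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1824, i32 4, i1 false)
;   ; ...all uses of %ret become %dst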
-define void @test_simplify1() { +define i8* @test_simplify1() { ; CHECK-LABEL: @test_simplify1( %dst = bitcast %struct.T1* @t1 to i8* %src = bitcast %struct.T2* @t2 to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64 - call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) + %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret } -define void @test_simplify2() { +define i8* @test_simplify2() { ; CHECK-LABEL: @test_simplify2( %dst = bitcast %struct.T1* @t1 to i8* %src = bitcast %struct.T3* @t3 to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64 - call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) + %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848) + ret i8* %ret } ; Check cases where dstlen < len. -define void @test_no_simplify1() { +define i8* @test_no_simplify1() { ; CHECK-LABEL: @test_no_simplify1( %dst = bitcast %struct.T3* @t3 to i8* %src = bitcast %struct.T1* @t1 to i8* -; CHECK-NEXT: call i8* @__memcpy_chk - call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824) - ret void +; CHECK-NEXT: %ret = call i8* @__memcpy_chk(i8* bitcast (%struct.T3* @t3 to i8*), i8* bitcast (%struct.T1* @t1 to i8*), i64 2848, i64 1824) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824) + ret i8* %ret } -define void @test_no_simplify2() { +define i8* @test_no_simplify2() { ; CHECK-LABEL: @test_no_simplify2( %dst = bitcast %struct.T1* @t1 to i8* %src = bitcast %struct.T2* @t2 to i8* -; CHECK-NEXT: call i8* @__memcpy_chk - call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0) - ret void +; CHECK-NEXT: %ret = call i8* @__memcpy_chk(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1024, i64 0) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0) + ret i8* %ret } define i8* @test_simplify_return_indcall(i8* ()* %alloc) { diff --git a/test/Transforms/InstCombine/memmove_chk-1.ll b/test/Transforms/InstCombine/memmove_chk-1.ll index 6d93bbbf959e..e4e1f6eedf39 100644 --- a/test/Transforms/InstCombine/memmove_chk-1.ll +++ b/test/Transforms/InstCombine/memmove_chk-1.ll @@ -15,46 +15,50 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Check cases where dstlen >= len. 
-define void @test_simplify1() { +define i8* @test_simplify1() { ; CHECK-LABEL: @test_simplify1( %dst = bitcast %struct.T1* @t1 to i8* %src = bitcast %struct.T2* @t2 to i8* -; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64 - call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) - ret void +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1824, i32 4, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) + %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) + ret i8* %ret } -define void @test_simplify2() { +define i8* @test_simplify2() { ; CHECK-LABEL: @test_simplify2( %dst = bitcast %struct.T1* @t1 to i8* %src = bitcast %struct.T3* @t3 to i8* -; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64 - call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848) - ret void +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T3* @t3 to i8*), i64 1824, i32 4, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) + %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848) + ret i8* %ret } ; Check cases where dstlen < len. -define void @test_no_simplify1() { +define i8* @test_no_simplify1() { ; CHECK-LABEL: @test_no_simplify1( %dst = bitcast %struct.T3* @t3 to i8* %src = bitcast %struct.T1* @t1 to i8* -; CHECK-NEXT: call i8* @__memmove_chk - call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824) - ret void +; CHECK-NEXT: %ret = call i8* @__memmove_chk(i8* bitcast (%struct.T3* @t3 to i8*), i8* bitcast (%struct.T1* @t1 to i8*), i64 2848, i64 1824) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824) + ret i8* %ret } -define void @test_no_simplify2() { +define i8* @test_no_simplify2() { ; CHECK-LABEL: @test_no_simplify2( %dst = bitcast %struct.T1* @t1 to i8* %src = bitcast %struct.T2* @t2 to i8* -; CHECK-NEXT: call i8* @__memmove_chk - call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0) - ret void +; CHECK-NEXT: %ret = call i8* @__memmove_chk(i8* bitcast (%struct.T1* @t1 to i8*), i8* bitcast (%struct.T2* @t2 to i8*), i64 1024, i64 0) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0) + ret i8* %ret } declare i8* @__memmove_chk(i8*, i8*, i64, i64) diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll index 47cc7db998e4..27f7293a6bce 100644 --- a/test/Transforms/InstCombine/memset_chk-1.ll +++ b/test/Transforms/InstCombine/memset_chk-1.ll @@ -11,51 +11,56 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Check cases where dstlen >= len. 
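; An object size of -1 deserves a note: __builtin_object_size evaluates to
; (size_t)-1 when the size cannot be determined, so -1 here means
; "unknown/unlimited" and the guard trivially holds. That is why
; test_simplify3 below still folds; roughly (operands as in the test, the
; rewrite shown is a sketch):
;
;   %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
;     ==>
;   call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 1824, i32 4, i1 false)
;   ; ...and %ret is replaced by %dst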
-define void @test_simplify1() { +define i8* @test_simplify1() { ; CHECK-LABEL: @test_simplify1( %dst = bitcast %struct.T* @t to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 - call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824) - ret void +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) + %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824) + ret i8* %ret } -define void @test_simplify2() { +define i8* @test_simplify2() { ; CHECK-LABEL: @test_simplify2( %dst = bitcast %struct.T* @t to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 - call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648) - ret void +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) + %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648) + ret i8* %ret } -define void @test_simplify3() { +define i8* @test_simplify3() { ; CHECK-LABEL: @test_simplify3( %dst = bitcast %struct.T* @t to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 - call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1) - ret void +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i32 4, i1 false) +; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) + %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1) + ret i8* %ret } ; Check cases where dstlen < len. -define void @test_no_simplify1() { +define i8* @test_no_simplify1() { ; CHECK-LABEL: @test_no_simplify1( %dst = bitcast %struct.T* @t to i8* -; CHECK-NEXT: call i8* @__memset_chk - call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400) - ret void +; CHECK-NEXT: %ret = call i8* @__memset_chk(i8* bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 400) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400) + ret i8* %ret } -define void @test_no_simplify2() { +define i8* @test_no_simplify2() { ; CHECK-LABEL: @test_no_simplify2( %dst = bitcast %struct.T* @t to i8* -; CHECK-NEXT: call i8* @__memset_chk - call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0) - ret void +; CHECK-NEXT: %ret = call i8* @__memset_chk(i8* bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 0) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0) + ret i8* %ret } declare i8* @__memset_chk(i8*, i32, i64, i64) diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll index 8a02529c61ca..393c5d96d10b 100644 --- a/test/Transforms/InstCombine/stpcpy_chk-1.ll +++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll @@ -11,46 +11,50 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; Check cases where slen >= strlen (src). 
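; Unlike strcpy, stpcpy returns a pointer to the terminating NUL of the
; destination rather than to its start, and the folded form has to preserve
; that. For the 12-byte constant source (11 characters plus the NUL) used by
; test_simplify1 below, a sketch of the rewrite is:
;
;   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
;     ==>
;   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 12, i32 1, i1 false)
;   ; ...%ret becomes getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)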
-define void @test_simplify1() { +define i8* @test_simplify1() { ; CHECK-LABEL: @test_simplify1( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 - call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11) + %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60) + ret i8* %ret } -define void @test_simplify2() { +define i8* @test_simplify2() { ; CHECK-LABEL: @test_simplify2( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 - call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11) + %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12) + ret i8* %ret } -define void @test_simplify3() { +define i8* @test_simplify3() { ; CHECK-LABEL: @test_simplify3( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 - call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11) + %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) + ret i8* %ret } ; Check cases where there are no string constants. -define void @test_simplify4() { +define i8* @test_simplify4() { ; CHECK-LABEL: @test_simplify4( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 -; CHECK-NEXT: call i8* @stpcpy - call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) - ret void +; CHECK-NEXT: %stpcpy = call i8* @stpcpy(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0)) +; CHECK-NEXT: ret i8* %stpcpy + %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) + ret i8* %ret } ; Check case where the string length is not constant. 
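; When the object-size argument is only known at run time (here it comes
; from llvm.objectsize), the call cannot be folded away outright, but a
; constant source string still pins the copy length. The simplifier
; therefore forwards to __memcpy_chk with the exact length while computing
; stpcpy's result directly; a sketch for test_simplify5 below:
;
;   %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false)
;   %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
;     ==>
;   %1 = call i8* @__memcpy_chk(i8* %dst, i8* %src, i32 12, i32 %len)
;   ; ...%ret becomes %dst + 11, the end of the copied string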
@@ -60,10 +64,11 @@ define i8* @test_simplify5() { %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK: @__memcpy_chk +; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) +; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11) %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len) -; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11) ret i8* %ret } @@ -73,8 +78,9 @@ define i8* @test_simplify6() { ; CHECK-LABEL: @test_simplify6( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 -; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen -; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]] +; CHECK-NEXT: %strlen = call i32 @strlen(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)) +; CHECK-NEXT: %1 = getelementptr inbounds [60 x i8]* @a, i32 0, i32 %strlen +; CHECK-NEXT: ret i8* %1 %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len) ret i8* %ret @@ -82,14 +88,15 @@ define i8* @test_simplify6() { ; Check case where slen < strlen (src). -define void @test_no_simplify1() { +define i8* @test_no_simplify1() { ; CHECK-LABEL: @test_no_simplify1( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 -; CHECK-NEXT: call i8* @__stpcpy_chk - call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8) - ret void +; CHECK-NEXT: %ret = call i8* @__stpcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0), i32 8) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8) + ret i8* %ret } declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll index 8e7fec76ef5c..e3f163fd087d 100644 --- a/test/Transforms/InstCombine/strcpy_chk-1.ll +++ b/test/Transforms/InstCombine/strcpy_chk-1.ll @@ -11,59 +11,65 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; Check cases where slen >= strlen (src). 
-define void @test_simplify1() { +define i8* @test_simplify1() { ; CHECK-LABEL: @test_simplify1( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60) + ret i8* %ret } -define void @test_simplify2() { +define i8* @test_simplify2() { ; CHECK-LABEL: @test_simplify2( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12) + ret i8* %ret } -define void @test_simplify3() { +define i8* @test_simplify3() { ; CHECK-LABEL: @test_simplify3( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) + ret i8* %ret } ; Check cases where there are no string constants. -define void @test_simplify4() { +define i8* @test_simplify4() { ; CHECK-LABEL: @test_simplify4( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 -; CHECK-NEXT: call i8* @strcpy - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) - ret void +; CHECK-NEXT: %strcpy = call i8* @strcpy(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0)) +; CHECK-NEXT: ret i8* %strcpy + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1) + ret i8* %ret } ; Check case where the string length is not constant. 
-define void @test_simplify5() { +define i8* @test_simplify5() { ; CHECK-LABEL: @test_simplify5( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK: @__memcpy_chk +; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) +; CHECK-NEXT: %1 = call i8* @__memcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 %len) +; CHECK-NEXT: ret i8* %1 %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len) - ret void + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len) + ret i8* %ret } ; Check case where the source and destination are the same. @@ -72,7 +78,9 @@ define i8* @test_simplify6() { ; CHECK-LABEL: @test_simplify6( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 -; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) +; CHECK-NEXT: %len = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i1 false) +; CHECK-NEXT: %ret = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i32 %len) +; CHECK-NEXT: ret i8* %ret %len = call i32 @llvm.objectsize.i32.p0i8(i8* %dst, i1 false) %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len) ret i8* %ret @@ -80,14 +88,15 @@ define i8* @test_simplify6() { ; Check case where slen < strlen (src). -define void @test_no_simplify1() { +define i8* @test_no_simplify1() { ; CHECK-LABEL: @test_no_simplify1( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 -; CHECK-NEXT: call i8* @__strcpy_chk - call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) - ret void +; CHECK-NEXT: %ret = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([60 x i8]* @b, i32 0, i32 0), i32 8) +; CHECK-NEXT: ret i8* %ret + %ret = call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8) + ret i8* %ret } declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind diff --git a/test/Transforms/InstCombine/strncpy_chk-1.ll b/test/Transforms/InstCombine/strncpy_chk-1.ll index 90b4173ced77..9242a8acdbbe 100644 --- a/test/Transforms/InstCombine/strncpy_chk-1.ll +++ b/test/Transforms/InstCombine/strncpy_chk-1.ll @@ -11,56 +11,61 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; Check cases where dstlen >= len -define void @test_simplify1() { +define i8* @test_simplify1() { ; CHECK-LABEL: @test_simplify1( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 - call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60) - ret void +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 12, i32 1, i1 false) +; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0) + %ret = call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60) + ret i8* %ret } -define void @test_simplify2() { +define i8* @test_simplify2() { ; CHECK-LABEL: @test_simplify2( %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call 
diff --git a/test/tools/gold/emit-llvm.ll b/test/tools/gold/emit-llvm.ll
index cfdc55108c0b..2c43147a2eab 100644
--- a/test/tools/gold/emit-llvm.ll
+++ b/test/tools/gold/emit-llvm.ll
@@ -21,6 +21,11 @@ target triple = "x86_64-unknown-linux-gnu"

+@g7 = extern_weak global i32
+; CHECK-DAG: @g7 = extern_weak global i32
+
+@g8 = external global i32
+
 ; CHECK: define internal void @f1()
 ; OPT-NOT: @f1
 define hidden void @f1() {
@@ -62,6 +67,13 @@ define linkonce_odr void @f6() unnamed_addr {
 }
 @g6 = global void()* @f6

+define i32* @f7() {
+  ret i32* @g7
+}
+
+define i32* @f8() {
+  ret i32* @g8
+}

 ; API: f1 PREVAILING_DEF_IRONLY
 ; API: f2 PREVAILING_DEF_IRONLY
@@ -69,5 +81,9 @@ define linkonce_odr void @f6() unnamed_addr {
 ; API: f4 PREVAILING_DEF_IRONLY_EXP
 ; API: f5 PREVAILING_DEF_IRONLY_EXP
 ; API: f6 PREVAILING_DEF_IRONLY_EXP
+; API: f7 PREVAILING_DEF_IRONLY_EXP
+; API: f8 PREVAILING_DEF_IRONLY_EXP
+; API: g7 UNDEF
+; API: g8 UNDEF
 ; API: g5 PREVAILING_DEF_IRONLY_EXP
 ; API: g6 PREVAILING_DEF_IRONLY_EXP
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index bcc91e9d0612..5524bb9922c5 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -633,8 +633,10 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
       break;

     case LDPR_UNDEF:
-      assert(GV->hasComdat());
-      Drop.insert(GV);
+      if (!GV->isDeclarationForLinker()) {
+        assert(GV->hasComdat());
+        Drop.insert(GV);
+      }
       break;

     case LDPR_PREVAILING_DEF_IRONLY: {
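The gold-plugin change above stops dropping symbols that the IR merely declares: under an LDPR_UNDEF resolution, declarations such as the new @g7/@g8 test globals must survive rather than trip the hasComdat assertion. As a rough C-level analogue of those two globals (illustrative only; the weak attribute is the usual way to obtain extern_weak linkage from C):

    /* Declarations only: this module does not define g7 or g8, so the
     * linker resolves them as UNDEF and the plugin must keep the
     * declarations instead of dropping them. */
    extern int g7 __attribute__((weak)); /* lowers to extern_weak */
    extern int g8;                       /* lowers to external */

    int *f7(void) { return &g7; }
    int *f8(void) { return &g8; }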
diff --git a/tools/llvm-c-test/CMakeLists.txt b/tools/llvm-c-test/CMakeLists.txt
index baf970a02781..f22dffb30e8a 100644
--- a/tools/llvm-c-test/CMakeLists.txt
+++ b/tools/llvm-c-test/CMakeLists.txt
@@ -41,6 +41,7 @@ add_llvm_tool(llvm-c-test
   include-all.c
   main.c
   module.c
+  metadata.c
   object.c
   targets.c
   )
diff --git a/tools/llvm-c-test/llvm-c-test.h b/tools/llvm-c-test/llvm-c-test.h
index 0a25aa606197..1b4976ae1423 100644
--- a/tools/llvm-c-test/llvm-c-test.h
+++ b/tools/llvm-c-test/llvm-c-test.h
@@ -27,6 +27,10 @@ int calc(void);
 // disassemble.c
 int disassemble(void);

+// metadata.c
+int add_named_metadata_operand(void);
+int set_metadata(void);
+
 // object.c
 int object_list_sections(void);
 int object_list_symbols(void);
diff --git a/tools/llvm-c-test/main.c b/tools/llvm-c-test/main.c
index 72f8b0428994..59cc749fb155 100644
--- a/tools/llvm-c-test/main.c
+++ b/tools/llvm-c-test/main.c
@@ -65,6 +65,10 @@ int main(int argc, char **argv) {
     return disassemble();
   } else if (argc == 2 && !strcmp(argv[1], "--calc")) {
     return calc();
+  } else if (argc == 2 && !strcmp(argv[1], "--add-named-metadata-operand")) {
+    return add_named_metadata_operand();
+  } else if (argc == 2 && !strcmp(argv[1], "--set-metadata")) {
+    return set_metadata();
   } else {
     print_usage();
   }
diff --git a/tools/llvm-c-test/metadata.c b/tools/llvm-c-test/metadata.c
new file mode 100644
index 000000000000..b64a696c5281
--- /dev/null
+++ b/tools/llvm-c-test/metadata.c
@@ -0,0 +1,43 @@
+/*===-- metadata.c - tool for testing libLLVM and llvm-c API --------------===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file implements the --add-named-metadata-operand and --set-metadata  *|
+|* commands in llvm-c-test.                                                   *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c-test.h"
+#include "llvm-c/Core.h"
+
+int add_named_metadata_operand(void) {
+  LLVMModuleRef m = LLVMModuleCreateWithName("Mod");
+  LLVMValueRef values[] = { LLVMConstInt(LLVMInt32Type(), 0, 0) };
+
+  // This used to trigger an assertion
+  LLVMAddNamedMetadataOperand(m, "name", LLVMMDNode(values, 1));
+
+  LLVMDisposeModule(m);
+
+  return 0;
+}
+
+int set_metadata(void) {
+  LLVMBuilderRef b = LLVMCreateBuilder();
+  LLVMValueRef values[] = { LLVMConstInt(LLVMInt32Type(), 0, 0) };
+
+  // This used to trigger an assertion
+  LLVMSetMetadata(
+      LLVMBuildRetVoid(b),
+      LLVMGetMDKindID("kind", 4),
+      LLVMMDNode(values, 1));
+
+  LLVMDisposeBuilder(b);
+
+  return 0;
+}
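For context on the C API the new file drives, here is a small stand-alone sketch (not part of the patch) that attaches metadata to a free-standing instruction and reads it back; LLVMGetMetadata is the matching accessor in llvm-c/Core.h:

    #include "llvm-c/Core.h"
    #include <stdio.h>

    int main(void) {
      LLVMBuilderRef b = LLVMCreateBuilder();
      LLVMValueRef values[] = { LLVMConstInt(LLVMInt32Type(), 0, 0) };

      /* Build a free-standing instruction, as metadata.c does (leaked
       * here for brevity, as in the test), and attach a node under a
       * custom metadata kind. */
      LLVMValueRef ret = LLVMBuildRetVoid(b);
      unsigned kind = LLVMGetMDKindID("kind", 4);
      LLVMSetMetadata(ret, kind, LLVMMDNode(values, 1));

      /* The attachment can be read back under the same kind ID. */
      printf("attached: %s\n", LLVMGetMetadata(ret, kind) ? "yes" : "no");

      LLVMDisposeBuilder(b);
      return 0;
    }

The new modes are reachable as llvm-c-test --add-named-metadata-operand and llvm-c-test --set-metadata, matching the dispatch added in main.c above.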
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index cacbde66f350..1f9aa32aeab0 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -665,3 +665,20 @@ TEST(TripleTest, getARMCPUForArch) {
     }
   }
 }
+
+TEST(TripleTest, NormalizeARM) {
+  EXPECT_EQ("armv6--netbsd-eabi", Triple::normalize("armv6-netbsd-eabi"));
+  EXPECT_EQ("armv7--netbsd-eabi", Triple::normalize("armv7-netbsd-eabi"));
+  EXPECT_EQ("armv6eb--netbsd-eabi", Triple::normalize("armv6eb-netbsd-eabi"));
+  EXPECT_EQ("armv7eb--netbsd-eabi", Triple::normalize("armv7eb-netbsd-eabi"));
+  EXPECT_EQ("armv6--netbsd-eabihf", Triple::normalize("armv6-netbsd-eabihf"));
+  EXPECT_EQ("armv7--netbsd-eabihf", Triple::normalize("armv7-netbsd-eabihf"));
+  EXPECT_EQ("armv6eb--netbsd-eabihf", Triple::normalize("armv6eb-netbsd-eabihf"));
+  EXPECT_EQ("armv7eb--netbsd-eabihf", Triple::normalize("armv7eb-netbsd-eabihf"));
+
+  Triple T;
+  T = Triple("armv6--netbsd-eabi");
+  EXPECT_EQ(Triple::arm, T.getArch());
+  T = Triple("armv6eb--netbsd-eabi");
+  EXPECT_EQ(Triple::armeb, T.getArch());
+}
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
index 62967bdd3270..f2a3000906e3 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
@@ -359,13 +359,10 @@ TEST_F(MCJITCAPITest, gva) {
   buildMCJITEngine();
   buildAndRunPasses();

-  union {
-    uint64_t raw;
-    int32_t *usable;
-  } valuePointer;
-  valuePointer.raw = LLVMGetGlobalValueAddress(Engine, "simple_value");
+  uint64_t raw = LLVMGetGlobalValueAddress(Engine, "simple_value");
+  int32_t *usable = (int32_t *) raw;

-  EXPECT_EQ(42, *valuePointer.usable);
+  EXPECT_EQ(42, *usable);
 }

 TEST_F(MCJITCAPITest, gfa) {
@@ -376,13 +373,10 @@ TEST_F(MCJITCAPITest, gfa) {
   buildMCJITEngine();
   buildAndRunPasses();

-  union {
-    uint64_t raw;
-    int (*usable)();
-  } functionPointer;
-  functionPointer.raw = LLVMGetFunctionAddress(Engine, "simple_function");
+  uint64_t raw = LLVMGetFunctionAddress(Engine, "simple_function");
+  int (*usable)() = (int (*)()) raw;

-  EXPECT_EQ(42, functionPointer.usable());
+  EXPECT_EQ(42, usable());
 }

 TEST_F(MCJITCAPITest, custom_memory_manager) {
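The final two hunks drop union-based type punning in favor of plain casts when turning a JIT address back into a usable pointer. A self-contained illustration of the same pattern, with an ordinary function address standing in for the uint64_t returned by LLVMGetFunctionAddress (no JIT involved; illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    static int simple_function(void) { return 42; }

    int main(void) {
      /* Stand-in for a JIT-resolved address such as the uint64_t
       * returned by LLVMGetFunctionAddress(). */
      uint64_t raw = (uint64_t)(uintptr_t)&simple_function;

      /* Cast the integer address back to a callable pointer, mirroring
       * the updated tests; no union is needed for the conversion. */
      int (*usable)(void) = (int (*)(void))(uintptr_t)raw;

      printf("%d\n", usable()); /* prints 42 */
      return 0;
    }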